### Clinical Longformer Model

In [41]:
import os
import numpy as np
import pandas as pd
import logging
logging.basicConfig(level=logging.INFO)


import warnings
warnings.filterwarnings('ignore')

In [42]:
import torch

# Report the CUDA runtime status so the hardware used for this run is visible
# in the notebook output.
has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
print(f"CUDA device count: {torch.cuda.device_count()}")
if not has_cuda:
    print("No GPU detected")
else:
    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")

# Device used for tensors created later in the notebook: GPU when present,
# CPU otherwise.
device = torch.device('cuda') if has_cuda else torch.device('cpu')


CUDA available: True
CUDA device count: 2
CUDA device name: NVIDIA A100-SXM4-80GB


### Load train and test folds

In [None]:
import pandas as pd
import os

# Root directory holding the pre-split cross-validation CSV files.
folds_dir = 'path/to/folds'

# One DataFrame per fold, appended in fold order (list index 0 holds fold 1).
train_data_list = []
test_data_list = []

# Read the train/test CSV pair of each of the five folds.
for fold in range(1, 6):
    train_file, test_file = (
        os.path.join(folds_dir, f'fold_{fold}_train.csv'),
        os.path.join(folds_dir, f'fold_{fold}_test.csv'),
    )
    train_data, test_data = pd.read_csv(train_file), pd.read_csv(test_file)

    train_data_list += [train_data]
    test_data_list += [test_data]

    print(f"Fold {fold} - Train data shape: {train_data.shape}")
    print(f"Fold {fold} - Test data shape: {test_data.shape}")

# After the loop, train_data / test_data still reference the LAST fold only;
# use train_data_list[i] / test_data_list[i] to reach a specific fold.

In [46]:
# Names of the binary target columns. The order of this list fixes the order
# of the model's output logits and of every per-label metric computed below.
label_cols = [
    'Active_bleeding_from_named_vessel',
    'Active_bleeding_from_solid_organ',
    'Bowel_resection',
    'Class_I',
    'Class_II',
    'Class_III',
    'Class_IV',
    'Colostomy',
    'Fascia_Closed_(Interrupted)',
    'Fascia_Closed_(Running/Continuous)',
    'Fascia_Left_Open',
    'Hand-Sewn_Anastomosis',
    'Ileostomy',
    'Primary_Repair',
    'Serosal_tear_repair',
    'Skin_Closed_(Full w/ Prevena)',
    'Skin_Closed_(Full)',
    'Skin_Closed_(Partial)',
    'Skin_Left_Open',
    'Stapled_Anastomosis',
    'Synthetic',
]


### Load Model, Tokenizer, and Encodings

In [47]:
from transformers import AutoTokenizer, AutoModelForMaskedLM
from transformers import AutoModelForSequenceClassification
from torch.utils.data import Dataset

In [None]:
# Hugging Face Hub checkpoint shared by the tokenizer and the classifier.
checkpoint_name = "yikuan8/Clinical-Longformer"

tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

# Sequence-classification head with one output logit per entry of label_cols.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint_name,
    num_labels=len(label_cols),
)

In [49]:
# Tokenize the notes held in train_data / test_data (the last fold loaded above).
#
# 'only_second' is a sentence-pair truncation strategy: it can only shorten the
# second sequence of a pair. These calls pass a single sequence per example, so
# that strategy cannot truncate an over-length note. truncation=True cuts each
# note to max_length tokens, keeping the beginning of the text.
# NOTE(review): if the END of a note should be kept instead, set
# tokenizer.truncation_side = 'left' before tokenizing — confirm the intent.
train_encodings = tokenizer(
    train_data['Text_desc'].tolist(),
    truncation=True,
    max_length=4096,
    padding=True   # pad every example to the longest one in this call
)

test_encodings = tokenizer(
    test_data['Text_desc'].tolist(),
    truncation=True,
    max_length=4096,
    padding=True
)

### Create dataset

In [51]:
class ReadDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with multi-label targets.

    Args:
        encodings: Mapping from field name to a per-example sequence of values
            (anything exposing ``.items()`` whose values can be indexed by
            example position).
        labels: Per-example label rows, indexable by example position.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Number of examples, taken from the length of ``labels``."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, including 'labels'."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        # Labels are stored as floats.
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return sample

# One ReadDataset per fold, in fold order (index 0 holds fold 1).
train_datasets = []
test_datasets = []

# Build the datasets from the pre-split folds loaded into train_data_list /
# test_data_list. The previous version iterated over `kf.split(train_data)`,
# but `kf` is never defined in this notebook and `train_data` only holds the
# last fold's training frame.
for fold, (train_fold_df, test_fold_df) in enumerate(zip(train_data_list, test_data_list)):
    # truncation=True shortens each single-sequence note to max_length tokens;
    # 'only_second' only applies to sentence pairs.
    train_fold_encodings = tokenizer(
        train_fold_df['Text_desc'].tolist(),
        truncation=True,
        max_length=4096,
        padding=True
    )
    test_fold_encodings = tokenizer(
        test_fold_df['Text_desc'].tolist(),
        truncation=True,
        max_length=4096,
        padding=True
    )

    # Select the targets by name so the column order always matches label_cols.
    train_fold_dataset = ReadDataset(train_fold_encodings, train_fold_df[label_cols].values)
    test_fold_dataset = ReadDataset(test_fold_encodings, test_fold_df[label_cols].values)

    train_datasets.append(train_fold_dataset)
    test_datasets.append(test_fold_dataset)

### Calculate Class Weights and Build Custom Loss Function

In [52]:
from torch import nn
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback


# Calculate class weights from the full, un-split dataset.
# NOTE(review): this file presumably contains the test-fold rows as well, so
# the weights are not computed from training data only — confirm that is intended.
original_path = '/path/to/op_note_data.csv'
data = pd.read_csv(original_path)
data = data.drop(['ID', 'Unnamed: 0', 'wound_class'], axis=1)

class_counts = data[label_cols].sum()   # number of positive rows per label
total_counts = len(data)

# A label with zero positives would give an infinite weight, and inf * 0 inside
# the weighted BCE loss turns the whole loss into NaN. Clamp the count to 1 so
# every weight stays finite.
safe_class_counts = class_counts.clip(lower=1)

# "Balanced" weighting: n_samples / (n_labels * positives_for_label).
class_weights = total_counts / (len(label_cols) * safe_class_counts)
class_weights = torch.tensor(class_weights.values, dtype=torch.float).to(device)


class MultilabelTrainer(Trainer):
    """Trainer that scores multi-label logits with a class-weighted BCE loss.

    The per-label positive weights come from the module-level ``class_weights``
    tensor.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute ``BCEWithLogitsLoss(pos_weight=class_weights)`` for one batch.

        Args:
            model: The model being trained or evaluated.
            inputs: Batch mapping; must contain a "labels" entry.
            return_outputs: When True, return ``(loss, outputs)``.
            num_items_in_batch: Accepted for compatibility with newer
                ``transformers`` releases that pass it to ``compute_loss``;
                not used by this loss.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.get("labels")
        # Run the forward pass WITHOUT the labels so the model does not compute
        # its own built-in loss. Build a new mapping instead of popping so the
        # caller's batch is left untouched.
        model_inputs = {key: value for key, value in inputs.items() if key != "labels"}
        outputs = model(**model_inputs)
        logits = outputs.get('logits')

        # Keep the weights on the same device as the logits.
        loss_fct = nn.BCEWithLogitsLoss(pos_weight=class_weights.to(logits.device))
        num_labels = self.model.config.num_labels
        loss = loss_fct(logits.view(-1, num_labels),
                        labels.float().view(-1, num_labels))
        return (loss, outputs) if return_outputs else loss

### Function to compute metrics for training

In [53]:
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), evaluated with NumPy.

    Works element-wise on arrays and on plain scalars.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def compute_metrics(eval_preds, threshold=0.1):
    """Per-label and micro-averaged F1 for multi-label predictions.

    Args:
        eval_preds: ``(logits, labels)`` pair; both are indexed as 2-D arrays
            with one column per entry of ``label_cols``.
        threshold: Probability cut-off above which a label is predicted
            positive. Defaults to 0.1, the value this notebook trains with.

    Returns:
        dict with one ``f1_<label>`` entry per label plus ``f1_micro``.
    """
    logits, labels = eval_preds
    # Turn logits into probabilities, then binarize at `threshold`.
    preds = sigmoid(logits) > threshold

    # Diagnostic sample; only shown when the logging level is DEBUG, so normal
    # runs are not flooded on every evaluation.
    logging.debug("Logits: %s", logits[:5])
    logging.debug("Predictions: %s", preds[:5])
    logging.debug("Labels: %s", labels[:5])

    res = {}
    for i, d in enumerate(label_cols):
        # zero_division=0 keeps the default score (0.0) for labels that have no
        # positives and no predictions, without emitting a warning.
        res[f'f1_{d}'] = f1_score(labels[:, i], preds[:, i], zero_division=0)
    res['f1_micro'] = f1_score(labels, preds, average='micro', zero_division=0)
    return res

### Training Arguments

In [54]:
from transformers import Trainer, TrainingArguments, BertForSequenceClassification, LongformerForSequenceClassification

# Checkpoint directory for the Trainer.
output_dir = 'path/to/output_directory'
os.makedirs(output_dir, exist_ok=True)

# Log directory. The previous version tested and created output_dir a second
# time here, so logging_dir was never created.
logging_dir = 'path/to/logging_directory'
os.makedirs(logging_dir, exist_ok=True)


# Hyper-parameters for the Trainer, gathered in one mapping so they can be
# inspected before the TrainingArguments object is built.
trainer_settings = {
    # Where checkpoints and logs are written.
    'output_dir': output_dir,
    'logging_dir': logging_dir,
    'logging_steps': 10,
    # Optimisation.
    'num_train_epochs': 500,   # upper bound on the number of epochs
    'learning_rate': 2e-5,
    'per_device_train_batch_size': 4,
    'per_device_eval_batch_size': 8,
    'fp16': True,
    # Evaluate and checkpoint once per epoch, then restore the checkpoint with
    # the highest micro-F1.
    'evaluation_strategy': 'epoch',
    'save_strategy': 'epoch',
    'load_best_model_at_end': True,
    'metric_for_best_model': 'f1_micro',
    'greater_is_better': True,
}

training_args = TrainingArguments(**trainer_settings)


### Train model

In [None]:

# Train and evaluate one model per cross-validation fold.
#
# The previous version passed `train_dataset` / `test_dataset`, which are never
# defined; the per-fold datasets live in the train_datasets / test_datasets lists.
# NOTE(review): the held-out fold drives both early stopping and the final
# evaluation, so the reported scores are optimistic — confirm this is acceptable.
for fold, (train_dataset, test_dataset) in enumerate(zip(train_datasets, test_datasets), start=1):
    # Start every fold from the pretrained checkpoint so weights fine-tuned on
    # an earlier fold never leak into the next one.
    model = AutoModelForSequenceClassification.from_pretrained(
        "yikuan8/Clinical-Longformer", num_labels=len(label_cols)
    )

    trainer = MultilabelTrainer(
        model=model,                         # fresh model for this fold
        args=training_args,                  # training arguments, defined above
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
    )

    trainer.train()

    # Save the held-out logits to the path the evaluation cells below load
    # with np.loadtxt.
    fold_logits = trainer.predict(test_dataset).predictions
    np.savetxt(f'/path/to/logits_fold_{fold}.csv', fold_logits, delimiter=",")

# Evaluate model

### Find best f1 threshold

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

def sigmoid(x):
    """Map logits to probabilities with the logistic function 1 / (1 + e^(-x))."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Define the path to the saved logits
logits_paths = [
    f'/path/to/logits_fold_{fold}.csv'
    for fold in range(1, 6)
]

# Load the saved logits of every fold.
all_logits = [np.loadtxt(logits_path, delimiter=",") for logits_path in logits_paths]

# Ground-truth label matrices of the held-out folds. They are built here
# because this cell previously relied on `all_true_labels` from a LATER cell,
# which fails on a fresh kernel.
all_true_labels = [fold_test_data[label_cols].values for fold_test_data in test_data_list]

# Find the best threshold for each fold.
# NOTE(review): the threshold is tuned on the same held-out folds that are used
# for reporting — confirm this is intended.
best_thresholds = []

for fold, (true_labels, logits) in enumerate(zip(all_true_labels, all_logits), start=1):
    best_threshold = 0.0
    best_f1_score = 0.0

    # The probabilities do not depend on the threshold, so compute them once.
    probabilities = sigmoid(logits)

    for threshold in np.arange(0.1, 0.9, 0.05):
        predictions = (probabilities > threshold).astype(int)

        # Micro-averaged F1 over all labels at this threshold.
        avg_f1 = f1_score(true_labels, predictions, average='micro')

        # Strict '>' keeps the lowest threshold when several tie.
        if avg_f1 > best_f1_score:
            best_f1_score = avg_f1
            best_threshold = threshold

    best_thresholds.append(best_threshold)
    print(f"Fold {fold} - Best threshold: {best_threshold} with average F1 score: {best_f1_score}")

# Calculate the average of the best thresholds
average_best_threshold = np.mean(best_thresholds)
print(f"Average of best thresholds across folds: {average_best_threshold}")

### Calculate Avg F1 Micro and hamming loss across all folds

In [86]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

def sigmoid(x):
    """Logistic function: converts logits to probabilities as 1 / (1 + e^(-x))."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Define the path to the saved logits
logits_paths = [
    f'/path/to/logits_fold_{fold}.csv'
    for fold in range(1, 6)
]

# Initialize lists to store true labels and predictions
all_true_labels = []
all_predicted_labels = []

# Collect and save the true labels of every held-out fold.
for fold, test_data in enumerate(test_data_list, start=1):
    # Select the targets by name. Dropping only 'Text_desc' would treat every
    # other column in the CSV as a label and would not guarantee the column
    # order of label_cols, which the per-label metric tables rely on.
    true_labels = test_data[label_cols].values
    all_true_labels.append(true_labels)

    # Save true labels for each fold
    true_labels_path = f'/path/to/true_labels_fold_{fold}.csv'
    np.savetxt(true_labels_path, true_labels, delimiter=",")

In [None]:
from sklearn.metrics import hamming_loss

# Per-fold scores, appended in fold order.
f1_micro_scores = []
hamming_losses = []

# Score each fold's saved logits against its saved ground-truth labels.
for fold, logits_path in enumerate(logits_paths, start=1):
    true_labels_path = f'/path/to/true_labels_fold_{fold}.csv'
    true_labels = np.loadtxt(true_labels_path, delimiter=",")

    # Binarize the sigmoid probabilities at the fixed 0.11 cut-off.
    logits = np.loadtxt(logits_path, delimiter=",")
    probabilities = sigmoid(logits)
    predictions = (probabilities > 0.11).astype(int)

    f1_micro_scores.append(f1_score(true_labels, predictions, average='micro'))
    hamming_losses.append(hamming_loss(true_labels, predictions))

# Per-fold report.
for fold, fold_scores in enumerate(zip(f1_micro_scores, hamming_losses), start=1):
    f1_micro, hamming_loss_value = fold_scores
    print(f"Fold {fold} - F1 Micro Score: {f1_micro}, Hamming Loss: {hamming_loss_value}")

# Range and mean of each metric across the folds.
min_f1_micro, max_f1_micro = min(f1_micro_scores), max(f1_micro_scores)
avg_f1_micro = sum(f1_micro_scores) / len(f1_micro_scores)

min_hamming_loss, max_hamming_loss = min(hamming_losses), max(hamming_losses)
avg_hamming_loss = sum(hamming_losses) / len(hamming_losses)

print(f"Min F1 Micro Score: {min_f1_micro}")
print(f"Max F1 Micro Score: {max_f1_micro}")
print(f"Avg F1 Micro Score: {avg_f1_micro}")

print(f"Min Hamming Loss: {min_hamming_loss}")
print(f"Max Hamming Loss: {max_hamming_loss}")
print(f"Avg Hamming Loss: {avg_hamming_loss}")

### Per-label min, max, and average F1 across folds

In [None]:
from sklearn.metrics import f1_score, hamming_loss
import pandas as pd

# One row of per-label F1 scores per fold.
f1_scores_per_label = []

# Load the logits and calculate predictions
for fold, logits_path in enumerate(logits_paths, start=1):
    logits = np.loadtxt(logits_path, delimiter=",")
    probabilities = sigmoid(logits)
    # Same fixed 0.11 cut-off as the other evaluation cells.
    predictions = (probabilities > 0.11).astype(int)

    # Load true labels for the current fold
    true_labels_path = f'/path/to/true_labels_fold_{fold}.csv'
    true_labels = np.loadtxt(true_labels_path, delimiter=",")

    # F1 score for each label (average=None returns one value per column).
    f1_scores = f1_score(true_labels, predictions, average=None)
    f1_scores_per_label.append(f1_scores)

    # The previous version also appended a Hamming loss to `hamming_losses`
    # here. That list belongs to the preceding cell, the values were never
    # used in this one, and every re-run of this cell made the list longer.

# Folds as rows, labels as columns.
f1_scores_df = pd.DataFrame(f1_scores_per_label, columns=label_cols)

# Calculate min, max, and average F1 scores for each label
min_f1_scores = f1_scores_df[label_cols].min()
max_f1_scores = f1_scores_df[label_cols].max()
avg_f1_scores = f1_scores_df[label_cols].mean()

# Create a new DataFrame to store these statistics
f1_stats_df = pd.DataFrame({
    'Label': label_cols,
    'Min F1 Score': min_f1_scores,
    'Max F1 Score': max_f1_scores,
    'Average F1 Score': avg_f1_scores
})

# Print the DataFrame
print(f1_stats_df)

f1_stats_df.to_csv('/path/to/f1_stats.csv', index=False)


### Report AUROC, AUPRC, and F1 for each label

In [None]:
from sklearn.metrics import f1_score, roc_auc_score, average_precision_score
import pandas as pd
import numpy as np

def _score_or_nan(metric, y_true, y_score):
    """Return ``metric(y_true, y_score)``, or NaN when ``y_true`` has one class.

    AUROC is undefined when a label's ground truth contains only positives or
    only negatives, and ``roc_auc_score`` raises in that case. A rare label
    would therefore abort the whole report. NaN entries are skipped by the
    pandas ``mean`` used below.
    """
    if np.unique(y_true).size < 2:
        return np.nan
    return metric(y_true, y_score)


# One row per fold for each metric.
f1_scores_per_label = []
auroc_scores_per_label = []
auprc_scores_per_label = []

# Load the logits and calculate predictions
for fold, logits_path in enumerate(logits_paths, start=1):
    logits = np.loadtxt(logits_path, delimiter=",")
    probabilities = sigmoid(logits)
    # Same fixed 0.11 cut-off as the other evaluation cells (F1 only; AUROC and
    # AUPRC are computed from the probabilities).
    predictions = (probabilities > 0.11).astype(int)

    # Load true labels for the current fold
    true_labels_path = f'/path/to/true_labels_fold_{fold}.csv'
    true_labels = np.loadtxt(true_labels_path, delimiter=",")

    # Calculate F1 score for each label
    f1_scores = f1_score(true_labels, predictions, average=None)
    f1_scores_per_label.append(f1_scores)

    # Calculate AUROC and AUPRC for each label, NaN where undefined.
    auroc_scores = [
        _score_or_nan(roc_auc_score, true_labels[:, i], probabilities[:, i])
        for i in range(len(label_cols))
    ]
    auprc_scores = [
        _score_or_nan(average_precision_score, true_labels[:, i], probabilities[:, i])
        for i in range(len(label_cols))
    ]

    auroc_scores_per_label.append(auroc_scores)
    auprc_scores_per_label.append(auprc_scores)

# Create DataFrames to store F1, AUROC, and AUPRC scores (folds as rows).
f1_scores_df = pd.DataFrame(f1_scores_per_label, columns=label_cols)
auroc_scores_df = pd.DataFrame(auroc_scores_per_label, columns=label_cols)
auprc_scores_df = pd.DataFrame(auprc_scores_per_label, columns=label_cols)

# Calculate average scores across all folds (NaN entries are skipped).
avg_f1_scores = f1_scores_df.mean()
avg_auroc_scores = auroc_scores_df.mean()
avg_auprc_scores = auprc_scores_df.mean()

# Create a DataFrame to store the average scores
avg_scores_df = pd.DataFrame({
    'Label': label_cols,
    'Average F1 Score': avg_f1_scores,
    'Average AUROC': avg_auroc_scores,
    'Average AUPRC': avg_auprc_scores
})

# Print the DataFrame
print(avg_scores_df)

# Optionally, save the DataFrame to a CSV file
avg_scores_df.to_csv('/path/to/avg_scores.csv', index=False)