In [1]:
# Install PyTorch
%pip install torch tensorboard

# Install Hugging Face libraries
%pip install --upgrade transformers datasets accelerate evaluate bitsandbytes huggingface_hub trl peft

# Installing Dependencies


Collecting transformers
  Downloading transformers-4.47.0-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting accelerate
  Downloading accelerate-1.2.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.26.5-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.12.2-py3-none-any.whl.metadata (11 kB)
Collecting peft
  Downloading peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecti

In [2]:
import os

# Save the token in an environment variable
os.environ["HF_TOKEN"] = "token"

# Authenticate with Hugging Face
from huggingface_hub import login
login(token=os.environ["HF_TOKEN"])

#Logging into Huggingface to access models

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
# Installing More Dependencies
import os
import random
import functools
import csv
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import evaluate

from sklearn.utils import shuffle
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix, classification_report, balanced_accuracy_score, accuracy_score

from datasets import Dataset, DatasetDict
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)

In [4]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cisc6000/assignments/assignment2/'
FOLDERNAME = "Patient_data/"
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

df = pd.read_csv("/content/drive/My Drive/Patient_data/descriptive_text_2.csv")
df

Mounted at /content/drive


Unnamed: 0,summary,label
0,Age: 31.8 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes
1,Age: 34.0 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes
2,Age: 18.1 years Sex: Female Race: White Ethnic...,Predicted future relapse: No
3,Age: 20.3 years Sex: Male Race: White Ethnicit...,Predicted future relapse: Yes
4,Age: 26.7 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes
...,...,...
2187,Age: 41.2 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes
2188,Age: 55.0 years Sex: Female Race: White Ethnic...,Predicted future relapse: No
2189,Age: 26.3 years Sex: Male Race: White Ethnicit...,Predicted future relapse: No
2190,Age: 47.1 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes


In [5]:

df['label'] = df['label'].astype('category').cat.reorder_categories(["Predicted future relapse: No","Predicted future relapse: Yes"], ordered=True)
df['target'] = df['label'].cat.codes


df.head()

Unnamed: 0,summary,label,target
0,Age: 31.8 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes,1
1,Age: 34.0 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes,1
2,Age: 18.1 years Sex: Female Race: White Ethnic...,Predicted future relapse: No,0
3,Age: 20.3 years Sex: Male Race: White Ethnicit...,Predicted future relapse: Yes,1
4,Age: 26.7 years Sex: Female Race: White Ethnic...,Predicted future relapse: Yes,1


In [6]:
df['label'].cat.categories

Index(['Predicted future relapse: No', 'Predicted future relapse: Yes'], dtype='object')

In [7]:
category_map = {code: category for code, category in enumerate(df['label'].cat.categories)}
category_map

{0: 'Predicted future relapse: No', 1: 'Predicted future relapse: Yes'}

In [8]:
# Shuffle the dataset randomly
df = shuffle(df, random_state=42).reset_index(drop=True)

train_end_point = int(df.shape[0] * 0.6)
val_end_point = int(df.shape[0] * 0.8)

df_train = df.iloc[:train_end_point, :]
df_val = df.iloc[train_end_point:val_end_point, :]
df_test = df.iloc[val_end_point:, :]

print(f"Training set shape: {df_train.shape}")
print(f"Validation set shape: {df_val.shape}")
print(f"Test set shape: {df_test.shape}")


Training set shape: (1315, 3)
Validation set shape: (438, 3)
Test set shape: (439, 3)


In [9]:
# Converting pandas DataFrames into Hugging Face Dataset objects:
dataset_train = Dataset.from_pandas(df_train.drop('label',axis=1))
dataset_val = Dataset.from_pandas(df_val.drop('label',axis=1))
dataset_test = Dataset.from_pandas(df_test.drop('label',axis=1))


In [10]:
# Combine them into a single DatasetDict
dataset = DatasetDict({
    'train': dataset_train,
    'val': dataset_val,
    'test': dataset_test
})
dataset

DatasetDict({
    train: Dataset({
        features: ['summary', 'target'],
        num_rows: 1315
    })
    val: Dataset({
        features: ['summary', 'target'],
        num_rows: 438
    })
    test: Dataset({
        features: ['summary', 'target'],
        num_rows: 439
    })
})

In [11]:
dataset['train']

Dataset({
    features: ['summary', 'target'],
    num_rows: 1315
})

In [12]:
df_train.target.value_counts(normalize=True)

Unnamed: 0_level_0,proportion
target,Unnamed: 1_level_1
0,0.637262
1,0.362738


In [13]:
# class_weights=(1/df_train.target.value_counts(normalize=True).sort_index()).tolist()
# class_weights=torch.tensor(class_weights)
# class_weights=class_weights/class_weights.sum()
# class_weights

# Manually set weights for each class (higher for class 1)
class_weights = torch.tensor([0.3, 0.7], dtype=torch.float32)
class_weights

tensor([0.3000, 0.7000])

In [14]:
# Load model
model_name = "meta-llama/Meta-Llama-3.1-8B"

In [15]:
# Quantization Config (for QLORA)
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

In [16]:
# Lora Config
lora_config = LoraConfig(
    r = 16, # the dimension of the low-rank matrices
    lora_alpha = 8, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.05, # dropout probability of the LoRA layers
    bias = 'none', # whether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

In [17]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from huggingface_hub import login

# Login using your Hugging Face token
login("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")

# Load model

# AutomodelForSequenceClassification
# Num Labels is num of classes

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=2
)

model

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3.1-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LlamaForSequenceClassification(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNor

In [18]:
# prepare_model_for_kbit_training() function to preprocess the quantized model for training
model = prepare_model_for_kbit_training(model)
model

LlamaForSequenceClassification(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNor

In [19]:
# get_peft_model prepares a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model
model = get_peft_model(model, lora_config)
model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
           

**Load the tokenizer**

Since LLAMA3 pre-training doesn't have EOS token

Set the pad_token_id to eos_token_id
Set pad token ot eos_token

In [20]:
tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)

tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

**Update some model configs**

In [21]:
# Must use .cache = False as below or it crashes
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False
model.config.pretraining_tp = 1

**Loop through dataset to measure performance before training/fitting the model**

In [22]:
# Use a batch size 32 to kinda vectorize and to avoid memory errors
sentences = df_test.summary.tolist()
sentences[0:2]

['Age: 29.1 years Sex: Male Race: White Ethnicity: Not hispanic or latino Family History of MS: No BMI: 25.6 Smoking History: No Disease Duration at First Visit: 6.7 years Disease Category: Relapsing-Remitting MS EDSS Score: 1.5 Functional Scores: - Pyramidal: 1 - Cerebellar: 1 - Brainstem: 0 - Sensory: 1 - Bowel/Bladder: 0 - Visual: 0 - Mental: 0 Relapse History: - Total relapses before FV: 1 - Relapses in the past 3 years: 0 - Relapses in the past year: 0 - Time since last attack: 6.7 years Treatment Before First Visit: No Relapse in the year before FV: No Relapse in the 3 years before FV: No Injectable Medication Treatment: Yes New T2 Lesion in the Past Year: Unknown New Gad Lesion in the Past Year: Unknown',
 'Age: 24.3 years Sex: Female Race: American Indian or Alaska Native Ethnicity: Not hispanic or latino Family History of MS: No Smoking History: No Disease Duration at First Visit: 0.3 years Disease Category: Suspected MS EDSS Score: 1.5 Functional Scores: - Pyramidal: 0 - Cere

In [23]:
# Convert summaries to a list
sentences = df_test.summary.tolist()

# Define the batch size
batch_size = 32  # Adjust this based on system's memory capacity

# Initialize an empty list to store the model outputs
all_outputs = []

# Process the sentences in batches
for i in range(0, len(sentences), batch_size):
    # Get the batch of sentences
    batch_sentences = sentences[i:i + batch_size]

    # Tokenize the batch
    inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Move tensors to the device where the model is (e.g., GPU or CPU)
    inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

    # Perform inference and store the logits
    with torch.no_grad():
        outputs = model(**inputs)
        all_outputs.append(outputs['logits'])

In [24]:
# Concatenate all outputs into a single tensor
final_outputs = torch.cat(all_outputs, dim=0)
final_outputs

tensor([[-1.4919,  0.3719],
        [-1.6639,  0.8630],
        [-1.7732,  0.8791],
        [-1.7281,  0.2872],
        [-1.9501,  0.6226],
        [-1.4233,  1.4325],
        [-1.7563,  0.9827],
        [-1.8891,  0.8862],
        [-1.0563,  1.2468],
        [-1.6157,  0.6952],
        [-1.9152,  1.0437],
        [-1.1767,  0.9050],
        [-1.4802,  1.0657],
        [-1.8667,  0.6839],
        [-1.9542,  0.8963],
        [-2.6241,  0.7286],
        [-2.1943,  0.5828],
        [-1.4803,  1.1287],
        [-1.8039,  0.5488],
        [-2.3754,  0.8596],
        [-1.3378,  0.9587],
        [-2.0042,  0.7366],
        [-1.9821,  1.0835],
        [-1.8462,  0.7456],
        [-1.7659,  0.7237],
        [-0.7409,  0.5229],
        [-1.9723,  0.4763],
        [-1.6985,  0.1498],
        [-2.2096,  0.7905],
        [-1.8059,  0.5604],
        [-1.8276,  0.7805],
        [-1.5421,  1.1717],
        [-1.5501,  0.6543],
        [-1.6950,  0.3908],
        [-2.2608,  0.1690],
        [-1.5771,  1

In [25]:
# argmax to get class prediction
final_outputs.argmax(axis=1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [26]:
# Move to CPU so we can use numpy and set prediction column to it
df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
df_test['predictions']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()


Unnamed: 0,predictions
1753,1
1754,1
1755,1
1756,1
1757,1
...,...
2187,1
2188,1
2189,1
2190,1


In [27]:
df_test['predictions'].value_counts()

Unnamed: 0_level_0,count
predictions,Unnamed: 1_level_1
1,439


In [28]:
# Use category map to get back category names
df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])
df_test['predictions']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])


Unnamed: 0,predictions
1753,Predicted future relapse: Yes
1754,Predicted future relapse: Yes
1755,Predicted future relapse: Yes
1756,Predicted future relapse: Yes
1757,Predicted future relapse: Yes
...,...
2187,Predicted future relapse: Yes
2188,Predicted future relapse: Yes
2189,Predicted future relapse: Yes
2190,Predicted future relapse: Yes


In [29]:
# Analyze performance
def get_performance_metrics(df_test):
  y_test = df_test.label
  y_pred = df_test.predictions

  print("Confusion Matrix:")
  print(confusion_matrix(y_test, y_pred))

  print("\nClassification Report:")
  print(classification_report(y_test, y_pred))

  print("Balanced Accuracy Score:", balanced_accuracy_score(y_test, y_pred))
  print("Accuracy Score:", accuracy_score(y_test, y_pred))

In [30]:
get_performance_metrics(df_test)

Confusion Matrix:
[[  0 275]
 [  0 164]]

Classification Report:
                               precision    recall  f1-score   support

 Predicted future relapse: No       0.00      0.00      0.00       275
Predicted future relapse: Yes       0.37      1.00      0.54       164

                     accuracy                           0.37       439
                    macro avg       0.19      0.50      0.27       439
                 weighted avg       0.14      0.37      0.20       439

Balanced Accuracy Score: 0.5
Accuracy Score: 0.3735763097949886


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [31]:
# Create LLAMA tokenized dataset which will house our train/val parts during the training process but after applying tokenization
MAX_LEN = 512
col_to_delete = ['summary']

def llama_preprocessing_function(examples):
    return tokenizer(examples['summary'], truncation=True, max_length=MAX_LEN)

tokenized_datasets = dataset.map(llama_preprocessing_function, batched=True, remove_columns=col_to_delete)
tokenized_datasets = tokenized_datasets.rename_column("target", "label")
tokenized_datasets.set_format("torch")

Map:   0%|          | 0/1315 [00:00<?, ? examples/s]

Map:   0%|          | 0/438 [00:00<?, ? examples/s]

Map:   0%|          | 0/439 [00:00<?, ? examples/s]

In [32]:
# Data Collator
collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [33]:
# Metrics to compute for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {'balanced_accuracy' : balanced_accuracy_score(predictions, labels),'accuracy':accuracy_score(predictions,labels)}

**Define custom trainer with classweights**

In [34]:
class CustomTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Ensure class weights are a tensor
        if class_weights is not None:
            self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
        else:
            self.class_weights = None

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Override the compute_loss method to handle class weights.
        """
        # Extract labels and ensure they are long type for cross_entropy
        labels = inputs.pop("labels").long()

        # Forward pass
        outputs = model(**inputs)

        # Extract logits assuming they are directly outputted by the model
        logits = outputs.get("logits")

        # Compute custom loss with class weights for imbalanced data handling
        if self.class_weights is not None:
            loss = F.cross_entropy(logits, labels, weight=self.class_weights)
        else:
            loss = F.cross_entropy(logits, labels)

        # Return loss and optionally outputs
        return (loss, outputs) if return_outputs else loss

In [35]:
# Set the API key directly
os.environ["WANDB_API_KEY"] = "819e54700607f0b3534b1683c1621569a2546c15"

In [36]:
# Define training args
training_args = TrainingArguments(
    output_dir = 'text2_relapse_classification_v3',
    learning_rate = 1e-4,
    per_device_train_batch_size = 8,
    per_device_eval_batch_size = 8,
    num_train_epochs = 20,
    weight_decay = 0.01,
    evaluation_strategy = 'epoch',
    save_strategy = 'epoch',
    load_best_model_at_end = True,
    save_total_limit=2,
    fp16=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

# # Define training args
# training_args = TrainingArguments(
#     output_dir='text2_relapse_classification_v2',  # Directory for model outputs
#     learning_rate=1e-5,  # Reduced learning rate
#     per_device_train_batch_size=4,  # Smaller batch size for frequent updates
#     per_device_eval_batch_size=8,  # Evaluation batch size
#     gradient_accumulation_steps=2,  # Simulates a batch size of 8
#     num_train_epochs=20,  # Increased number of epochs
#     weight_decay=0.01,  # Regularization strength
#     evaluation_strategy='epoch',  # Evaluate after every epoch
#     save_strategy='epoch',  # Save checkpoints every epoch
#     load_best_model_at_end=True,  # Load the best model checkpoint
#     save_total_limit=2,  # Keep only the 2 most recent checkpoints
#     lr_scheduler_type="linear",  # Linear decay scheduler
#     warmup_steps=500,  # Gradual learning rate warmup
#     fp16=True,  # Enable mixed precision for faster training
#     logging_dir='./logs',  # TensorBoard log directory
#     logging_steps=50,  # Log every 50 steps
#     metric_for_best_model='eval_loss',  # Use validation loss for selecting the best model
#     greater_is_better=False  # Lower values of loss are better
# )



In [37]:
callbacks = [
    EarlyStoppingCallback(
        early_stopping_patience=2,  # Number of evaluation steps without improvement
        early_stopping_threshold=1e-5  # Minimum change to qualify as an improvement
    )
]

In [38]:
# Define custom trainer
trainer = CustomTrainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_datasets['train'],
    eval_dataset = tokenized_datasets['val'],
    tokenizer = tokenizer,
    data_collator = collate_fn,
    compute_metrics = compute_metrics,
    class_weights=class_weights,
)

  super().__init__(*args, **kwargs)
  self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)


In [None]:
# Run trainer
train_result = trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33maakrutikatre[0m ([33maakrutikatre-fordham-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Balanced Accuracy,Accuracy
1,No log,0.667674,0.589893,0.591324
2,No log,0.666562,0.617222,0.646119
3,No log,0.72448,0.607015,0.424658
4,0.760500,0.892386,0.612651,0.431507
5,0.760500,0.664125,0.569028,0.586758
6,0.760500,0.994004,0.830275,0.6621
7,0.716900,0.815811,0.67953,0.680365
8,0.716900,0.701348,0.651776,0.522831
9,0.716900,0.638785,0.646593,0.605023
10,0.726200,0.683599,0.656376,0.694064


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


In [None]:
# Checking results
def make_predictions(model,df_test):


  # Convert summaries to a list
  sentences = df_test.summary.tolist()

  # Define the batch size
  batch_size = 32  # You can adjust this based on your system's memory capacity

  # Initialize an empty list to store the model outputs
  all_outputs = []

  # Process the sentences in batches
  for i in range(0, len(sentences), batch_size):
      # Get the batch of sentences
      batch_sentences = sentences[i:i + batch_size]

      # Tokenize the batch
      inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

      # Move tensors to the device where the model is (e.g., GPU or CPU)
      inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

      # Perform inference and store the logits
      with torch.no_grad():
          outputs = model(**inputs)
          all_outputs.append(outputs['logits'])

  final_outputs = torch.cat(all_outputs, dim=0)
  df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
  df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])


make_predictions(model,df_test)

In [None]:
get_performance_metrics(df_test)

In [None]:
# Saving model trainer state and model adapters
metrics = train_result.metrics
max_train_samples = len(dataset_train)
metrics["train_samples"] = min(max_train_samples, len(dataset_train))
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

In [None]:
# Saving the adapter model
trainer.save_model("text2_adapter_model_v3")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!cp -r text2_relapse_classification_v3 /content/drive/MyDrive/Patient_data

In [None]:
!cp -r text2_adapter_model_v3 /content/drive/MyDrive/Patient_data/

In [None]:
from google.colab import runtime
runtime.unassign()

Improve loss, increase epochs
use early stopping
delta for early stopping 10^5
save best model
We want class 1 to be really high - putting weight on loss function
Lower threshold
class 1 : 0.8, class 0 : 0.6