In [1]:
# Install PyTorch
%pip install torch tensorboard

# Install Hugging Face libraries
%pip install --upgrade transformers datasets accelerate evaluate bitsandbytes huggingface_hub trl peft

# Installing Dependencies


Collecting transformers
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.26.3-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.12.1-py3-none-any.whl.metadata (10 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-n

In [28]:
import os

# Save the token in an environment variable
os.environ["HF_TOKEN"] = "token"

# Authenticate with Hugging Face
from huggingface_hub import login
login(token=os.environ["HF_TOKEN"])

#Logging into Huggingface to access models

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [2]:
# Installing More Dependencies
import os
import random
import functools
import csv
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import evaluate

from sklearn.utils import shuffle
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix, classification_report, balanced_accuracy_score, accuracy_score

from datasets import Dataset, DatasetDict
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)

In [3]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cisc6000/assignments/assignment2/'
FOLDERNAME = "Patient_data/"
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

df = pd.read_csv("/content/drive/My Drive/Patient_data/descriptive_text_1.csv")
df

Mounted at /content/drive


Unnamed: 0,summary,label
0,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
1,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
2,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
3,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
4,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
...,...,...
2187,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
2188,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
2189,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."
2190,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ..."


In [10]:

df['label'] = df['label'].astype('category').cat.reorder_categories(["Based on the results, we can predict that the patient is not likely to have a relapse in the future.", "Based on the results, we can predict that the patient is highly likely to have a relapse in the future."], ordered=True)
df['target'] = df['label'].cat.codes


df.head()

Unnamed: 0,summary,label,target
0,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ...",1
1,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ...",1
2,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ...",0
3,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ...",1
4,To determine if a person is likely to develop ...,"Based on the results, we can predict that the ...",1


In [11]:
df['label'].cat.categories

Index(['Based on the results, we can predict that the patient is not likely to have a relapse in the future.', 'Based on the results, we can predict that the patient is highly likely to have a relapse in the future.'], dtype='object')

In [12]:
category_map = {code: category for code, category in enumerate(df['label'].cat.categories)}
category_map

{0: 'Based on the results, we can predict that the patient is not likely to have a relapse in the future.',
 1: 'Based on the results, we can predict that the patient is highly likely to have a relapse in the future.'}

In [13]:
# Shuffle the dataset randomly
df = shuffle(df, random_state=42).reset_index(drop=True)

train_end_point = int(df.shape[0] * 0.6)
val_end_point = int(df.shape[0] * 0.8)

df_train = df.iloc[:train_end_point, :]
df_val = df.iloc[train_end_point:val_end_point, :]
df_test = df.iloc[val_end_point:, :]

print(f"Training set shape: {df_train.shape}")
print(f"Validation set shape: {df_val.shape}")
print(f"Test set shape: {df_test.shape}")


Training set shape: (1315, 3)
Validation set shape: (438, 3)
Test set shape: (439, 3)


In [19]:
# Converting pandas DataFrames into Hugging Face Dataset objects:
dataset_train = Dataset.from_pandas(df_train.drop('label',axis=1))
dataset_val = Dataset.from_pandas(df_val.drop('label',axis=1))
dataset_test = Dataset.from_pandas(df_test.drop('label',axis=1))


In [20]:
# Combine them into a single DatasetDict
dataset = DatasetDict({
    'train': dataset_train,
    'val': dataset_val,
    'test': dataset_test
})
dataset

DatasetDict({
    train: Dataset({
        features: ['summary', 'target'],
        num_rows: 1315
    })
    val: Dataset({
        features: ['summary', 'target'],
        num_rows: 438
    })
    test: Dataset({
        features: ['summary', 'target'],
        num_rows: 439
    })
})

In [21]:
dataset['train']

Dataset({
    features: ['summary', 'target'],
    num_rows: 1315
})

In [22]:
df_train.target.value_counts(normalize=True)

Unnamed: 0_level_0,proportion
target,Unnamed: 1_level_1
0,0.637262
1,0.362738


In [23]:
class_weights=(1/df_train.target.value_counts(normalize=True).sort_index()).tolist()
class_weights=torch.tensor(class_weights)
class_weights=class_weights/class_weights.sum()
class_weights

tensor([0.3627, 0.6373])

In [31]:
# Load model
model_name = "meta-llama/Meta-Llama-3.1-8B"

In [25]:
# Quantization Config (for QLORA)
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

In [26]:
# Lora Config
lora_config = LoraConfig(
    r = 16, # the dimension of the low-rank matrices
    lora_alpha = 8, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.05, # dropout probability of the LoRA layers
    bias = 'none', # whether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

In [57]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from huggingface_hub import login

# Login using your Hugging Face token
login("token")

# Load model

# AutomodelForSequenceClassification
# Num Labels is num of classes

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=2
)

model

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3.1-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LlamaForSequenceClassification(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNor

In [58]:
# prepare_model_for_kbit_training() function to preprocess the quantized model for training
model = prepare_model_for_kbit_training(model)
model

LlamaForSequenceClassification(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNor

In [59]:
# get_peft_model prepares a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model
model = get_peft_model(model, lora_config)
model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
           

**Load the tokenizer**

Since LLAMA3 pre-training doesn't have EOS token

Set the pad_token_id to eos_token_id
Set pad token ot eos_token

In [60]:
tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)

tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

**Update some model configs**

In [61]:
# Must use .cache = False as below or it crashes
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False
model.config.pretraining_tp = 1

**Loop through dataset to measure performance before training/fitting the model**

In [62]:
# Use a batch size 32 to kinda vectorize and to avoid memory errors
sentences = df_test.summary.tolist()
sentences[0:2]

['To determine if a person is likely to develop a particular disease, the following variables are considered: The individual is 29.1 years old at their first visit. They identify as Male. Their race is recorded as White. They belong to the Not hispanic or latino ethnic group. They are Not married. Their BMI is 25.6. Smoking history is recorded as No. They have had the disease for 6.7 years at their first visit. The disease category at first visit is classified as Relapsing-Remitting MS. At their first visit, their Expanded Disability Status Scale (EDSS) score was 1.5. Pyramidal function was assessed with a score of 1. Cerebellar function was scored 1. Brainstem function had a score of 0. Sensory function was assessed with a score of 1. Bowel and bladder function received a score of 0. Visual function was scored 0. Mental function was assessed with a score of 0. Looking at their relapse history, this individual has experienced 1 relapses prior to the first visit. They had 0 relapses in 

In [64]:
# Convert summaries to a list
sentences = df_test.summary.tolist()

# Define the batch size
batch_size = 32  # Adjust this based on system's memory capacity

# Initialize an empty list to store the model outputs
all_outputs = []

# Process the sentences in batches
for i in range(0, len(sentences), batch_size):
    # Get the batch of sentences
    batch_sentences = sentences[i:i + batch_size]

    # Tokenize the batch
    inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Move tensors to the device where the model is (e.g., GPU or CPU)
    inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

    # Perform inference and store the logits
    with torch.no_grad():
        outputs = model(**inputs)
        all_outputs.append(outputs['logits'])

In [65]:
# Concatenate all outputs into a single tensor
final_outputs = torch.cat(all_outputs, dim=0)
final_outputs

tensor([[ 2.4003e+00,  9.3907e-01],
        [ 3.0431e+00, -7.0160e-02],
        [ 3.0027e+00,  7.7531e-01],
        [ 1.9920e+00,  9.5755e-01],
        [ 1.8055e+00,  1.2927e+00],
        [ 2.9928e+00,  1.9534e+00],
        [ 2.6390e+00,  1.3832e+00],
        [ 2.1690e+00,  1.3328e+00],
        [ 2.6241e+00,  4.4876e-01],
        [ 2.1174e+00,  3.4218e-01],
        [ 1.9475e+00,  1.4451e+00],
        [ 2.9012e+00,  3.7184e-01],
        [ 2.8841e+00,  4.9211e-01],
        [ 2.0415e+00,  1.1682e+00],
        [ 1.4118e+00,  1.0619e+00],
        [ 1.7116e+00, -1.5447e-01],
        [ 2.5283e+00,  9.0177e-01],
        [ 2.9704e+00,  1.9014e+00],
        [ 2.2683e+00,  1.1072e+00],
        [ 2.9824e+00,  8.9920e-01],
        [ 2.4634e+00, -1.5551e-01],
        [ 2.5132e+00,  1.4384e+00],
        [ 1.6520e+00,  7.1460e-01],
        [ 2.0033e+00,  1.2269e+00],
        [ 2.2127e+00,  1.5605e+00],
        [ 3.2784e+00, -2.2300e-01],
        [ 2.8269e+00,  1.2898e+00],
        [ 2.7615e+00,  8.360

In [66]:
# argmax to get class prediction
final_outputs.argmax(axis=1)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [67]:
# Move to CPU so we can use numpy and set prediction column to it
df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
df_test['predictions']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()


Unnamed: 0,predictions
1753,0
1754,0
1755,0
1756,0
1757,0
...,...
2187,0
2188,0
2189,0
2190,0


In [68]:
df_test['predictions'].value_counts()

Unnamed: 0_level_0,count
predictions,Unnamed: 1_level_1
0,436
1,3


In [69]:
# Use category map to get back category names
df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])
df_test['predictions']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])


Unnamed: 0,predictions
1753,"Based on the results, we can predict that the ..."
1754,"Based on the results, we can predict that the ..."
1755,"Based on the results, we can predict that the ..."
1756,"Based on the results, we can predict that the ..."
1757,"Based on the results, we can predict that the ..."
...,...
2187,"Based on the results, we can predict that the ..."
2188,"Based on the results, we can predict that the ..."
2189,"Based on the results, we can predict that the ..."
2190,"Based on the results, we can predict that the ..."


In [70]:
# Analyze performance
def get_performance_metrics(df_test):
  y_test = df_test.label
  y_pred = df_test.predictions

  print("Confusion Matrix:")
  print(confusion_matrix(y_test, y_pred))

  print("\nClassification Report:")
  print(classification_report(y_test, y_pred))

  print("Balanced Accuracy Score:", balanced_accuracy_score(y_test, y_pred))
  print("Accuracy Score:", accuracy_score(y_test, y_pred))

In [71]:
get_performance_metrics(df_test)

Confusion Matrix:
[[  3 161]
 [  0 275]]

Classification Report:
                                                                                                         precision    recall  f1-score   support

Based on the results, we can predict that the patient is highly likely to have a relapse in the future.       1.00      0.02      0.04       164
   Based on the results, we can predict that the patient is not likely to have a relapse in the future.       0.63      1.00      0.77       275

                                                                                               accuracy                           0.63       439
                                                                                              macro avg       0.82      0.51      0.40       439
                                                                                           weighted avg       0.77      0.63      0.50       439

Balanced Accuracy Score: 0.5091463414634146
Accuracy Score: 0.

In [72]:
# Create LLAMA tokenized dataset which will house our train/val parts during the training process but after applying tokenization
MAX_LEN = 512
col_to_delete = ['summary']

def llama_preprocessing_function(examples):
    return tokenizer(examples['summary'], truncation=True, max_length=MAX_LEN)

tokenized_datasets = dataset.map(llama_preprocessing_function, batched=True, remove_columns=col_to_delete)
tokenized_datasets = tokenized_datasets.rename_column("target", "label")
tokenized_datasets.set_format("torch")

Map:   0%|          | 0/1315 [00:00<?, ? examples/s]

Map:   0%|          | 0/438 [00:00<?, ? examples/s]

Map:   0%|          | 0/439 [00:00<?, ? examples/s]

In [73]:
# Data Collator
collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [74]:
# Metrics to compute for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {'balanced_accuracy' : balanced_accuracy_score(predictions, labels),'accuracy':accuracy_score(predictions,labels)}

**Define custom trainer with classweights**

In [75]:
class CustomTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Ensure class weights are a tensor
        if class_weights is not None:
            self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)
        else:
            self.class_weights = None

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Override the compute_loss method to handle class weights.
        """
        # Extract labels and ensure they are long type for cross_entropy
        labels = inputs.pop("labels").long()

        # Forward pass
        outputs = model(**inputs)

        # Extract logits assuming they are directly outputted by the model
        logits = outputs.get("logits")

        # Compute custom loss with class weights for imbalanced data handling
        if self.class_weights is not None:
            loss = F.cross_entropy(logits, labels, weight=self.class_weights)
        else:
            loss = F.cross_entropy(logits, labels)

        # Return loss and optionally outputs
        return (loss, outputs) if return_outputs else loss

In [76]:
# Set the API key directly
os.environ["WANDB_API_KEY"] = "819e54700607f0b3534b1683c1621569a2546c15"

In [77]:
# Define training args
training_args = TrainingArguments(
    output_dir = 'text1_relapse_classification_1',
    learning_rate = 1e-4,
    per_device_train_batch_size = 8,
    per_device_eval_batch_size = 8,
    num_train_epochs = 2,
    weight_decay = 0.01,
    evaluation_strategy = 'epoch',
    save_strategy = 'epoch',
    load_best_model_at_end = True
)



In [78]:
# Define custom trainer
trainer = CustomTrainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_datasets['train'],
    eval_dataset = tokenized_datasets['val'],
    tokenizer = tokenizer,
    data_collator = collate_fn,
    compute_metrics = compute_metrics,
    class_weights=class_weights,
)

  super().__init__(*args, **kwargs)
  self.class_weights = torch.tensor(class_weights, dtype=torch.float32).to(self.args.device)


In [79]:
# Run trainer
train_result = trainer.train()

  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Balanced Accuracy,Accuracy
1,No log,0.666782,0.629697,0.579909
2,No log,0.670938,0.584276,0.630137


  return fn(*args, **kwargs)


In [80]:
# Checking results
def make_predictions(model,df_test):


  # Convert summaries to a list
  sentences = df_test.summary.tolist()

  # Define the batch size
  batch_size = 32  # You can adjust this based on your system's memory capacity

  # Initialize an empty list to store the model outputs
  all_outputs = []

  # Process the sentences in batches
  for i in range(0, len(sentences), batch_size):
      # Get the batch of sentences
      batch_sentences = sentences[i:i + batch_size]

      # Tokenize the batch
      inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

      # Move tensors to the device where the model is (e.g., GPU or CPU)
      inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

      # Perform inference and store the logits
      with torch.no_grad():
          outputs = model(**inputs)
          all_outputs.append(outputs['logits'])
  final_outputs = torch.cat(all_outputs, dim=0)
  df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
  df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])


make_predictions(model,df_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['predictions']=df_test['predictions'].apply(lambda l:category_map[l])


In [81]:
get_performance_metrics(df_test)

Confusion Matrix:
[[124  40]
 [147 128]]

Classification Report:
                                                                                                         precision    recall  f1-score   support

Based on the results, we can predict that the patient is highly likely to have a relapse in the future.       0.46      0.76      0.57       164
   Based on the results, we can predict that the patient is not likely to have a relapse in the future.       0.76      0.47      0.58       275

                                                                                               accuracy                           0.57       439
                                                                                              macro avg       0.61      0.61      0.57       439
                                                                                           weighted avg       0.65      0.57      0.57       439

Balanced Accuracy Score: 0.6107760532150777
Accuracy Score: 0.

In [82]:
# Saving model trainer state and model adapters
metrics = train_result.metrics
max_train_samples = len(dataset_train)
metrics["train_samples"] = min(max_train_samples, len(dataset_train))
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

***** train metrics *****
  epoch                    =        2.0
  total_flos               = 37224684GF
  train_loss               =      0.764
  train_runtime            = 1:41:06.99
  train_samples            =       1315
  train_samples_per_second =      0.433
  train_steps_per_second   =      0.054


In [83]:
# Saving the adapter model
trainer.save_model("text1_adapter_model_1")

In [88]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [90]:
!cp -r text1_relapse_classification_1 /content/drive/MyDrive/Patient_data

In [91]:
!cp -r text1_adapter_model_1 /content/drive/MyDrive/Patient_data/