In [2]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import pandas as pd
  
# adding the root to the path
path_to_add = r'/home/etaylor/code_projects/dubby'  
  
# Add the directory to sys.path

sys.path.insert(0, path_to_add)

In [3]:
import torch

print(torch.cuda.is_available())  # Should print True if CUDA is set up correctly

print(torch.version.cuda)  # Should print '11.4' matching the CUDA version supported by the driver

print(torch.cuda.get_device_name(0))  # Should print the name of your GPU

True
11.8
NVIDIA GeForce GTX 1080 Ti


In [4]:
from datasets import load_dataset

# Load the dataset
dataset = load_dataset('csv', data_files={'train': '/home/etaylor/code_projects/dubby/data/train.csv',
                                          'test': '/home/etaylor/code_projects/dubby/data/test.csv'})


In [6]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')

def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True)

tokenized_data = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/11828 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/3943 [00:00<?, ? examples/s]

In [7]:
tokenized_data['train'][0]['label']

0

In [8]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    'mistralai/Mistral-7B-v0.1', num_labels=2)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)
    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,  # Pass the compute_metrics function here
)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


OutOfMemoryError: CUDA out of memory. Tried to allocate 224.00 MiB. GPU 0 has a total capacity of 10.92 GiB of which 57.44 MiB is free. Including non-PyTorch memory, this process has 10.86 GiB memory in use. Of the allocated memory 10.39 GiB is allocated by PyTorch, and 1.65 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Start training
trainer.train()

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
500,0.2574,1.459197,0.617804,0.646327,0.692308,0.606074
1000,0.3889,1.303613,0.618565,0.696162,0.643391,0.758363
1500,0.5472,0.655826,0.633528,0.652226,0.719596,0.596391
2000,0.4611,0.973819,0.621608,0.627186,0.725434,0.552377
2500,0.4207,0.925017,0.63226,0.67706,0.685302,0.669014
3000,0.4055,0.933815,0.62947,0.678831,0.678085,0.679577
3500,0.3117,1.355861,0.629724,0.678839,0.67854,0.679137
4000,0.3007,1.465613,0.630992,0.684724,0.674349,0.695423


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-1000 already exists and is non-empty. Saving will proceed but saved results may be invalid.


TrainOutput(global_step=4437, training_loss=0.37641437316440585, metrics={'train_runtime': 1453.5526, 'train_samples_per_second': 24.412, 'train_steps_per_second': 3.053, 'total_flos': 4700473173909504.0, 'train_loss': 0.37641437316440585, 'epoch': 3.0})

In [11]:
# If memeory GPU error occurs, run the following line
torch.cuda.empty_cache()

### Evaluate the Model

In [37]:
# eval on the test set that was initiate with the trainer
evaluation_results = trainer.evaluate()
print(evaluation_results)

{'eval_loss': 1.5050917863845825, 'eval_accuracy': 0.6289627187420745, 'eval_f1': 0.6768279213607246, 'eval_precision': 0.6793791574279379, 'eval_recall': 0.6742957746478874, 'eval_runtime': 45.4235, 'eval_samples_per_second': 86.805, 'eval_steps_per_second': 10.853, 'epoch': 3.0}
