In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
import torch
from datasets import load_metric
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

In [32]:
df = pd.read_csv('../data/lyrics_sentiments.csv', sep=';')
df.head()

Unnamed: 0,label,claude_sent,gpt_sent,gpt_sent_2,lyrics
0,nostalgic,,Positive Emotions,Positive Sentiments,\nThere were bells on a hill\nBut I never hear...
1,intimate,,,Calm and Peaceful,\nAnd I'm standing on a platform\nNow I'm star...
2,bittersweet,Pensive/Emotional,,Complex and Mysterious,\nThey're gonna send us to prison for jerks\nF...
3,wistful,Pensive/Emotional,,Complex and Mysterious,Nothing is ever as good as it was\nAnd what's ...
4,exciting,Positive/Uplifting,,Energetic and Exciting,You become pretty when you draw near love\nLet...


In [85]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_lyrics(lyrics):
    return tokenizer(lyrics, padding='max_length', truncation=True)


In [86]:
class LyricsDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        return len(self.labels)

In [87]:



def model_generation(df, label):
    # select columns named lyrics and label
    df = df[['lyrics', label]].copy()
    df.dropna(inplace=True)
    
    label_map = [(i, label) for i, label in enumerate(df[label].unique())]
    df[label] = df[label].map({label: i for i, label in label_map})
    
    df['lyrics'] = df['lyrics'].str.replace('\n', ' ')
    
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)



    train_encodings = tokenizer(list(train_df['lyrics']), padding=True, truncation=True)
    val_encodings = tokenizer(list(val_df['lyrics']), padding=True, truncation=True)
    
    class LyricsDataset(torch.utils.data.Dataset):
        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __getitem__(self, idx):
            item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
            return item

        def __len__(self):
            return len(self.labels)

    train_dataset = LyricsDataset(train_encodings, train_df[label].tolist())
    val_dataset = LyricsDataset(val_encodings, val_df[label].tolist())
    
    accuracy_metric = load_metric("accuracy")

    def compute_metrics(p):
        logits, labels = p
        predictions = np.argmax(logits, axis=1)
        return accuracy_metric.compute(predictions=predictions, references=labels)
    
    n_labels = len(df[label].unique())
    
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=n_labels)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch"
    )
    
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics
    )

    trainer.train()
    
    results = trainer.evaluate()
    
    # save the model
    
    model_name = f"{label}_model"
    model.save_pretrained(model_name)
    
    return(results)

In [88]:
label = 'label'

seeds_accuracy = model_generation(df, label)



You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/474 [00:00<?, ?it/s]

{'loss': 4.6642, 'grad_norm': 6.653538703918457, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.06}
{'loss': 4.6606, 'grad_norm': 6.345301151275635, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.13}
{'loss': 4.6823, 'grad_norm': 7.635947227478027, 'learning_rate': 3e-06, 'epoch': 0.19}
{'loss': 4.6315, 'grad_norm': 5.033979892730713, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.25}
{'loss': 4.6333, 'grad_norm': 5.48937463760376, 'learning_rate': 5e-06, 'epoch': 0.32}
{'loss': 4.6255, 'grad_norm': 7.177362442016602, 'learning_rate': 6e-06, 'epoch': 0.38}
{'loss': 4.6127, 'grad_norm': 5.38101053237915, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.44}
{'loss': 4.6526, 'grad_norm': 7.737237930297852, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.51}
{'loss': 4.6272, 'grad_norm': 5.230264663696289, 'learning_rate': 9e-06, 'epoch': 0.57}
{'loss': 4.6322, 'grad_norm': 6.384013652801514, 'learning_rate': 1e-05, 'epoch': 0.63}
{'loss': 4.6514, 'grad_norm': 6.31190633

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 4.565482139587402, 'eval_accuracy': 0.02694136291600634, 'eval_runtime': 13.7725, 'eval_samples_per_second': 45.816, 'eval_steps_per_second': 2.904, 'epoch': 1.0}
{'loss': 4.588, 'grad_norm': 5.406882286071777, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.01}
{'loss': 4.5361, 'grad_norm': 5.33447265625, 'learning_rate': 1.7000000000000003e-05, 'epoch': 1.08}
{'loss': 4.5162, 'grad_norm': 5.928139686584473, 'learning_rate': 1.8e-05, 'epoch': 1.14}
{'loss': 4.519, 'grad_norm': 5.009749412536621, 'learning_rate': 1.9e-05, 'epoch': 1.2}
{'loss': 4.5183, 'grad_norm': 4.946057319641113, 'learning_rate': 2e-05, 'epoch': 1.27}
{'loss': 4.5434, 'grad_norm': 5.765203952789307, 'learning_rate': 2.1e-05, 'epoch': 1.33}
{'loss': 4.5197, 'grad_norm': 6.280948162078857, 'learning_rate': 2.2000000000000003e-05, 'epoch': 1.39}
{'loss': 4.5215, 'grad_norm': 6.412256240844727, 'learning_rate': 2.3000000000000003e-05, 'epoch': 1.46}
{'loss': 4.5538, 'grad_norm': 5.221756458282471, 'le

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 4.582465648651123, 'eval_accuracy': 0.011093502377179081, 'eval_runtime': 13.9768, 'eval_samples_per_second': 45.146, 'eval_steps_per_second': 2.862, 'epoch': 2.0}
{'loss': 4.6111, 'grad_norm': 4.635292053222656, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.03}
{'loss': 4.5209, 'grad_norm': 4.951947212219238, 'learning_rate': 3.3e-05, 'epoch': 2.09}
{'loss': 4.4624, 'grad_norm': 5.682195663452148, 'learning_rate': 3.4000000000000007e-05, 'epoch': 2.15}
{'loss': 4.5617, 'grad_norm': 5.325401306152344, 'learning_rate': 3.5e-05, 'epoch': 2.22}
{'loss': 4.4538, 'grad_norm': 7.407135009765625, 'learning_rate': 3.6e-05, 'epoch': 2.28}
{'loss': 4.4972, 'grad_norm': 8.817619323730469, 'learning_rate': 3.7e-05, 'epoch': 2.34}
{'loss': 4.4829, 'grad_norm': 6.228271007537842, 'learning_rate': 3.8e-05, 'epoch': 2.41}
{'loss': 4.4623, 'grad_norm': 6.047979354858398, 'learning_rate': 3.9000000000000006e-05, 'epoch': 2.47}
{'loss': 4.3696, 'grad_norm': 5.072359561920166, 'learnin

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 4.474768161773682, 'eval_accuracy': 0.03645007923930269, 'eval_runtime': 14.2589, 'eval_samples_per_second': 44.253, 'eval_steps_per_second': 2.805, 'epoch': 3.0}
{'train_runtime': 577.0927, 'train_samples_per_second': 13.116, 'train_steps_per_second': 0.821, 'train_loss': 4.5516098601908626, 'epoch': 3.0}


  0%|          | 0/40 [00:00<?, ?it/s]

In [89]:
label = 'claude_sent'

claude_accuracy = model_generation(df, label)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/237 [00:00<?, ?it/s]

{'loss': 1.5297, 'grad_norm': 7.556830883026123, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.13}
{'loss': 1.5686, 'grad_norm': 4.945971488952637, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.25}
{'loss': 1.5724, 'grad_norm': 4.56644868850708, 'learning_rate': 3e-06, 'epoch': 0.38}
{'loss': 1.5082, 'grad_norm': 5.777831077575684, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.51}
{'loss': 1.599, 'grad_norm': 7.73043966293335, 'learning_rate': 5e-06, 'epoch': 0.63}
{'loss': 1.5577, 'grad_norm': 4.820255756378174, 'learning_rate': 6e-06, 'epoch': 0.76}
{'loss': 1.569, 'grad_norm': 3.754528284072876, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.89}


  0%|          | 0/20 [00:00<?, ?it/s]

{'eval_loss': 1.5769931077957153, 'eval_accuracy': 0.2911392405063291, 'eval_runtime': 9.4583, 'eval_samples_per_second': 33.41, 'eval_steps_per_second': 2.115, 'epoch': 1.0}
{'loss': 1.571, 'grad_norm': 4.588228702545166, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.01}
{'loss': 1.531, 'grad_norm': 5.3220696449279785, 'learning_rate': 9e-06, 'epoch': 1.14}
{'loss': 1.5192, 'grad_norm': 4.875345706939697, 'learning_rate': 1e-05, 'epoch': 1.27}
{'loss': 1.5829, 'grad_norm': 11.830602645874023, 'learning_rate': 1.1000000000000001e-05, 'epoch': 1.39}
{'loss': 1.5124, 'grad_norm': 4.759320259094238, 'learning_rate': 1.2e-05, 'epoch': 1.52}
{'loss': 1.5328, 'grad_norm': 7.466587066650391, 'learning_rate': 1.3000000000000001e-05, 'epoch': 1.65}
{'loss': 1.5546, 'grad_norm': 4.177201271057129, 'learning_rate': 1.4000000000000001e-05, 'epoch': 1.77}
{'loss': 1.5724, 'grad_norm': 4.968449115753174, 'learning_rate': 1.5e-05, 'epoch': 1.9}


  0%|          | 0/20 [00:00<?, ?it/s]

{'eval_loss': 1.5674939155578613, 'eval_accuracy': 0.3069620253164557, 'eval_runtime': 7.09, 'eval_samples_per_second': 44.57, 'eval_steps_per_second': 2.821, 'epoch': 2.0}
{'loss': 1.4899, 'grad_norm': 5.359111309051514, 'learning_rate': 1.6000000000000003e-05, 'epoch': 2.03}
{'loss': 1.4849, 'grad_norm': 5.272211074829102, 'learning_rate': 1.7000000000000003e-05, 'epoch': 2.15}
{'loss': 1.4512, 'grad_norm': 6.004427433013916, 'learning_rate': 1.8e-05, 'epoch': 2.28}
{'loss': 1.4284, 'grad_norm': 8.520371437072754, 'learning_rate': 1.9e-05, 'epoch': 2.41}
{'loss': 1.4448, 'grad_norm': 14.146289825439453, 'learning_rate': 2e-05, 'epoch': 2.53}
{'loss': 1.4504, 'grad_norm': 8.017539024353027, 'learning_rate': 2.1e-05, 'epoch': 2.66}
{'loss': 1.4673, 'grad_norm': 17.838943481445312, 'learning_rate': 2.2000000000000003e-05, 'epoch': 2.78}
{'loss': 1.5021, 'grad_norm': 6.6927642822265625, 'learning_rate': 2.3000000000000003e-05, 'epoch': 2.91}


  0%|          | 0/20 [00:00<?, ?it/s]

{'eval_loss': 1.5809441804885864, 'eval_accuracy': 0.2879746835443038, 'eval_runtime': 6.5107, 'eval_samples_per_second': 48.535, 'eval_steps_per_second': 3.072, 'epoch': 3.0}
{'train_runtime': 352.5797, 'train_samples_per_second': 10.738, 'train_steps_per_second': 0.672, 'train_loss': 1.5231522306611267, 'epoch': 3.0}


  0%|          | 0/20 [00:00<?, ?it/s]

In [90]:
label = 'gpt_sent'

gpt_accuracy = model_generation(df, label)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/234 [00:00<?, ?it/s]

{'loss': 1.6184, 'grad_norm': 6.676315784454346, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.13}
{'loss': 1.6198, 'grad_norm': 6.715231895446777, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.26}
{'loss': 1.6605, 'grad_norm': 8.723931312561035, 'learning_rate': 3e-06, 'epoch': 0.38}
{'loss': 1.638, 'grad_norm': 6.926366329193115, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.51}
{'loss': 1.5736, 'grad_norm': 5.122390270233154, 'learning_rate': 5e-06, 'epoch': 0.64}
{'loss': 1.5799, 'grad_norm': 5.905638217926025, 'learning_rate': 6e-06, 'epoch': 0.77}
{'loss': 1.5926, 'grad_norm': 6.000736236572266, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.9}


  0%|          | 0/20 [00:00<?, ?it/s]

{'eval_loss': 1.53018057346344, 'eval_accuracy': 0.32051282051282054, 'eval_runtime': 4.7432, 'eval_samples_per_second': 65.778, 'eval_steps_per_second': 4.217, 'epoch': 1.0}
{'loss': 1.5503, 'grad_norm': 8.733366012573242, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.03}
{'loss': 1.5717, 'grad_norm': 5.31638765335083, 'learning_rate': 9e-06, 'epoch': 1.15}
{'loss': 1.4956, 'grad_norm': 8.342581748962402, 'learning_rate': 1e-05, 'epoch': 1.28}
{'loss': 1.5389, 'grad_norm': 4.520341873168945, 'learning_rate': 1.1000000000000001e-05, 'epoch': 1.41}
{'loss': 1.575, 'grad_norm': 3.8851308822631836, 'learning_rate': 1.2e-05, 'epoch': 1.54}
{'loss': 1.4926, 'grad_norm': 7.710490703582764, 'learning_rate': 1.3000000000000001e-05, 'epoch': 1.67}
{'loss': 1.5546, 'grad_norm': 6.679062843322754, 'learning_rate': 1.4000000000000001e-05, 'epoch': 1.79}
{'loss': 1.5948, 'grad_norm': 5.930582046508789, 'learning_rate': 1.5e-05, 'epoch': 1.92}


  0%|          | 0/20 [00:00<?, ?it/s]

{'eval_loss': 1.5252299308776855, 'eval_accuracy': 0.32051282051282054, 'eval_runtime': 5.1621, 'eval_samples_per_second': 60.441, 'eval_steps_per_second': 3.874, 'epoch': 2.0}
{'loss': 1.5299, 'grad_norm': 9.799586296081543, 'learning_rate': 1.6000000000000003e-05, 'epoch': 2.05}
{'loss': 1.5257, 'grad_norm': 3.128535747528076, 'learning_rate': 1.7000000000000003e-05, 'epoch': 2.18}
{'loss': 1.4869, 'grad_norm': 4.127585411071777, 'learning_rate': 1.8e-05, 'epoch': 2.31}
{'loss': 1.4779, 'grad_norm': 7.798474311828613, 'learning_rate': 1.9e-05, 'epoch': 2.44}
{'loss': 1.6007, 'grad_norm': 5.479226112365723, 'learning_rate': 2e-05, 'epoch': 2.56}
{'loss': 1.5142, 'grad_norm': 3.8015360832214355, 'learning_rate': 2.1e-05, 'epoch': 2.69}
{'loss': 1.501, 'grad_norm': 6.2907023429870605, 'learning_rate': 2.2000000000000003e-05, 'epoch': 2.82}
{'loss': 1.488, 'grad_norm': 5.315500736236572, 'learning_rate': 2.3000000000000003e-05, 'epoch': 2.95}


  0%|          | 0/20 [00:00<?, ?it/s]

{'eval_loss': 1.4895938634872437, 'eval_accuracy': 0.34935897435897434, 'eval_runtime': 6.5098, 'eval_samples_per_second': 47.928, 'eval_steps_per_second': 3.072, 'epoch': 3.0}
{'train_runtime': 243.9541, 'train_samples_per_second': 15.31, 'train_steps_per_second': 0.959, 'train_loss': 1.5530352144159822, 'epoch': 3.0}


  0%|          | 0/20 [00:00<?, ?it/s]

In [91]:
label = 'gpt_sent_2'

gpt_2_accuracy = model_generation(df, label)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/471 [00:00<?, ?it/s]

{'loss': 1.6149, 'grad_norm': 5.005006790161133, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.06}
{'loss': 1.6571, 'grad_norm': 8.172957420349121, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.13}
{'loss': 1.6544, 'grad_norm': 6.283621788024902, 'learning_rate': 3e-06, 'epoch': 0.19}
{'loss': 1.5723, 'grad_norm': 11.587963104248047, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.25}
{'loss': 1.5758, 'grad_norm': 7.33373498916626, 'learning_rate': 5e-06, 'epoch': 0.32}
{'loss': 1.5732, 'grad_norm': 6.910074710845947, 'learning_rate': 6e-06, 'epoch': 0.38}
{'loss': 1.6167, 'grad_norm': 9.072874069213867, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.45}
{'loss': 1.5846, 'grad_norm': 4.08292818069458, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.51}
{'loss': 1.5722, 'grad_norm': 5.138309955596924, 'learning_rate': 9e-06, 'epoch': 0.57}
{'loss': 1.5531, 'grad_norm': 7.2225751876831055, 'learning_rate': 1e-05, 'epoch': 0.64}
{'loss': 1.5711, 'grad_norm': 4.513034

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 1.5765527486801147, 'eval_accuracy': 0.28594249201277955, 'eval_runtime': 13.4474, 'eval_samples_per_second': 46.552, 'eval_steps_per_second': 2.975, 'epoch': 1.0}
{'loss': 1.5871, 'grad_norm': 5.493659019470215, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.02}
{'loss': 1.578, 'grad_norm': 5.316946983337402, 'learning_rate': 1.7000000000000003e-05, 'epoch': 1.08}
{'loss': 1.5573, 'grad_norm': 4.193392276763916, 'learning_rate': 1.8e-05, 'epoch': 1.15}
{'loss': 1.5182, 'grad_norm': 4.210460662841797, 'learning_rate': 1.9e-05, 'epoch': 1.21}
{'loss': 1.5219, 'grad_norm': 3.9745044708251953, 'learning_rate': 2e-05, 'epoch': 1.27}
{'loss': 1.5391, 'grad_norm': 7.695709705352783, 'learning_rate': 2.1e-05, 'epoch': 1.34}
{'loss': 1.5133, 'grad_norm': 6.257134437561035, 'learning_rate': 2.2000000000000003e-05, 'epoch': 1.4}
{'loss': 1.5162, 'grad_norm': 6.2271199226379395, 'learning_rate': 2.3000000000000003e-05, 'epoch': 1.46}
{'loss': 1.5289, 'grad_norm': 5.927811622619

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 1.5792171955108643, 'eval_accuracy': 0.28753993610223644, 'eval_runtime': 12.0243, 'eval_samples_per_second': 52.061, 'eval_steps_per_second': 3.327, 'epoch': 2.0}
{'loss': 1.5838, 'grad_norm': 3.844600200653076, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.04}
{'loss': 1.5271, 'grad_norm': 5.161525249481201, 'learning_rate': 3.3e-05, 'epoch': 2.1}
{'loss': 1.5036, 'grad_norm': 4.142090797424316, 'learning_rate': 3.4000000000000007e-05, 'epoch': 2.17}
{'loss': 1.5691, 'grad_norm': 7.037516117095947, 'learning_rate': 3.5e-05, 'epoch': 2.23}
{'loss': 1.5179, 'grad_norm': 4.483217239379883, 'learning_rate': 3.6e-05, 'epoch': 2.29}
{'loss': 1.4607, 'grad_norm': 4.090171813964844, 'learning_rate': 3.7e-05, 'epoch': 2.36}
{'loss': 1.504, 'grad_norm': 6.272985458374023, 'learning_rate': 3.8e-05, 'epoch': 2.42}
{'loss': 1.5088, 'grad_norm': 5.160446643829346, 'learning_rate': 3.9000000000000006e-05, 'epoch': 2.48}
{'loss': 1.5074, 'grad_norm': 5.832645893096924, 'learning_

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 1.516116976737976, 'eval_accuracy': 0.3242811501597444, 'eval_runtime': 15.4709, 'eval_samples_per_second': 40.463, 'eval_steps_per_second': 2.586, 'epoch': 3.0}
{'train_runtime': 658.6679, 'train_samples_per_second': 11.396, 'train_steps_per_second': 0.715, 'train_loss': 1.5587587186991536, 'epoch': 3.0}


  0%|          | 0/40 [00:00<?, ?it/s]

In [92]:
print(f"Results from the Individual Labels Model                      Loss: {seeds_accuracy['eval_loss']:.2f} | Accuracy: {100*seeds_accuracy['eval_accuracy']:.2f}%")
print(f"Results from the Categories assigned by Claude Model:         Loss: {claude_accuracy['eval_loss']:.2f} | Accuracy: {100*claude_accuracy['eval_accuracy']:.2f}%")
print(f"Results from the first categories assigned by GPT Model:      Loss: {gpt_accuracy['eval_loss']:.2f} | Accuracy: {100*gpt_accuracy['eval_accuracy']:.2f}%")
print(f"Results from the second categories assigned by GPT Model:     Loss: {gpt_2_accuracy['eval_loss']:.2f} | Accuracy: {100*gpt_2_accuracy['eval_accuracy']:.2f}%")

Results from the Individual Labels Model                      Loss: 4.47 | Accuracy: 3.65%
Results from the Categories assigned by Claude Model:         Loss: 1.58 | Accuracy: 28.80%
Results from the first categories assigned by GPT Model:      Loss: 1.49 | Accuracy: 34.94%
Results from the second categories assigned by GPT Model:     Loss: 1.52 | Accuracy: 32.43%
