# NLP: Sentiment Analysis
Choosing and training a model to perform sentiment analysis on catalan text.

In [3]:
import pandas as pd
import numpy as np

from datasets import Dataset, concatenate_datasets

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch

## The data

Check out `data-preprocessing.ipynb` for details about how this initial dataset was chosen, and how the subset dataset was labeled.

In [5]:
comments = pd.read_csv("comments.csv")

In [6]:
labeled = pd.read_csv("manual-labeling/comments_subset_labeled.tsv", sep="\t")

### Defined the labeled dataset

In [6]:
comments_labeled = comments.join(labeled.set_index('comment_id'), on='comment_id', rsuffix='labeled')

In [7]:
comments_labeled = comments_labeled[~comments_labeled['label'].isnull()]

In [8]:
labels = {'positive':       0,
          'neutral':        1,
          'negative':       2,
          'very-negative':  3}

In [9]:
comments_labeled['labels'] = comments_labeled['label'].apply(lambda text_label: labels[text_label])

In [10]:
comments_labeled = comments_labeled[['input_text', 'labels']]

In [11]:
comments_labeled.head()

Unnamed: 0,input_text,labels
5,[ARTICLE TITLE] Pas endavant del conveni del C...,2
9,[ARTICLE TITLE] Espot confia en el suport d’Es...,3
15,[ARTICLE TITLE] Més de 174.000 euros per a un ...,3
22,[ARTICLE TITLE] Més de 174.000 euros per a un ...,2
25,[ARTICLE TITLE] Més de 174.000 euros per a un ...,2


In [12]:
comments_labeled['labels'].value_counts()

labels
2    114
1     59
3     53
0     24
Name: count, dtype: int64

In [13]:
num_custom_labels = 4

### Define the unlabeled dataset

In [14]:
comments['labels'] = np.nan

In [15]:
len(comments)

2413

In [16]:
comments_unlabeled = comments[~comments['comment_id'].isin(labeled['comment_id'])][['input_text']]

In [17]:
len(comments_unlabeled)

2163

In [18]:
comments_unlabeled.head()

Unnamed: 0,input_text
0,[ARTICLE TITLE] SDP advoca per un espai transv...
1,[ARTICLE TITLE] SDP advoca per un espai transv...
2,[ARTICLE TITLE] Acord en cercar “consens” i “c...
3,[ARTICLE TITLE] Acord en cercar “consens” i “c...
4,[ARTICLE TITLE] Acord en cercar “consens” i “c...


## Prepare the dataset for the model

### Splitting into training and testing

In [19]:
train, val = train_test_split(
    comments_labeled,
    test_size=0.2,
    random_state=42,
    stratify=comments_labeled['labels']  # Ensures proportional class distribution
)

In [20]:
print(f"Length of training dataset: {len(train)}\nLength of validation dataset: {len(val)}")
print(f"Total length of dataset: {len(train) + len(val)}")

Length of training dataset: 200
Length of validation dataset: 50
Total length of dataset: 250


## Fine-tuning

We'll get the Bert model and Tokenizer from the transformers package.

In [21]:
mbert = 'bert-base-multilingual-cased'
tokenizer = BertTokenizer.from_pretrained(mbert)
model = BertForSequenceClassification.from_pretrained(mbert, num_labels=num_custom_labels)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Defining the datasets in the correct HuggingFace format

In [22]:
train_dataset = Dataset.from_pandas(train)
val_dataset = Dataset.from_pandas(val)

### Tokenize the text

In [23]:
def tokenize(record):
    return tokenizer(record['input_text'], padding='max_length', truncation=True, max_length=128)

In [24]:
train_dataset

Dataset({
    features: ['input_text', 'labels', '__index_level_0__'],
    num_rows: 200
})

In [25]:
train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)

Map: 100%|██████████| 200/200 [00:00<00:00, 3716.77 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 3581.94 examples/s]


### Rename the columns

### Set format for PyTorch

In [26]:
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

In [27]:
train_dataset

Dataset({
    features: ['input_text', 'labels', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 200
})

### Define training arguments

In [28]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=15,
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=50,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=2,  # Limit the total amount of checkpoints
    seed=42
)



### Define metrics

In [29]:
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    return {"accuracy": accuracy_score(p.label_ids, preds)}

### Initialize the trainer

In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

### Training the model !

In [31]:
trainer.train()

  5%|▌         | 10/195 [00:08<02:29,  1.24it/s]

{'loss': 1.3925, 'grad_norm': 4.255208969116211, 'learning_rate': 6e-06, 'epoch': 0.77}


                                                
  7%|▋         | 13/195 [00:11<02:15,  1.34it/s]

{'eval_loss': 1.325324296951294, 'eval_accuracy': 0.46, 'eval_runtime': 0.6519, 'eval_samples_per_second': 76.701, 'eval_steps_per_second': 1.534, 'epoch': 1.0}


 10%|█         | 20/195 [00:19<02:37,  1.11it/s]

{'loss': 1.3097, 'grad_norm': 4.64552640914917, 'learning_rate': 1.2e-05, 'epoch': 1.54}


                                                
 13%|█▎        | 26/195 [00:24<02:03,  1.37it/s]

{'eval_loss': 1.236799716949463, 'eval_accuracy': 0.46, 'eval_runtime': 0.6134, 'eval_samples_per_second': 81.51, 'eval_steps_per_second': 1.63, 'epoch': 2.0}


 15%|█▌        | 30/195 [00:29<02:57,  1.08s/it]

{'loss': 1.2499, 'grad_norm': 3.954501152038574, 'learning_rate': 1.8e-05, 'epoch': 2.31}


                                                
 20%|██        | 39/195 [00:37<01:52,  1.38it/s]

{'eval_loss': 1.2695953845977783, 'eval_accuracy': 0.46, 'eval_runtime': 0.6165, 'eval_samples_per_second': 81.1, 'eval_steps_per_second': 1.622, 'epoch': 3.0}


 21%|██        | 40/195 [00:40<04:07,  1.60s/it]

{'loss': 1.1987, 'grad_norm': 6.060129165649414, 'learning_rate': 2.4e-05, 'epoch': 3.08}


 26%|██▌       | 50/195 [00:48<01:59,  1.21it/s]

{'loss': 1.1977, 'grad_norm': 5.967221260070801, 'learning_rate': 3e-05, 'epoch': 3.85}


                                                
 27%|██▋       | 52/195 [00:50<01:43,  1.38it/s]

{'eval_loss': 1.2021063566207886, 'eval_accuracy': 0.52, 'eval_runtime': 0.6187, 'eval_samples_per_second': 80.817, 'eval_steps_per_second': 1.616, 'epoch': 4.0}


 31%|███       | 60/195 [00:58<01:56,  1.15it/s]

{'loss': 1.1183, 'grad_norm': 14.111748695373535, 'learning_rate': 2.793103448275862e-05, 'epoch': 4.62}


                                                
 33%|███▎      | 65/195 [01:03<01:34,  1.38it/s]

{'eval_loss': 1.2863792181015015, 'eval_accuracy': 0.46, 'eval_runtime': 0.6146, 'eval_samples_per_second': 81.349, 'eval_steps_per_second': 1.627, 'epoch': 5.0}


 36%|███▌      | 70/195 [01:09<02:04,  1.01it/s]

{'loss': 0.896, 'grad_norm': 7.554915428161621, 'learning_rate': 2.586206896551724e-05, 'epoch': 5.38}


                                                
 40%|████      | 78/195 [01:16<01:26,  1.35it/s]

{'eval_loss': 1.2289361953735352, 'eval_accuracy': 0.42, 'eval_runtime': 0.6243, 'eval_samples_per_second': 80.094, 'eval_steps_per_second': 1.602, 'epoch': 6.0}


 41%|████      | 80/195 [01:20<02:37,  1.37s/it]

{'loss': 0.8556, 'grad_norm': 48.4938850402832, 'learning_rate': 2.3793103448275862e-05, 'epoch': 6.15}


 46%|████▌     | 90/195 [01:28<01:25,  1.23it/s]

{'loss': 0.8393, 'grad_norm': 20.422714233398438, 'learning_rate': 2.1724137931034484e-05, 'epoch': 6.92}


                                                
 47%|████▋     | 91/195 [01:29<01:14,  1.39it/s]

{'eval_loss': 1.2251347303390503, 'eval_accuracy': 0.5, 'eval_runtime': 0.616, 'eval_samples_per_second': 81.173, 'eval_steps_per_second': 1.623, 'epoch': 7.0}


 51%|█████▏    | 100/195 [01:38<01:21,  1.17it/s]

{'loss': 0.6389, 'grad_norm': 9.50019645690918, 'learning_rate': 1.9655172413793102e-05, 'epoch': 7.69}


                                                 
 53%|█████▎    | 104/195 [01:42<01:07,  1.35it/s]

{'eval_loss': 1.3250346183776855, 'eval_accuracy': 0.46, 'eval_runtime': 0.6296, 'eval_samples_per_second': 79.413, 'eval_steps_per_second': 1.588, 'epoch': 8.0}


 56%|█████▋    | 110/195 [01:49<01:20,  1.05it/s]

{'loss': 0.5726, 'grad_norm': 13.959309577941895, 'learning_rate': 1.7586206896551724e-05, 'epoch': 8.46}


                                                 
 60%|██████    | 117/195 [01:55<00:57,  1.37it/s]

{'eval_loss': 1.545756220817566, 'eval_accuracy': 0.42, 'eval_runtime': 0.6136, 'eval_samples_per_second': 81.484, 'eval_steps_per_second': 1.63, 'epoch': 9.0}


 62%|██████▏   | 120/195 [01:59<01:28,  1.19s/it]

{'loss': 0.4442, 'grad_norm': 6.3397440910339355, 'learning_rate': 1.5517241379310346e-05, 'epoch': 9.23}


 67%|██████▋   | 130/195 [02:07<00:47,  1.37it/s]

{'loss': 0.3413, 'grad_norm': 74.83346557617188, 'learning_rate': 1.3448275862068966e-05, 'epoch': 10.0}


                                                 
 67%|██████▋   | 130/195 [02:08<00:47,  1.37it/s]

{'eval_loss': 1.599244236946106, 'eval_accuracy': 0.44, 'eval_runtime': 0.6183, 'eval_samples_per_second': 80.866, 'eval_steps_per_second': 1.617, 'epoch': 10.0}


 72%|███████▏  | 140/195 [02:18<00:45,  1.20it/s]

{'loss': 0.2911, 'grad_norm': 43.455772399902344, 'learning_rate': 1.1379310344827586e-05, 'epoch': 10.77}


                                                 
 73%|███████▎  | 143/195 [02:21<00:37,  1.38it/s]

{'eval_loss': 1.5450941324234009, 'eval_accuracy': 0.46, 'eval_runtime': 0.6241, 'eval_samples_per_second': 80.114, 'eval_steps_per_second': 1.602, 'epoch': 11.0}


 77%|███████▋  | 150/195 [02:29<00:40,  1.11it/s]

{'loss': 0.3123, 'grad_norm': 2.7249178886413574, 'learning_rate': 9.310344827586207e-06, 'epoch': 11.54}


                                                 
 80%|████████  | 156/195 [02:34<00:28,  1.37it/s]

{'eval_loss': 1.6294347047805786, 'eval_accuracy': 0.54, 'eval_runtime': 0.6302, 'eval_samples_per_second': 79.337, 'eval_steps_per_second': 1.587, 'epoch': 12.0}


 82%|████████▏ | 160/195 [02:39<00:37,  1.07s/it]

{'loss': 0.2225, 'grad_norm': 5.292593955993652, 'learning_rate': 7.241379310344828e-06, 'epoch': 12.31}


                                                 
 87%|████████▋ | 169/195 [02:47<00:18,  1.38it/s]

{'eval_loss': 1.6218115091323853, 'eval_accuracy': 0.56, 'eval_runtime': 0.6296, 'eval_samples_per_second': 79.42, 'eval_steps_per_second': 1.588, 'epoch': 13.0}


 87%|████████▋ | 170/195 [02:50<00:39,  1.58s/it]

{'loss': 0.176, 'grad_norm': 2.1836647987365723, 'learning_rate': 5.172413793103449e-06, 'epoch': 13.08}


 92%|█████████▏| 180/195 [02:58<00:12,  1.21it/s]

{'loss': 0.167, 'grad_norm': 3.3400514125823975, 'learning_rate': 3.103448275862069e-06, 'epoch': 13.85}


                                                 
 93%|█████████▎| 182/195 [03:00<00:09,  1.37it/s]

{'eval_loss': 1.595895528793335, 'eval_accuracy': 0.52, 'eval_runtime': 0.6394, 'eval_samples_per_second': 78.2, 'eval_steps_per_second': 1.564, 'epoch': 14.0}


 97%|█████████▋| 190/195 [03:08<00:04,  1.13it/s]

{'loss': 0.1494, 'grad_norm': 12.252001762390137, 'learning_rate': 1.0344827586206896e-06, 'epoch': 14.62}


                                                 
100%|██████████| 195/195 [03:15<00:00,  1.36it/s]

{'eval_loss': 1.6514149904251099, 'eval_accuracy': 0.54, 'eval_runtime': 0.6584, 'eval_samples_per_second': 75.939, 'eval_steps_per_second': 1.519, 'epoch': 15.0}


100%|██████████| 195/195 [03:18<00:00,  1.02s/it]

{'train_runtime': 198.4189, 'train_samples_per_second': 15.12, 'train_steps_per_second': 0.983, 'train_loss': 0.6899081162917309, 'epoch': 15.0}





TrainOutput(global_step=195, training_loss=0.6899081162917309, metrics={'train_runtime': 198.4189, 'train_samples_per_second': 15.12, 'train_steps_per_second': 0.983, 'total_flos': 197336835072000.0, 'train_loss': 0.6899081162917309, 'epoch': 15.0})

### Evalutaing the model

In [32]:
results = trainer.evaluate(eval_dataset=val_dataset)
print("Evaluation Results:", results)

100%|██████████| 1/1 [00:00<00:00, 439.38it/s]

Evaluation Results: {'eval_loss': 1.6218115091323853, 'eval_accuracy': 0.56, 'eval_runtime': 0.6848, 'eval_samples_per_second': 73.012, 'eval_steps_per_second': 1.46, 'epoch': 15.0}





### Save the trained model

In [33]:
# trainer.save_model('./fine-tuned-mbert-sentiment')

# Pseudo-labeling on Unlabeled Data

In [34]:
len(comments_unlabeled)

2163

### Define the Dataset

In [35]:
unlabeled_dataset = Dataset.from_pandas(comments_unlabeled)

### Tokenize

In [36]:
unlabeled_dataset = unlabeled_dataset.map(tokenize, batched=True)

Map: 100%|██████████| 2163/2163 [00:00<00:00, 4114.04 examples/s]


### Define torch format

In [37]:
unlabeled_dataset.set_format('torch', columns=['input_ids', 'attention_mask'])

### Predict the labels for the unlabeled data

In [38]:
predictions = trainer.predict(unlabeled_dataset)
probs = torch.nn.functional.softmax(torch.tensor(predictions.predictions), dim=1)
max_probs, pseudo_labels = torch.max(probs, dim=1)

100%|██████████| 34/34 [00:28<00:00,  1.18it/s]


In [39]:
probs = max_probs.numpy()
pseudo_labels = pseudo_labels.numpy()

### Define confidence threshold

In [40]:
confidence_threshold = 0.9
confident_indices = np.where(probs > confidence_threshold)[0]

In [41]:
print(f"Confident pseudo-labeled samples: {len(confident_indices)}")

Confident pseudo-labeled samples: 847


### Select confident pseudo-labeled data

In [42]:
pseudo_labeled_dataset = unlabeled_dataset.select(confident_indices)
pseudo_labeled_dataset = pseudo_labeled_dataset.add_column('labels', pseudo_labels[confident_indices])

Flattening the indices: 100%|██████████| 847/847 [00:00<00:00, 26281.50 examples/s]


# Combining labeled and pseudo-labeled dataset

In [43]:
expanded_train_dataset = concatenate_datasets([train_dataset, pseudo_labeled_dataset])

In [44]:
print(f"Expanded training samples: {len(expanded_train_dataset)}")

Expanded training samples: 1047


# Fine tune on expanded dataset

In [45]:
trainer_expanded = Trainer(
    model=model,
    args=training_args,
    train_dataset=expanded_train_dataset,
    eval_dataset=val_dataset,  # Use the same validation set
    compute_metrics=compute_metrics,
)

In [46]:
trainer_expanded.train()

  1%|          | 10/990 [00:08<13:56,  1.17it/s]

{'loss': 0.1669, 'grad_norm': 3.2703919410705566, 'learning_rate': 6e-06, 'epoch': 0.15}


  2%|▏         | 20/990 [00:17<13:36,  1.19it/s]

{'loss': 0.1301, 'grad_norm': 1.9483102560043335, 'learning_rate': 1.2e-05, 'epoch': 0.3}


  3%|▎         | 30/990 [00:25<14:21,  1.11it/s]

{'loss': 0.1532, 'grad_norm': 42.932193756103516, 'learning_rate': 1.8e-05, 'epoch': 0.45}


  4%|▍         | 40/990 [00:35<15:11,  1.04it/s]

{'loss': 0.2246, 'grad_norm': 73.94305419921875, 'learning_rate': 2.4e-05, 'epoch': 0.61}


  5%|▌         | 50/990 [00:45<15:59,  1.02s/it]

{'loss': 0.2865, 'grad_norm': 27.20431137084961, 'learning_rate': 3e-05, 'epoch': 0.76}


  6%|▌         | 60/990 [00:55<16:36,  1.07s/it]

{'loss': 0.1754, 'grad_norm': 28.016773223876953, 'learning_rate': 2.968085106382979e-05, 'epoch': 0.91}


  7%|▋         | 66/990 [01:02<16:21,  1.06s/it]
  7%|▋         | 66/990 [01:03<16:21,  1.06s/it]

{'eval_loss': 2.1756980419158936, 'eval_accuracy': 0.48, 'eval_runtime': 0.8388, 'eval_samples_per_second': 59.607, 'eval_steps_per_second': 1.192, 'epoch': 1.0}


  7%|▋         | 70/990 [01:09<21:11,  1.38s/it]

{'loss': 0.197, 'grad_norm': 60.7669563293457, 'learning_rate': 2.9361702127659574e-05, 'epoch': 1.06}


  8%|▊         | 80/990 [01:20<16:30,  1.09s/it]

{'loss': 0.3878, 'grad_norm': 246.611328125, 'learning_rate': 2.9042553191489362e-05, 'epoch': 1.21}


  9%|▉         | 90/990 [01:31<15:52,  1.06s/it]

{'loss': 0.6145, 'grad_norm': 16.784528732299805, 'learning_rate': 2.872340425531915e-05, 'epoch': 1.36}


 10%|█         | 100/990 [01:42<18:01,  1.22s/it]

{'loss': 0.2867, 'grad_norm': 7.486649990081787, 'learning_rate': 2.8404255319148935e-05, 'epoch': 1.52}


 11%|█         | 110/990 [01:53<15:07,  1.03s/it]

{'loss': 0.1207, 'grad_norm': 7.7974700927734375, 'learning_rate': 2.8085106382978723e-05, 'epoch': 1.67}


 12%|█▏        | 120/990 [02:02<14:31,  1.00s/it]

{'loss': 0.2289, 'grad_norm': 31.75539207458496, 'learning_rate': 2.776595744680851e-05, 'epoch': 1.82}


 13%|█▎        | 130/990 [02:12<13:49,  1.04it/s]

{'loss': 0.4168, 'grad_norm': 3.3967432975769043, 'learning_rate': 2.74468085106383e-05, 'epoch': 1.97}


 13%|█▎        | 132/990 [02:14<11:58,  1.19it/s]
 13%|█▎        | 132/990 [02:14<11:58,  1.19it/s]

{'eval_loss': 2.8875741958618164, 'eval_accuracy': 0.42, 'eval_runtime': 0.8169, 'eval_samples_per_second': 61.208, 'eval_steps_per_second': 1.224, 'epoch': 2.0}


 14%|█▍        | 140/990 [02:24<14:48,  1.05s/it]

{'loss': 0.1007, 'grad_norm': 1.2229673862457275, 'learning_rate': 2.7127659574468088e-05, 'epoch': 2.12}


 15%|█▌        | 150/990 [02:34<13:34,  1.03it/s]

{'loss': 0.2213, 'grad_norm': 43.14710998535156, 'learning_rate': 2.6808510638297873e-05, 'epoch': 2.27}


 16%|█▌        | 160/990 [02:43<13:32,  1.02it/s]

{'loss': 0.1109, 'grad_norm': 38.432281494140625, 'learning_rate': 2.648936170212766e-05, 'epoch': 2.42}


 17%|█▋        | 170/990 [02:53<13:36,  1.00it/s]

{'loss': 0.1338, 'grad_norm': 2.5101795196533203, 'learning_rate': 2.617021276595745e-05, 'epoch': 2.58}


 18%|█▊        | 180/990 [03:03<13:32,  1.00s/it]

{'loss': 0.1097, 'grad_norm': 0.09352022409439087, 'learning_rate': 2.5851063829787234e-05, 'epoch': 2.73}


 19%|█▉        | 190/990 [03:14<13:45,  1.03s/it]

{'loss': 0.1237, 'grad_norm': 78.02490997314453, 'learning_rate': 2.5531914893617022e-05, 'epoch': 2.88}


 20%|██        | 198/990 [03:21<11:44,  1.12it/s]
 20%|██        | 198/990 [03:22<11:44,  1.12it/s]

{'eval_loss': 2.7672948837280273, 'eval_accuracy': 0.5, 'eval_runtime': 0.9245, 'eval_samples_per_second': 54.084, 'eval_steps_per_second': 1.082, 'epoch': 3.0}


 20%|██        | 200/990 [03:26<20:04,  1.52s/it]

{'loss': 0.1005, 'grad_norm': 26.30610466003418, 'learning_rate': 2.521276595744681e-05, 'epoch': 3.03}


 21%|██        | 210/990 [03:37<13:49,  1.06s/it]

{'loss': 0.007, 'grad_norm': 0.5355567932128906, 'learning_rate': 2.4893617021276595e-05, 'epoch': 3.18}


 22%|██▏       | 220/990 [03:47<13:47,  1.07s/it]

{'loss': 0.1063, 'grad_norm': 91.38518524169922, 'learning_rate': 2.4574468085106383e-05, 'epoch': 3.33}


 23%|██▎       | 230/990 [03:58<13:18,  1.05s/it]

{'loss': 0.0205, 'grad_norm': 1.3089481592178345, 'learning_rate': 2.4255319148936168e-05, 'epoch': 3.48}


 24%|██▍       | 240/990 [04:08<13:08,  1.05s/it]

{'loss': 0.0253, 'grad_norm': 0.0791650265455246, 'learning_rate': 2.3936170212765956e-05, 'epoch': 3.64}


 25%|██▌       | 250/990 [04:19<13:08,  1.06s/it]

{'loss': 0.0343, 'grad_norm': 0.030676085501909256, 'learning_rate': 2.3617021276595744e-05, 'epoch': 3.79}


 26%|██▋       | 260/990 [04:29<12:49,  1.05s/it]

{'loss': 0.1404, 'grad_norm': 23.9550724029541, 'learning_rate': 2.3297872340425533e-05, 'epoch': 3.94}


 27%|██▋       | 264/990 [04:33<11:06,  1.09it/s]
 27%|██▋       | 264/990 [04:34<11:06,  1.09it/s]

{'eval_loss': 3.764519691467285, 'eval_accuracy': 0.5, 'eval_runtime': 0.9449, 'eval_samples_per_second': 52.918, 'eval_steps_per_second': 1.058, 'epoch': 4.0}


 27%|██▋       | 270/990 [04:42<14:25,  1.20s/it]

{'loss': 0.1383, 'grad_norm': 96.81298828125, 'learning_rate': 2.297872340425532e-05, 'epoch': 4.09}


 28%|██▊       | 280/990 [04:53<12:23,  1.05s/it]

{'loss': 0.1063, 'grad_norm': 4.277616024017334, 'learning_rate': 2.265957446808511e-05, 'epoch': 4.24}


 29%|██▉       | 290/990 [05:03<12:16,  1.05s/it]

{'loss': 0.0812, 'grad_norm': 17.18572235107422, 'learning_rate': 2.2340425531914894e-05, 'epoch': 4.39}


 30%|███       | 300/990 [05:14<12:01,  1.05s/it]

{'loss': 0.1087, 'grad_norm': 82.56684875488281, 'learning_rate': 2.2021276595744682e-05, 'epoch': 4.55}


 31%|███▏      | 310/990 [05:24<11:55,  1.05s/it]

{'loss': 0.0909, 'grad_norm': 0.06764954328536987, 'learning_rate': 2.170212765957447e-05, 'epoch': 4.7}


 32%|███▏      | 320/990 [05:35<11:43,  1.05s/it]

{'loss': 0.0695, 'grad_norm': 13.999109268188477, 'learning_rate': 2.1382978723404255e-05, 'epoch': 4.85}


 33%|███▎      | 330/990 [05:45<10:02,  1.09it/s]

{'loss': 0.1486, 'grad_norm': 128.23426818847656, 'learning_rate': 2.1063829787234043e-05, 'epoch': 5.0}



 33%|███▎      | 330/990 [05:46<10:02,  1.09it/s]

{'eval_loss': 3.2418322563171387, 'eval_accuracy': 0.56, 'eval_runtime': 0.9673, 'eval_samples_per_second': 51.69, 'eval_steps_per_second': 1.034, 'epoch': 5.0}


 34%|███▍      | 340/990 [05:58<12:00,  1.11s/it]

{'loss': 0.0042, 'grad_norm': 15.355944633483887, 'learning_rate': 2.074468085106383e-05, 'epoch': 5.15}


 35%|███▌      | 350/990 [06:09<11:25,  1.07s/it]

{'loss': 0.0025, 'grad_norm': 0.05341530591249466, 'learning_rate': 2.0425531914893616e-05, 'epoch': 5.3}


 36%|███▋      | 360/990 [06:20<11:23,  1.09s/it]

{'loss': 0.0012, 'grad_norm': 0.166780024766922, 'learning_rate': 2.0106382978723404e-05, 'epoch': 5.45}


 37%|███▋      | 370/990 [06:31<11:20,  1.10s/it]

{'loss': 0.0949, 'grad_norm': 0.009628782980144024, 'learning_rate': 1.978723404255319e-05, 'epoch': 5.61}


 38%|███▊      | 380/990 [06:42<11:09,  1.10s/it]

{'loss': 0.0646, 'grad_norm': 0.06809471547603607, 'learning_rate': 1.9468085106382977e-05, 'epoch': 5.76}


 39%|███▉      | 390/990 [06:53<11:07,  1.11s/it]

{'loss': 0.0263, 'grad_norm': 0.07778344303369522, 'learning_rate': 1.914893617021277e-05, 'epoch': 5.91}


 40%|████      | 396/990 [06:59<09:36,  1.03it/s]
 40%|████      | 396/990 [07:00<09:36,  1.03it/s]

{'eval_loss': 3.492000102996826, 'eval_accuracy': 0.52, 'eval_runtime': 1.0258, 'eval_samples_per_second': 48.741, 'eval_steps_per_second': 0.975, 'epoch': 6.0}


 40%|████      | 400/990 [07:07<14:19,  1.46s/it]

{'loss': 0.1441, 'grad_norm': 0.7282078266143799, 'learning_rate': 1.8829787234042554e-05, 'epoch': 6.06}


 41%|████▏     | 410/990 [07:18<10:53,  1.13s/it]

{'loss': 0.0539, 'grad_norm': 0.022077342495322227, 'learning_rate': 1.8510638297872342e-05, 'epoch': 6.21}


 42%|████▏     | 420/990 [07:29<10:37,  1.12s/it]

{'loss': 0.088, 'grad_norm': 0.017558619379997253, 'learning_rate': 1.819148936170213e-05, 'epoch': 6.36}


 43%|████▎     | 430/990 [07:40<10:12,  1.09s/it]

{'loss': 0.0732, 'grad_norm': 0.3889275789260864, 'learning_rate': 1.7872340425531915e-05, 'epoch': 6.52}


 44%|████▍     | 440/990 [07:51<10:04,  1.10s/it]

{'loss': 0.045, 'grad_norm': 13.072084426879883, 'learning_rate': 1.7553191489361703e-05, 'epoch': 6.67}


 45%|████▌     | 450/990 [08:02<09:47,  1.09s/it]

{'loss': 0.005, 'grad_norm': 62.88982009887695, 'learning_rate': 1.723404255319149e-05, 'epoch': 6.82}


 46%|████▋     | 460/990 [08:13<09:34,  1.08s/it]

{'loss': 0.0037, 'grad_norm': 0.1680838018655777, 'learning_rate': 1.6914893617021276e-05, 'epoch': 6.97}


 47%|████▋     | 462/990 [08:15<08:18,  1.06it/s]
 47%|████▋     | 462/990 [08:16<08:18,  1.06it/s]

{'eval_loss': 3.7872955799102783, 'eval_accuracy': 0.48, 'eval_runtime': 0.9909, 'eval_samples_per_second': 50.46, 'eval_steps_per_second': 1.009, 'epoch': 7.0}


 47%|████▋     | 470/990 [08:26<10:03,  1.16s/it]

{'loss': 0.0465, 'grad_norm': 0.03368820995092392, 'learning_rate': 1.6595744680851064e-05, 'epoch': 7.12}


 48%|████▊     | 480/990 [08:37<09:11,  1.08s/it]

{'loss': 0.05, 'grad_norm': 0.007050142157822847, 'learning_rate': 1.627659574468085e-05, 'epoch': 7.27}


 49%|████▉     | 490/990 [08:48<09:06,  1.09s/it]

{'loss': 0.0368, 'grad_norm': 0.07414097338914871, 'learning_rate': 1.5957446808510637e-05, 'epoch': 7.42}


 51%|█████     | 500/990 [08:59<08:57,  1.10s/it]

{'loss': 0.0009, 'grad_norm': 0.009289557114243507, 'learning_rate': 1.5638297872340426e-05, 'epoch': 7.58}


 52%|█████▏    | 510/990 [09:10<08:51,  1.11s/it]

{'loss': 0.0007, 'grad_norm': 0.020691432058811188, 'learning_rate': 1.531914893617021e-05, 'epoch': 7.73}


 53%|█████▎    | 520/990 [09:21<08:44,  1.12s/it]

{'loss': 0.0029, 'grad_norm': 0.011680754832923412, 'learning_rate': 1.5e-05, 'epoch': 7.88}


 53%|█████▎    | 528/990 [09:30<07:21,  1.05it/s]
 53%|█████▎    | 528/990 [09:31<07:21,  1.05it/s]

{'eval_loss': 3.745751142501831, 'eval_accuracy': 0.52, 'eval_runtime': 1.0264, 'eval_samples_per_second': 48.715, 'eval_steps_per_second': 0.974, 'epoch': 8.0}


 54%|█████▎    | 530/990 [09:35<12:16,  1.60s/it]

{'loss': 0.0703, 'grad_norm': 0.010922220535576344, 'learning_rate': 1.4680851063829787e-05, 'epoch': 8.03}


 55%|█████▍    | 540/990 [09:46<08:38,  1.15s/it]

{'loss': 0.0005, 'grad_norm': 0.011236299760639668, 'learning_rate': 1.4361702127659575e-05, 'epoch': 8.18}


 56%|█████▌    | 550/990 [09:58<08:16,  1.13s/it]

{'loss': 0.0406, 'grad_norm': 0.006490299478173256, 'learning_rate': 1.4042553191489362e-05, 'epoch': 8.33}


 57%|█████▋    | 560/990 [10:09<08:10,  1.14s/it]

{'loss': 0.0358, 'grad_norm': 56.95469284057617, 'learning_rate': 1.372340425531915e-05, 'epoch': 8.48}


 58%|█████▊    | 570/990 [10:20<08:00,  1.14s/it]

{'loss': 0.0004, 'grad_norm': 0.17345213890075684, 'learning_rate': 1.3404255319148936e-05, 'epoch': 8.64}


 59%|█████▊    | 580/990 [10:32<07:49,  1.15s/it]

{'loss': 0.0005, 'grad_norm': 0.020547660067677498, 'learning_rate': 1.3085106382978724e-05, 'epoch': 8.79}


 60%|█████▉    | 590/990 [10:43<07:29,  1.12s/it]

{'loss': 0.0005, 'grad_norm': 0.004324663896113634, 'learning_rate': 1.2765957446808511e-05, 'epoch': 8.94}


 60%|██████    | 594/990 [10:47<06:23,  1.03it/s]
 60%|██████    | 594/990 [10:48<06:23,  1.03it/s]

{'eval_loss': 3.9496572017669678, 'eval_accuracy': 0.5, 'eval_runtime': 1.0039, 'eval_samples_per_second': 49.805, 'eval_steps_per_second': 0.996, 'epoch': 9.0}


 61%|██████    | 600/990 [10:57<08:15,  1.27s/it]

{'loss': 0.0017, 'grad_norm': 0.013054659590125084, 'learning_rate': 1.2446808510638298e-05, 'epoch': 9.09}


 62%|██████▏   | 610/990 [11:08<07:13,  1.14s/it]

{'loss': 0.0004, 'grad_norm': 0.005475270561873913, 'learning_rate': 1.2127659574468084e-05, 'epoch': 9.24}


 63%|██████▎   | 620/990 [11:20<06:57,  1.13s/it]

{'loss': 0.007, 'grad_norm': 0.006629506126046181, 'learning_rate': 1.1808510638297872e-05, 'epoch': 9.39}


 64%|██████▎   | 630/990 [11:31<06:50,  1.14s/it]

{'loss': 0.0005, 'grad_norm': 0.013466176576912403, 'learning_rate': 1.148936170212766e-05, 'epoch': 9.55}


 65%|██████▍   | 640/990 [11:43<06:52,  1.18s/it]

{'loss': 0.0007, 'grad_norm': 0.004535311367362738, 'learning_rate': 1.1170212765957447e-05, 'epoch': 9.7}


 66%|██████▌   | 650/990 [11:54<06:34,  1.16s/it]

{'loss': 0.0003, 'grad_norm': 0.003310335101559758, 'learning_rate': 1.0851063829787235e-05, 'epoch': 9.85}


 67%|██████▋   | 660/990 [12:05<05:26,  1.01it/s]

{'loss': 0.0004, 'grad_norm': 0.016327276825904846, 'learning_rate': 1.0531914893617022e-05, 'epoch': 10.0}



 67%|██████▋   | 660/990 [12:06<05:26,  1.01it/s]

{'eval_loss': 3.895176410675049, 'eval_accuracy': 0.5, 'eval_runtime': 1.0837, 'eval_samples_per_second': 46.137, 'eval_steps_per_second': 0.923, 'epoch': 10.0}


 68%|██████▊   | 670/990 [12:20<06:20,  1.19s/it]

{'loss': 0.0004, 'grad_norm': 0.034401945769786835, 'learning_rate': 1.0212765957446808e-05, 'epoch': 10.15}


 69%|██████▊   | 680/990 [12:31<05:59,  1.16s/it]

{'loss': 0.0004, 'grad_norm': 0.012146178632974625, 'learning_rate': 9.893617021276595e-06, 'epoch': 10.3}


 70%|██████▉   | 690/990 [12:43<05:47,  1.16s/it]

{'loss': 0.0003, 'grad_norm': 0.0049133142456412315, 'learning_rate': 9.574468085106385e-06, 'epoch': 10.45}


 71%|███████   | 700/990 [12:55<05:34,  1.15s/it]

{'loss': 0.0005, 'grad_norm': 0.09274832159280777, 'learning_rate': 9.255319148936171e-06, 'epoch': 10.61}


 72%|███████▏  | 710/990 [13:06<05:23,  1.16s/it]

{'loss': 0.0003, 'grad_norm': 0.0035903456155210733, 'learning_rate': 8.936170212765958e-06, 'epoch': 10.76}


 73%|███████▎  | 720/990 [13:18<05:19,  1.18s/it]

{'loss': 0.0004, 'grad_norm': 0.00442160852253437, 'learning_rate': 8.617021276595746e-06, 'epoch': 10.91}


 73%|███████▎  | 726/990 [13:24<04:22,  1.01it/s]
 73%|███████▎  | 726/990 [13:25<04:22,  1.01it/s]

{'eval_loss': 3.9382128715515137, 'eval_accuracy': 0.54, 'eval_runtime': 1.0848, 'eval_samples_per_second': 46.092, 'eval_steps_per_second': 0.922, 'epoch': 11.0}


 74%|███████▎  | 730/990 [13:32<06:35,  1.52s/it]

{'loss': 0.0004, 'grad_norm': 0.011494324542582035, 'learning_rate': 8.297872340425532e-06, 'epoch': 11.06}


 75%|███████▍  | 740/990 [13:44<04:52,  1.17s/it]

{'loss': 0.0002, 'grad_norm': 0.08385349810123444, 'learning_rate': 7.978723404255319e-06, 'epoch': 11.21}


 76%|███████▌  | 750/990 [13:55<04:39,  1.17s/it]

{'loss': 0.0002, 'grad_norm': 0.005422601476311684, 'learning_rate': 7.659574468085105e-06, 'epoch': 11.36}


 77%|███████▋  | 760/990 [14:07<04:29,  1.17s/it]

{'loss': 0.0003, 'grad_norm': 0.20716777443885803, 'learning_rate': 7.3404255319148934e-06, 'epoch': 11.52}


 78%|███████▊  | 770/990 [14:19<04:16,  1.17s/it]

{'loss': 0.0004, 'grad_norm': 0.012087993323802948, 'learning_rate': 7.021276595744681e-06, 'epoch': 11.67}


 79%|███████▉  | 780/990 [14:31<04:11,  1.20s/it]

{'loss': 0.0003, 'grad_norm': 0.044569697231054306, 'learning_rate': 6.702127659574468e-06, 'epoch': 11.82}


 80%|███████▉  | 790/990 [14:42<03:52,  1.16s/it]

{'loss': 0.0005, 'grad_norm': 0.002934070071205497, 'learning_rate': 6.3829787234042555e-06, 'epoch': 11.97}


 80%|████████  | 792/990 [14:44<03:19,  1.01s/it]
 80%|████████  | 792/990 [14:45<03:19,  1.01s/it]

{'eval_loss': 3.9208686351776123, 'eval_accuracy': 0.52, 'eval_runtime': 1.0977, 'eval_samples_per_second': 45.551, 'eval_steps_per_second': 0.911, 'epoch': 12.0}


 81%|████████  | 800/990 [14:57<03:58,  1.26s/it]

{'loss': 0.0006, 'grad_norm': 1.159438967704773, 'learning_rate': 6.063829787234042e-06, 'epoch': 12.12}


 82%|████████▏ | 810/990 [15:08<03:32,  1.18s/it]

{'loss': 0.0002, 'grad_norm': 0.00237897876650095, 'learning_rate': 5.74468085106383e-06, 'epoch': 12.27}


 83%|████████▎ | 820/990 [15:20<03:20,  1.18s/it]

{'loss': 0.0003, 'grad_norm': 0.0022905091755092144, 'learning_rate': 5.4255319148936176e-06, 'epoch': 12.42}


 84%|████████▍ | 830/990 [15:32<03:08,  1.18s/it]

{'loss': 0.0003, 'grad_norm': 0.00392352556809783, 'learning_rate': 5.106382978723404e-06, 'epoch': 12.58}


 85%|████████▍ | 840/990 [15:44<02:58,  1.19s/it]

{'loss': 0.0002, 'grad_norm': 0.0036475975066423416, 'learning_rate': 4.787234042553192e-06, 'epoch': 12.73}


 86%|████████▌ | 850/990 [15:56<02:44,  1.18s/it]

{'loss': 0.0003, 'grad_norm': 0.0032255761325359344, 'learning_rate': 4.468085106382979e-06, 'epoch': 12.88}


 87%|████████▋ | 858/990 [16:05<02:13,  1.01s/it]
 87%|████████▋ | 858/990 [16:06<02:13,  1.01s/it]

{'eval_loss': 3.998152494430542, 'eval_accuracy': 0.54, 'eval_runtime': 1.1073, 'eval_samples_per_second': 45.155, 'eval_steps_per_second': 0.903, 'epoch': 13.0}


 87%|████████▋ | 860/990 [16:10<03:34,  1.65s/it]

{'loss': 0.0006, 'grad_norm': 0.0022282723803073168, 'learning_rate': 4.148936170212766e-06, 'epoch': 13.03}


 88%|████████▊ | 870/990 [16:22<02:25,  1.21s/it]

{'loss': 0.0003, 'grad_norm': 0.0023255418054759502, 'learning_rate': 3.829787234042553e-06, 'epoch': 13.18}


 89%|████████▉ | 880/990 [16:34<02:09,  1.18s/it]

{'loss': 0.0003, 'grad_norm': 0.004972984082996845, 'learning_rate': 3.5106382978723404e-06, 'epoch': 13.33}


 90%|████████▉ | 890/990 [16:46<01:58,  1.19s/it]

{'loss': 0.0003, 'grad_norm': 0.002421953249722719, 'learning_rate': 3.1914893617021277e-06, 'epoch': 13.48}


 91%|█████████ | 900/990 [16:58<01:48,  1.20s/it]

{'loss': 0.0003, 'grad_norm': 0.012364249676465988, 'learning_rate': 2.872340425531915e-06, 'epoch': 13.64}


 92%|█████████▏| 910/990 [17:09<01:34,  1.19s/it]

{'loss': 0.0002, 'grad_norm': 0.004709863569587469, 'learning_rate': 2.553191489361702e-06, 'epoch': 13.79}


 93%|█████████▎| 920/990 [17:22<01:25,  1.22s/it]

{'loss': 0.0002, 'grad_norm': 0.0026636456605046988, 'learning_rate': 2.2340425531914894e-06, 'epoch': 13.94}


 93%|█████████▎| 924/990 [17:26<01:08,  1.04s/it]
 93%|█████████▎| 924/990 [17:27<01:08,  1.04s/it]

{'eval_loss': 4.021026134490967, 'eval_accuracy': 0.54, 'eval_runtime': 1.1761, 'eval_samples_per_second': 42.514, 'eval_steps_per_second': 0.85, 'epoch': 14.0}


 94%|█████████▍| 930/990 [17:36<01:23,  1.39s/it]

{'loss': 0.0003, 'grad_norm': 0.004284983035176992, 'learning_rate': 1.9148936170212763e-06, 'epoch': 14.09}


 95%|█████████▍| 940/990 [17:49<01:01,  1.23s/it]

{'loss': 0.0003, 'grad_norm': 0.0035107482690364122, 'learning_rate': 1.5957446808510639e-06, 'epoch': 14.24}


 96%|█████████▌| 950/990 [18:01<00:48,  1.22s/it]

{'loss': 0.0003, 'grad_norm': 0.002967262174934149, 'learning_rate': 1.276595744680851e-06, 'epoch': 14.39}


 97%|█████████▋| 960/990 [18:13<00:36,  1.22s/it]

{'loss': 0.0003, 'grad_norm': 0.006479703821241856, 'learning_rate': 9.574468085106382e-07, 'epoch': 14.55}


 98%|█████████▊| 970/990 [18:25<00:24,  1.22s/it]

{'loss': 0.0003, 'grad_norm': 0.0038693526294082403, 'learning_rate': 6.382978723404255e-07, 'epoch': 14.7}


 99%|█████████▉| 980/990 [18:37<00:11,  1.19s/it]

{'loss': 0.0002, 'grad_norm': 0.0035491385497152805, 'learning_rate': 3.1914893617021275e-07, 'epoch': 14.85}


100%|██████████| 990/990 [18:49<00:00,  1.01s/it]

{'loss': 0.0002, 'grad_norm': 0.002191763138398528, 'learning_rate': 0.0, 'epoch': 15.0}



100%|██████████| 990/990 [18:52<00:00,  1.01s/it]

{'eval_loss': 4.027500629425049, 'eval_accuracy': 0.54, 'eval_runtime': 0.6889, 'eval_samples_per_second': 72.579, 'eval_steps_per_second': 1.452, 'epoch': 15.0}


100%|██████████| 990/990 [18:55<00:00,  1.15s/it]

{'train_runtime': 1135.4296, 'train_samples_per_second': 13.832, 'train_steps_per_second': 0.872, 'train_loss': 0.06344544435851276, 'epoch': 15.0}





TrainOutput(global_step=990, training_loss=0.06344544435851276, metrics={'train_runtime': 1135.4296, 'train_samples_per_second': 13.832, 'train_steps_per_second': 0.872, 'total_flos': 1033058331601920.0, 'train_loss': 0.06344544435851276, 'epoch': 15.0})

In [47]:
results = trainer.evaluate(eval_dataset=val_dataset)
print("Evaluation Results:", results)

100%|██████████| 1/1 [00:00<00:00, 530.99it/s]

Evaluation Results: {'eval_loss': 3.2418322563171387, 'eval_accuracy': 0.56, 'eval_runtime': 0.7452, 'eval_samples_per_second': 67.098, 'eval_steps_per_second': 1.342, 'epoch': 15.0}





In [50]:
# trainer_expanded.save_model('./fine-tuned-mbert-sentiment-expanded')

In [1]:
model.save_pretrained("../model")
tokenizer.save_pretrained("../model")

NameError: name 'model' is not defined