In [1]:
import torch
import numpy as np
import random
from dotenv import load_dotenv
import os
from huggingface_hub import login

load_dotenv()

True

In [2]:
HF_TOKEN = os.getenv("HF_TOKEN")
login(HF_TOKEN)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
SEED = 42
def set_seed(seed):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.mps.manual_seed(seed)
set_seed(SEED)
print(f"Seed set to {SEED}")

Seed set to 42


In [4]:
def get_device():
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device('cpu')
    return device
device = get_device()
print("Using device:", device)

Using device: mps


# Finetuning a Model

### 1. Use the datasets library to load the ```imdb``` dataset for sentiment classification, selecting 200 samples for training and 50 samples for testing from the respective dataset splits. Make sure there are the same amount of positive as negative samples in each split. Print the label counts for each split as well as the first sample.

In [5]:
from datasets import load_dataset, concatenate_datasets

In [6]:
dataset = load_dataset("stanfordnlp/imdb")

In [7]:
def select_samples(split, num_samples):
    pos_samples = dataset[split].filter(lambda x: x['label'] == 1).select(range(num_samples // 2))
    neg_samples = dataset[split].filter(lambda x: x['label'] == 0).select(range(num_samples // 2))
    return concatenate_datasets([pos_samples, neg_samples]).shuffle(seed = SEED)

In [8]:
train_dataset = select_samples('train', 200)
test_dataset = select_samples('test', 50)

In [9]:
from collections import Counter

In [10]:
train_labels = [example['label'] for example in train_dataset]
test_labels = [example['label'] for example in test_dataset]

In [11]:
print("Training Set Label Counts:", Counter(train_labels))
print("Testing Set Label Counts:", Counter(test_labels))

Training Set Label Counts: Counter({0: 100, 1: 100})
Testing Set Label Counts: Counter({1: 25, 0: 25})


In [12]:
print("First Training Sample:")
print("Text:", train_dataset[0]['text'])
print("Label:", train_dataset[0]['label'])

First Training Sample:
Text: Ned Kelly (Ledger), the infamous Australian outlaw and legend. Sort of like Robin Hood, with a mix of Billy the Kid, Australians love the legend of how he stood up against the English aristocratic oppression, and united the lower classes to change Australia forever. The fact that the lower classes of the time were around 70% immigrant criminals seems to be casually skimmed around by this film. Indeed, quite a few so called `facts' in this film are, on reflection, a tad dubious.<br /><br />I suppose the suspicions should have been aroused when, in the opening credits, it was claimed that this film is based upon the book, `Our Sunshine'. If ever a romanticized version of truth could be seen in a name for a book, there it was. This wasn't going to be a historical epic, but just an adaptation of one of many dubious legends of Ned Kelly, albeit a harsh and sporadically brutal version.<br /><br />Unfortunately, Ned Kelly is nothing more than an overblown Hallmark

In [13]:
print("First Test Sample:")
print("Text:", test_dataset[0]['text'])
print("Label:", test_dataset[0]['label'])

First Test Sample:
Text: Overall, a well done movie. There were the parts that made me wince, and there were the parts that I threw my hands up at, but I came away with something more than I gone in with.<br /><br />I think the movie suffers from some serious excess ambition. Without spoiling it, let me say that the obvious references to the trial by fire in Ramayana, is way beyond what this movie stands for. The Ramayana is an epic. Not a 200 page book that puts down women in India. The movie is about two girls married into a very distinctive Indian family. While the basic tenets of the "unwritten laws of the family tradition" seem to be that of conservative India, let me assure my reader that I (having lived in Delhi for 12 years) found entire parts that just did not ring those bells. I mean some things and some actions are very true, but some other stuff is just way off the mark. Especially today.<br /><br />Delhi is complicated. India is complicated. The director tries to simplify 

### 2. Load the tokenizer for the ```distilbert-base-uncased``` model and tokenize the data.

In [14]:
from transformers import AutoTokenizer

In [15]:
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

In [16]:
def tokenize_data(example):
    return tokenizer(example['text'], padding='max_length', truncation=True)

In [17]:
tokenized_train = train_dataset.map(tokenize_data, batched=True)
tokenized_test = test_dataset.map(tokenize_data, batched=True)

### 3. Load the ```distilbert-base-uncased``` model on the GPU and adapt it for binary classification by adding a classification head. Print the model configuration.

In [18]:
from transformers import AutoModelForSequenceClassification

In [19]:
model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased", num_labels=2).to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
print("Model Configuration:")
print(model.config)

Model Configuration:
DistilBertConfig {
  "_attn_implementation_autoset": true,
  "_name_or_path": "distilbert/distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.46.1",
  "vocab_size": 30522
}



### 4. Use the ```Trainer``` API from Hugging Face for training. Configure the Trainer with:
- Training arguments like ```num_train_epochs``` , ```per_device_train_batch_size```, and ```per_device_eval_batch_size```. Choose appropriate values.
- ```evaluation_strategy``` set to "```epoch```" to evaluate the model at the end of each epoch.

In [21]:
from transformers import TrainingArguments, Trainer

Using ```load_best_model_at_end=True``` so that setting a higher value for ```num_train_epochs``` does not adversely affect the final model performance, since the best checkpoint from all epochs will be restored at the end of training.

In [22]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=50,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    eval_strategy='epoch',
    save_strategy='epoch',
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy'
)

In [23]:
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

### 5. Define a simple metric (accuracy) to monitor performance during training.

In [24]:
import numpy as np
import evaluate

metric = evaluate.load('accuracy')

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

### 6. Train the model with the Trainer. Print the training and validation loss as well as the validation accuracy after each epoch.

In [25]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()

  trainer = Trainer(


  0%|          | 0/350 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.6844596266746521, 'eval_accuracy': 0.5, 'eval_runtime': 0.5982, 'eval_samples_per_second': 83.583, 'eval_steps_per_second': 3.343, 'epoch': 1.0}
{'loss': 0.6789, 'grad_norm': 0.9121620059013367, 'learning_rate': 4.8571428571428576e-05, 'epoch': 1.43}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.7627889513969421, 'eval_accuracy': 0.5, 'eval_runtime': 0.6042, 'eval_samples_per_second': 82.756, 'eval_steps_per_second': 3.31, 'epoch': 2.0}
{'loss': 0.6011, 'grad_norm': 3.2625017166137695, 'learning_rate': 4.714285714285714e-05, 'epoch': 2.86}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.6330229043960571, 'eval_accuracy': 0.62, 'eval_runtime': 0.64, 'eval_samples_per_second': 78.119, 'eval_steps_per_second': 3.125, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.7780811786651611, 'eval_accuracy': 0.58, 'eval_runtime': 0.6213, 'eval_samples_per_second': 80.482, 'eval_steps_per_second': 3.219, 'epoch': 4.0}
{'loss': 0.3463, 'grad_norm': 5.981366157531738, 'learning_rate': 4.5714285714285716e-05, 'epoch': 4.29}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8604112267494202, 'eval_accuracy': 0.62, 'eval_runtime': 0.5923, 'eval_samples_per_second': 84.415, 'eval_steps_per_second': 3.377, 'epoch': 5.0}
{'loss': 0.1205, 'grad_norm': 4.945954322814941, 'learning_rate': 4.428571428571428e-05, 'epoch': 5.71}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1406810283660889, 'eval_accuracy': 0.56, 'eval_runtime': 0.5463, 'eval_samples_per_second': 91.524, 'eval_steps_per_second': 3.661, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.6705536842346191, 'eval_accuracy': 0.54, 'eval_runtime': 0.5993, 'eval_samples_per_second': 83.428, 'eval_steps_per_second': 3.337, 'epoch': 7.0}
{'loss': 0.0165, 'grad_norm': 0.15385520458221436, 'learning_rate': 4.2857142857142856e-05, 'epoch': 7.14}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.8818931579589844, 'eval_accuracy': 0.56, 'eval_runtime': 0.6282, 'eval_samples_per_second': 79.592, 'eval_steps_per_second': 3.184, 'epoch': 8.0}
{'loss': 0.0045, 'grad_norm': 0.04631126672029495, 'learning_rate': 4.1428571428571437e-05, 'epoch': 8.57}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.0453391075134277, 'eval_accuracy': 0.56, 'eval_runtime': 0.5735, 'eval_samples_per_second': 87.189, 'eval_steps_per_second': 3.488, 'epoch': 9.0}
{'loss': 0.0023, 'grad_norm': 0.02741876244544983, 'learning_rate': 4e-05, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.164607286453247, 'eval_accuracy': 0.56, 'eval_runtime': 0.6169, 'eval_samples_per_second': 81.045, 'eval_steps_per_second': 3.242, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.2466864585876465, 'eval_accuracy': 0.56, 'eval_runtime': 0.5939, 'eval_samples_per_second': 84.191, 'eval_steps_per_second': 3.368, 'epoch': 11.0}
{'loss': 0.0015, 'grad_norm': 0.01945476606488228, 'learning_rate': 3.857142857142858e-05, 'epoch': 11.43}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.3045849800109863, 'eval_accuracy': 0.56, 'eval_runtime': 0.5773, 'eval_samples_per_second': 86.604, 'eval_steps_per_second': 3.464, 'epoch': 12.0}
{'loss': 0.0012, 'grad_norm': 0.01683773286640644, 'learning_rate': 3.7142857142857143e-05, 'epoch': 12.86}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.3491132259368896, 'eval_accuracy': 0.56, 'eval_runtime': 0.5738, 'eval_samples_per_second': 87.135, 'eval_steps_per_second': 3.485, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.3864901065826416, 'eval_accuracy': 0.56, 'eval_runtime': 0.6287, 'eval_samples_per_second': 79.523, 'eval_steps_per_second': 3.181, 'epoch': 14.0}
{'loss': 0.0009, 'grad_norm': 0.013980508781969547, 'learning_rate': 3.571428571428572e-05, 'epoch': 14.29}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.414905309677124, 'eval_accuracy': 0.56, 'eval_runtime': 0.6092, 'eval_samples_per_second': 82.077, 'eval_steps_per_second': 3.283, 'epoch': 15.0}
{'loss': 0.0008, 'grad_norm': 0.012998536229133606, 'learning_rate': 3.428571428571429e-05, 'epoch': 15.71}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.4414703845977783, 'eval_accuracy': 0.56, 'eval_runtime': 0.5664, 'eval_samples_per_second': 88.275, 'eval_steps_per_second': 3.531, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.4653592109680176, 'eval_accuracy': 0.56, 'eval_runtime': 0.5832, 'eval_samples_per_second': 85.729, 'eval_steps_per_second': 3.429, 'epoch': 17.0}
{'loss': 0.0007, 'grad_norm': 0.012086618691682816, 'learning_rate': 3.285714285714286e-05, 'epoch': 17.14}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.49015736579895, 'eval_accuracy': 0.56, 'eval_runtime': 0.5596, 'eval_samples_per_second': 89.35, 'eval_steps_per_second': 3.574, 'epoch': 18.0}
{'loss': 0.0007, 'grad_norm': 0.01101008616387844, 'learning_rate': 3.142857142857143e-05, 'epoch': 18.57}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.5106382369995117, 'eval_accuracy': 0.56, 'eval_runtime': 0.5899, 'eval_samples_per_second': 84.753, 'eval_steps_per_second': 3.39, 'epoch': 19.0}
{'loss': 0.0006, 'grad_norm': 0.009391388855874538, 'learning_rate': 3e-05, 'epoch': 20.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.5289392471313477, 'eval_accuracy': 0.56, 'eval_runtime': 0.5952, 'eval_samples_per_second': 84.009, 'eval_steps_per_second': 3.36, 'epoch': 20.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.546236038208008, 'eval_accuracy': 0.56, 'eval_runtime': 0.5875, 'eval_samples_per_second': 85.101, 'eval_steps_per_second': 3.404, 'epoch': 21.0}
{'loss': 0.0005, 'grad_norm': 0.008417838253080845, 'learning_rate': 2.857142857142857e-05, 'epoch': 21.43}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.563535213470459, 'eval_accuracy': 0.56, 'eval_runtime': 0.6027, 'eval_samples_per_second': 82.96, 'eval_steps_per_second': 3.318, 'epoch': 22.0}
{'loss': 0.0005, 'grad_norm': 0.00901388842612505, 'learning_rate': 2.714285714285714e-05, 'epoch': 22.86}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.5789284706115723, 'eval_accuracy': 0.56, 'eval_runtime': 0.6517, 'eval_samples_per_second': 76.719, 'eval_steps_per_second': 3.069, 'epoch': 23.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.593454599380493, 'eval_accuracy': 0.56, 'eval_runtime': 0.6223, 'eval_samples_per_second': 80.345, 'eval_steps_per_second': 3.214, 'epoch': 24.0}
{'loss': 0.0005, 'grad_norm': 0.008877181448042393, 'learning_rate': 2.5714285714285714e-05, 'epoch': 24.29}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.607285976409912, 'eval_accuracy': 0.56, 'eval_runtime': 0.6383, 'eval_samples_per_second': 78.339, 'eval_steps_per_second': 3.134, 'epoch': 25.0}
{'loss': 0.0004, 'grad_norm': 0.008311874233186245, 'learning_rate': 2.4285714285714288e-05, 'epoch': 25.71}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.6209354400634766, 'eval_accuracy': 0.58, 'eval_runtime': 0.6023, 'eval_samples_per_second': 83.019, 'eval_steps_per_second': 3.321, 'epoch': 26.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.633516311645508, 'eval_accuracy': 0.58, 'eval_runtime': 0.6628, 'eval_samples_per_second': 75.436, 'eval_steps_per_second': 3.017, 'epoch': 27.0}
{'loss': 0.0004, 'grad_norm': 0.007390887476503849, 'learning_rate': 2.2857142857142858e-05, 'epoch': 27.14}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.645871162414551, 'eval_accuracy': 0.58, 'eval_runtime': 0.67, 'eval_samples_per_second': 74.627, 'eval_steps_per_second': 2.985, 'epoch': 28.0}
{'loss': 0.0004, 'grad_norm': 0.006620308384299278, 'learning_rate': 2.1428571428571428e-05, 'epoch': 28.57}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.6575894355773926, 'eval_accuracy': 0.58, 'eval_runtime': 0.68, 'eval_samples_per_second': 73.527, 'eval_steps_per_second': 2.941, 'epoch': 29.0}
{'loss': 0.0004, 'grad_norm': 0.00783505942672491, 'learning_rate': 2e-05, 'epoch': 30.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.668602705001831, 'eval_accuracy': 0.58, 'eval_runtime': 0.9354, 'eval_samples_per_second': 53.451, 'eval_steps_per_second': 2.138, 'epoch': 30.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.6793129444122314, 'eval_accuracy': 0.58, 'eval_runtime': 0.5834, 'eval_samples_per_second': 85.698, 'eval_steps_per_second': 3.428, 'epoch': 31.0}
{'loss': 0.0004, 'grad_norm': 0.006035269238054752, 'learning_rate': 1.8571428571428572e-05, 'epoch': 31.43}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.68874454498291, 'eval_accuracy': 0.58, 'eval_runtime': 0.8054, 'eval_samples_per_second': 62.082, 'eval_steps_per_second': 2.483, 'epoch': 32.0}
{'loss': 0.0003, 'grad_norm': 0.005285230930894613, 'learning_rate': 1.7142857142857145e-05, 'epoch': 32.86}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.697890520095825, 'eval_accuracy': 0.58, 'eval_runtime': 0.7145, 'eval_samples_per_second': 69.981, 'eval_steps_per_second': 2.799, 'epoch': 33.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7059361934661865, 'eval_accuracy': 0.58, 'eval_runtime': 0.7032, 'eval_samples_per_second': 71.099, 'eval_steps_per_second': 2.844, 'epoch': 34.0}
{'loss': 0.0003, 'grad_norm': 0.005757856648415327, 'learning_rate': 1.5714285714285715e-05, 'epoch': 34.29}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7132089138031006, 'eval_accuracy': 0.58, 'eval_runtime': 0.6905, 'eval_samples_per_second': 72.415, 'eval_steps_per_second': 2.897, 'epoch': 35.0}
{'loss': 0.0003, 'grad_norm': 0.005824069958180189, 'learning_rate': 1.4285714285714285e-05, 'epoch': 35.71}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7182934284210205, 'eval_accuracy': 0.58, 'eval_runtime': 0.6573, 'eval_samples_per_second': 76.074, 'eval_steps_per_second': 3.043, 'epoch': 36.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7236785888671875, 'eval_accuracy': 0.58, 'eval_runtime': 0.6938, 'eval_samples_per_second': 72.063, 'eval_steps_per_second': 2.883, 'epoch': 37.0}
{'loss': 0.0003, 'grad_norm': 0.005626563914120197, 'learning_rate': 1.2857142857142857e-05, 'epoch': 37.14}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7290947437286377, 'eval_accuracy': 0.58, 'eval_runtime': 0.6983, 'eval_samples_per_second': 71.601, 'eval_steps_per_second': 2.864, 'epoch': 38.0}
{'loss': 0.0003, 'grad_norm': 0.005388506222516298, 'learning_rate': 1.1428571428571429e-05, 'epoch': 38.57}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7362172603607178, 'eval_accuracy': 0.58, 'eval_runtime': 0.6526, 'eval_samples_per_second': 76.616, 'eval_steps_per_second': 3.065, 'epoch': 39.0}
{'loss': 0.0003, 'grad_norm': 0.005997044965624809, 'learning_rate': 1e-05, 'epoch': 40.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7429163455963135, 'eval_accuracy': 0.58, 'eval_runtime': 0.6301, 'eval_samples_per_second': 79.355, 'eval_steps_per_second': 3.174, 'epoch': 40.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.749074697494507, 'eval_accuracy': 0.58, 'eval_runtime': 0.6427, 'eval_samples_per_second': 77.797, 'eval_steps_per_second': 3.112, 'epoch': 41.0}
{'loss': 0.0003, 'grad_norm': 0.005078851245343685, 'learning_rate': 8.571428571428573e-06, 'epoch': 41.43}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7541635036468506, 'eval_accuracy': 0.58, 'eval_runtime': 0.5897, 'eval_samples_per_second': 84.791, 'eval_steps_per_second': 3.392, 'epoch': 42.0}
{'loss': 0.0003, 'grad_norm': 0.0046392218209803104, 'learning_rate': 7.142857142857143e-06, 'epoch': 42.86}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.758768320083618, 'eval_accuracy': 0.58, 'eval_runtime': 0.6407, 'eval_samples_per_second': 78.043, 'eval_steps_per_second': 3.122, 'epoch': 43.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.762705087661743, 'eval_accuracy': 0.58, 'eval_runtime': 0.6695, 'eval_samples_per_second': 74.684, 'eval_steps_per_second': 2.987, 'epoch': 44.0}
{'loss': 0.0003, 'grad_norm': 0.004926219116896391, 'learning_rate': 5.7142857142857145e-06, 'epoch': 44.29}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7657926082611084, 'eval_accuracy': 0.58, 'eval_runtime': 0.8487, 'eval_samples_per_second': 58.915, 'eval_steps_per_second': 2.357, 'epoch': 45.0}
{'loss': 0.0003, 'grad_norm': 0.005157209932804108, 'learning_rate': 4.285714285714286e-06, 'epoch': 45.71}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.768049955368042, 'eval_accuracy': 0.58, 'eval_runtime': 0.601, 'eval_samples_per_second': 83.199, 'eval_steps_per_second': 3.328, 'epoch': 46.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7698822021484375, 'eval_accuracy': 0.58, 'eval_runtime': 0.6354, 'eval_samples_per_second': 78.686, 'eval_steps_per_second': 3.147, 'epoch': 47.0}
{'loss': 0.0003, 'grad_norm': 0.004976932425051928, 'learning_rate': 2.8571428571428573e-06, 'epoch': 47.14}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.7712526321411133, 'eval_accuracy': 0.58, 'eval_runtime': 0.5854, 'eval_samples_per_second': 85.418, 'eval_steps_per_second': 3.417, 'epoch': 48.0}
{'loss': 0.0003, 'grad_norm': 0.004660893231630325, 'learning_rate': 1.4285714285714286e-06, 'epoch': 48.57}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.772052049636841, 'eval_accuracy': 0.58, 'eval_runtime': 0.5665, 'eval_samples_per_second': 88.261, 'eval_steps_per_second': 3.53, 'epoch': 49.0}
{'loss': 0.0003, 'grad_norm': 0.0049386415630578995, 'learning_rate': 0.0, 'epoch': 50.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.772334098815918, 'eval_accuracy': 0.58, 'eval_runtime': 0.7107, 'eval_samples_per_second': 70.356, 'eval_steps_per_second': 2.814, 'epoch': 50.0}
{'train_runtime': 438.5294, 'train_samples_per_second': 22.803, 'train_steps_per_second': 0.798, 'train_loss': 0.05095987924840301, 'epoch': 50.0}


TrainOutput(global_step=350, training_loss=0.05095987924840301, metrics={'train_runtime': 438.5294, 'train_samples_per_second': 22.803, 'train_steps_per_second': 0.798, 'total_flos': 1324673986560000.0, 'train_loss': 0.05095987924840301, 'epoch': 50.0})

In [26]:
results = trainer.evaluate()
print(results)

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.6330229043960571, 'eval_accuracy': 0.62, 'eval_runtime': 0.6544, 'eval_samples_per_second': 76.405, 'eval_steps_per_second': 3.056, 'epoch': 50.0}


In [29]:
print("Accuracy:", results['eval_accuracy'])

Accuracy: 0.62


### 7. Test the fine-tuned model on a few new sentences (not in the training/validation set) and observe its predictions. Print both the input and the predicted sentiment.

In [27]:
new_sentences = [
    "This movie was fantastic! I loved every minute of it.",
    "What a waste of time. The plot was dull and uninteresting.",
    "An average film with some good moments."
]

new_inputs = tokenizer(new_sentences, padding=True, truncation=True, return_tensors='pt').to(device)

with torch.no_grad():
    outputs = model(**new_inputs)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)

label_map = {0: 'Negative', 1: 'Positive'}

for sentence, pred in zip(new_sentences, predictions):
    print(f"\nSentence: {sentence}")
    print(f"Predicted Sentiment: {label_map[pred.item()]}")



Sentence: This movie was fantastic! I loved every minute of it.
Predicted Sentiment: Positive

Sentence: What a waste of time. The plot was dull and uninteresting.
Predicted Sentiment: Negative

Sentence: An average film with some good moments.
Predicted Sentiment: Negative


### ```Additionally checking the accuracy by training the model for 3 epochs on the entire dataset```

In [28]:
dataset_full = load_dataset("stanfordnlp/imdb")
full_train_dataset = dataset_full['train']
full_test_dataset = dataset_full['test']

full_train_dataset_tokenized = full_train_dataset.map(tokenize_data, batched=True)
full_test_dataset_tokenized = full_test_dataset.map(tokenize_data, batched=True)

model2 = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased", num_labels=2).to(device)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    eval_strategy='epoch',
    save_strategy='epoch',
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy'
)


trainer = Trainer(
    model=model2,
    args=training_args,
    train_dataset=full_train_dataset_tokenized,
    eval_dataset=full_test_dataset_tokenized,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


  0%|          | 0/2346 [00:00<?, ?it/s]

{'loss': 0.6962, 'grad_norm': 0.581632673740387, 'learning_rate': 4.9786871270247234e-05, 'epoch': 0.01}
{'loss': 0.6575, 'grad_norm': 2.4042141437530518, 'learning_rate': 4.957374254049446e-05, 'epoch': 0.03}
{'loss': 0.5774, 'grad_norm': 8.75777530670166, 'learning_rate': 4.936061381074169e-05, 'epoch': 0.04}
{'loss': 0.3971, 'grad_norm': 7.970410346984863, 'learning_rate': 4.914748508098892e-05, 'epoch': 0.05}
{'loss': 0.3036, 'grad_norm': 7.598392009735107, 'learning_rate': 4.893435635123615e-05, 'epoch': 0.06}
{'loss': 0.3029, 'grad_norm': 17.06102752685547, 'learning_rate': 4.872122762148338e-05, 'epoch': 0.08}
{'loss': 0.3658, 'grad_norm': 5.587943077087402, 'learning_rate': 4.850809889173061e-05, 'epoch': 0.09}
{'loss': 0.309, 'grad_norm': 4.550342559814453, 'learning_rate': 4.8294970161977835e-05, 'epoch': 0.1}
{'loss': 0.31, 'grad_norm': 2.641946315765381, 'learning_rate': 4.8081841432225067e-05, 'epoch': 0.12}
{'loss': 0.2944, 'grad_norm': 7.626326560974121, 'learning_rate':

  0%|          | 0/782 [00:00<?, ?it/s]

{'eval_loss': 0.20838667452335358, 'eval_accuracy': 0.9176, 'eval_runtime': 294.0916, 'eval_samples_per_second': 85.008, 'eval_steps_per_second': 2.659, 'epoch': 1.0}
{'loss': 0.1295, 'grad_norm': 4.091047763824463, 'learning_rate': 3.316283034953112e-05, 'epoch': 1.01}
{'loss': 0.1054, 'grad_norm': 5.312215805053711, 'learning_rate': 3.294970161977835e-05, 'epoch': 1.02}
{'loss': 0.1301, 'grad_norm': 3.2413785457611084, 'learning_rate': 3.273657289002558e-05, 'epoch': 1.04}
{'loss': 0.1984, 'grad_norm': 6.837656497955322, 'learning_rate': 3.252344416027281e-05, 'epoch': 1.05}
{'loss': 0.1308, 'grad_norm': 4.515942573547363, 'learning_rate': 3.2310315430520036e-05, 'epoch': 1.06}
{'loss': 0.1586, 'grad_norm': 2.6825687885284424, 'learning_rate': 3.209718670076726e-05, 'epoch': 1.07}
{'loss': 0.1022, 'grad_norm': 3.8952393531799316, 'learning_rate': 3.188405797101449e-05, 'epoch': 1.09}
{'loss': 0.1503, 'grad_norm': 2.707221508026123, 'learning_rate': 3.1670929241261724e-05, 'epoch': 1.

  0%|          | 0/782 [00:00<?, ?it/s]

{'eval_loss': 0.2059822827577591, 'eval_accuracy': 0.93228, 'eval_runtime': 301.3247, 'eval_samples_per_second': 82.967, 'eval_steps_per_second': 2.595, 'epoch': 2.0}
{'loss': 0.1411, 'grad_norm': 5.272259712219238, 'learning_rate': 1.6538789428815005e-05, 'epoch': 2.01}
{'loss': 0.0388, 'grad_norm': 3.0963997840881348, 'learning_rate': 1.6325660699062233e-05, 'epoch': 2.02}
{'loss': 0.0221, 'grad_norm': 0.10902952402830124, 'learning_rate': 1.6112531969309465e-05, 'epoch': 2.03}
{'loss': 0.031, 'grad_norm': 6.9734673500061035, 'learning_rate': 1.5899403239556693e-05, 'epoch': 2.05}
{'loss': 0.0107, 'grad_norm': 0.05837460979819298, 'learning_rate': 1.568627450980392e-05, 'epoch': 2.06}
{'loss': 0.0357, 'grad_norm': 6.696451187133789, 'learning_rate': 1.5473145780051153e-05, 'epoch': 2.07}
{'loss': 0.1112, 'grad_norm': 1.608903408050537, 'learning_rate': 1.526001705029838e-05, 'epoch': 2.08}
{'loss': 0.0524, 'grad_norm': 2.1577959060668945, 'learning_rate': 1.504688832054561e-05, 'epoc

  0%|          | 0/782 [00:00<?, ?it/s]

{'eval_loss': 0.2704622447490692, 'eval_accuracy': 0.93288, 'eval_runtime': 296.5088, 'eval_samples_per_second': 84.315, 'eval_steps_per_second': 2.637, 'epoch': 3.0}
{'train_runtime': 3385.8979, 'train_samples_per_second': 22.151, 'train_steps_per_second': 0.693, 'train_loss': 0.14583309073531517, 'epoch': 3.0}


TrainOutput(global_step=2346, training_loss=0.14583309073531517, metrics={'train_runtime': 3385.8979, 'train_samples_per_second': 22.151, 'train_steps_per_second': 0.693, 'total_flos': 9935054899200000.0, 'train_loss': 0.14583309073531517, 'epoch': 3.0})

In [30]:
results = trainer.evaluate()
print(results)

  0%|          | 0/782 [00:00<?, ?it/s]

{'eval_loss': 0.2704622447490692, 'eval_accuracy': 0.93288, 'eval_runtime': 285.9694, 'eval_samples_per_second': 87.422, 'eval_steps_per_second': 2.735, 'epoch': 3.0}


In [31]:
print("Accuracy:", results['eval_accuracy'])

Accuracy: 0.93288
