In [1]:
import os
from datasets import load_dataset
from transformers import AutoTokenizer

#1 ---------------------------------------------------------------------------------- 1
#1.1
imdb = load_dataset("stanfordnlp/imdb")
print(imdb)

#1.2
train_full = imdb["train"].shuffle(seed=42).select(range(1000))
train_dataset = train_full.select(range(900))
val_dataset = train_full.select(range(900, 1000))
test_dataset = imdb["test"].shuffle(seed=42).select(range(200))

#1.3
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

#1.4
def tokenize_function(example):
    return tokenizer(
        example["text"],
        truncation=True,
        max_length=tokenizer.model_max_length
    )

train_dataset_tok = train_dataset.map(tokenize_function, batched=True)
val_dataset_tok = val_dataset.map(tokenize_function, batched=True)
test_dataset_tok = test_dataset.map(tokenize_function, batched=True)

#1.5
sample = train_dataset[0]
tokens = tokenizer.convert_ids_to_tokens(
    tokenizer(sample["text"], truncation=True, max_length=tokenizer.model_max_length)["input_ids"]
)
print("Original text:", sample["text"])
print("Tokenized:", tokens)

  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})


Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 9770.61 examples/s]
Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 7722.47 examples/s]
Map: 100%|███████████████████████████| 200/200 [00:00<00:00, 8005.31 examples/s]

Original text: There is no relation at all between Fortier and Profiler but the fact that both are police series about violent crimes. Profiler looks crispy, Fortier looks classic. Profiler plots are quite simple. Fortier's plot are far more complicated... Fortier looks more like Prime Suspect, if we have to spot similarities... The main character is weak and weirdo, but have "clairvoyance". People like to compare, to judge, to evaluate. How about just enjoying? Funny thing too, people writing Fortier looks American but, on the other hand, arguing they prefer American series (!!!). Maybe it's the language, or the spirit, but I think this series is more English than American. By the way, the actors are really good and funny. The acting is not superficial at all...
Tokenized: ['[CLS]', 'there', 'is', 'no', 'relation', 'at', 'all', 'between', 'fort', '##ier', 'and', 'profile', '##r', 'but', 'the', 'fact', 'that', 'both', 'are', 'police', 'series', 'about', 'violent', 'crimes', '.', 'profi




In [2]:
from transformers import DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np

#2 ---------------------------------------------------------------------------------- 2
#2.1
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

#2.2
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    preds = np.argmax(predictions, axis=1)
    accuracy = (preds == labels).mean()
    return {"accuracy": accuracy}

#2.3
id2label = {0: "neg", 1: "pos"}
label2id = {"neg": 0, "pos": 1}
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id
)

#2.4 & #2.5
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
)

#2.6
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_tok,
    eval_dataset=val_dataset_tok,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

#2.7
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.411256,0.86
2,No log,0.272543,0.91
3,No log,0.248173,0.89


TrainOutput(global_step=171, training_loss=0.37566893281992414, metrics={'train_runtime': 53.9881, 'train_samples_per_second': 50.011, 'train_steps_per_second': 3.167, 'total_flos': 701435573593680.0, 'train_loss': 0.37566893281992414, 'epoch': 3.0})

In [3]:
#3 ---------------------------------------------------------------------------------- 3
#3.1
test_results = trainer.evaluate(test_dataset_tok)
print("Test results:", test_results)

#3.2
from transformers import pipeline

sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=trainer.model,
    tokenizer=tokenizer,
    device=0
)

texts = [
    "Nice car bro!",
    "I hate to wake up so early!",
    "Average size, nothing special."
]

for text in texts:
    result = sentiment_pipe(text)
    print(f"Text: {text}\nResult: {result}\n")

Device set to use cuda:0


Test results: {'eval_loss': 0.3112679719924927, 'eval_accuracy': 0.86, 'eval_runtime': 1.1819, 'eval_samples_per_second': 169.225, 'eval_steps_per_second': 5.923, 'epoch': 3.0}
Text: Nice car bro!
Result: [{'label': 'pos', 'score': 0.7937133312225342}]

Text: I hate to wake up so early!
Result: [{'label': 'neg', 'score': 0.7836753726005554}]

Text: Average size, nothing special.
Result: [{'label': 'neg', 'score': 0.8390772938728333}]

