In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

import torch

# Checkpoint name shared by the model and the tokenizer below.
model_id = 'distilbert-base-uncased'

# The model is explicitly placed on the CPU device.
device = torch.device("cpu")

# Load the tokenizer and the sequence-classification model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
model = model.to(device)


In [None]:
from datasets import load_dataset
# Load the dataset registered under the name "imdb"; later cells index its
# "train" and "test" splits.
imdb = load_dataset("imdb")

In [None]:
# Shuffle each split with a fixed seed and keep only the first rows, giving a
# reproducible 3000-example training subset and 300-example test subset.
# `select` takes the index sequence directly, so `range(n)` is passed as-is
# rather than being copied into a list first.
small_train_dataset = imdb["train"].shuffle(seed=42).select(range(3000))
small_test_dataset = imdb["test"].shuffle(seed=42).select(range(300))

In [None]:
# Quick sanity check: tokenize a short sample sentence; as the last expression
# in the cell, the resulting token list is displayed as the cell output.
test_input = 'how are you? bhagya'
tokenizer.tokenize(test_input)

In [None]:
def preprocess_func(datasets):
    """Tokenize the ``'text'`` field of a batch of examples.

    Args:
        datasets: Mapping-like batch exposing a ``'text'`` entry; it is called
            through ``Dataset.map(..., batched=True)`` in this notebook.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts, called
        with truncation and padding enabled and ``max_length=128``.
    """
    texts = datasets['text']
    encoded = tokenizer(
        texts,
        max_length=128,
        truncation=True,
        padding=True,
    )
    return encoded

# Apply preprocess_func to both subsets; batched=True means the function
# receives a batch of examples per call rather than a single example.
tokenized_train = small_train_dataset.map(preprocess_func, batched=True)
tokenized_test = small_test_dataset.map(preprocess_func, batched=True)

In [None]:
import numpy as np
from evaluate import load

# Metric objects are cached here so repeated evaluations reuse them instead of
# calling `load` again on every invocation of compute_metrics.
_METRIC_CACHE = {}


def compute_metrics(eval_pred):
    """Compute accuracy and F1 for one evaluation pass.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``. The predicted
            class is the argmax over the last axis of ``logits``.

    Returns:
        dict: ``{"accuracy": ..., "f1": ...}`` taken from the results of the
        "accuracy" and "f1" metrics obtained through ``load``.
    """
    # Load each metric only the first time it is needed.
    for metric_name in ("accuracy", "f1"):
        if metric_name not in _METRIC_CACHE:
            _METRIC_CACHE[metric_name] = load(metric_name)

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = _METRIC_CACHE["accuracy"].compute(
        predictions=predictions, references=labels
    )["accuracy"]
    f1 = _METRIC_CACHE["f1"].compute(
        predictions=predictions, references=labels
    )["f1"]
    return {"accuracy": accuracy, "f1": f1}


In [None]:
from huggingface_hub import notebook_login
# Log in to the Hugging Face Hub from inside the notebook; presumably required
# for the later trainer.push_to_hub() call -- confirm.
notebook_login()

In [None]:
from transformers import TrainingArguments, Trainer

# Directory name handed to TrainingArguments as output_dir. The inference cell
# later loads "allelbhagya/fine-tune-sentiment", which presumably is the Hub
# repo created from this same name -- confirm.
repo_name = "fine-tune-sentiment"

# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir=repo_name,
    num_train_epochs=4,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_strategy="epoch",
    report_to=[],  # empty list: no reporting integrations are requested
)

# Bundle the model, tokenized subsets, tokenizer and metric function together.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)

In [None]:
# Run fine-tuning with the model, arguments and training subset configured on `trainer`.
trainer.train()

In [None]:
# Evaluate using the eval_dataset given to the Trainer (tokenized_test); the
# compute_metrics function passed to the Trainer supplies accuracy and F1.
trainer.evaluate()

In [None]:
# Upload the trained model to the Hugging Face Hub (relies on the earlier login).
trainer.push_to_hub()


In [None]:
from transformers import pipeline
 
# Build an inference pipeline from the Hub model "allelbhagya/fine-tune-sentiment".
# No task is passed explicitly -- presumably it is inferred from the model's Hub
# metadata; confirm.
sentiment_model = pipeline(model="allelbhagya/fine-tune-sentiment")


In [None]:
# Smoke-test the pipeline on one clearly positive and one clearly negative sentence.
sentiment_model(["this movie is amazing", "i hate it"])