# BERT for Sentiment Classification

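In this notebook, we perform sentiment classification on the `sentiment_classification` dataset using a pre-trained BERT model (`nlptown/bert-base-multilingual-uncased-sentiment`) and report its accuracy on the test split.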

### Prerequisites:
1. Download the `sentiment_classification` dataset from [Link].
2. Save the dataset under `8_Transformer/sentiment_classification/`. The code below reads the test split from `sentiment_classification/data/test.csv`, relative to the working directory.


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import pandas as pd

# Load the pre-trained BERT tokenizer and model.
# NOTE: per the model card this checkpoint is a 5-class star-rating classifier,
# so the argmax below yields a class index in 0..4 (i.e. 1..5 stars).
model_name = (
    "nlptown/bert-base-multilingual-uncased-sentiment"  # A sentiment analysis model
)
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Load the evaluation data: one text and one gold sentiment label per row.
# The comparison below presumes the "sentiment" column holds the strings
# "negative" / "neutral" / "positive" -- TODO confirm against the dataset.
data = pd.read_csv("sentiment_classification/data/test.csv")
texts = data["text"].tolist()
labels = data["sentiment"].tolist()

n_data_entries = len(texts)
if n_data_entries == 0:
    # Fail early with a clear message instead of an opaque tokenizer error
    # or a division by zero when computing the accuracy.
    raise ValueError("sentiment_classification/data/test.csv contains no rows")

# Tokenize and encode the input texts (padded to the longest text, truncated
# to at most 128 tokens).
inputs = tokenizer(
    texts, padding=True, truncation=True, return_tensors="pt", max_length=128
)

# Perform predictions
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Inference only: no gradients needed
    outputs = model(**inputs)
    logits = outputs.logits

# Get predicted class indices (one per input text)
predictions = torch.argmax(logits, dim=-1)


def _class_to_sentiment(class_index):
    """Map a 0..4 class index (1..5 stars) to a coarse sentiment label.

    Indices 0-1 are "negative", 2 is "neutral" and 3-4 are "positive".
    """
    if class_index < 2:
        return "negative"
    if class_index > 2:
        return "positive"
    return "neutral"


# Count predictions whose coarse sentiment matches the gold label.
# (The previous `> 4` test could never be true for indices 0..4, so no sample
# was ever classified as "positive".)
correct_predictions = sum(
    _class_to_sentiment(class_index) == label
    for class_index, label in zip(predictions.tolist(), labels)
)

print(f"accuracy: {correct_predictions/n_data_entries*100} %")
