In [74]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EvalPrediction
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [75]:
# Load data from CSV files
train_df = pd.read_csv('../data/train_dataset.csv')
eval_df = pd.read_csv('../data/eval_dataset.csv')
test_df = pd.read_csv('../data/test_dataset.csv')

In [76]:
# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [77]:
# Tokenize the input
train_encodings = tokenizer(list(train_df['text']), truncation=True, padding=True)
eval_encodings = tokenizer(list(eval_df['text']), truncation=True, padding=True)
test_encodings = tokenizer(list(test_df['text']), truncation=True, padding=True)

In [78]:
# Define a PyTorch dataset
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_labels = list(train_df['label'])
eval_labels = list(eval_df['label'])
test_labels = list(eval_df['label'])

train_dataset = CustomDataset(train_encodings, train_labels)
eval_dataset = CustomDataset(eval_encodings, eval_labels)
test_dataset = CustomDataset(test_encodings, test_labels)

In [79]:
# Load the model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [80]:
# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=1,              # number of training epochs
    per_device_train_batch_size=2,   # batch size for training
    per_device_eval_batch_size=2,    # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
)

In [81]:
# Initialize the Trainer
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset            # evaluation dataset
)

In [82]:
# Train the model
trainer.train()

Step,Training Loss
10,0.6888
20,0.713
30,0.6927
40,0.6989


TrainOutput(global_step=40, training_loss=0.6983397245407105, metrics={'train_runtime': 6.7048, 'train_samples_per_second': 11.932, 'train_steps_per_second': 5.966, 'total_flos': 575555433600.0, 'train_loss': 0.6983397245407105, 'epoch': 1.0})

In [83]:
# Evaluate the model
eval_result = trainer.evaluate()

print(f"Evaluation results: {eval_result}")

Evaluation results: {'eval_loss': 0.6620675325393677, 'eval_runtime': 0.2824, 'eval_samples_per_second': 70.816, 'eval_steps_per_second': 35.408, 'epoch': 1.0}


In [84]:
# # Make predictions on the evaluation set
test_predictions = trainer.predict(eval_dataset)

for idx, example in enumerate(eval_dataset):
    text = tokenizer.decode(example['input_ids'], skip_special_tokens=True)
    true_label = eval_dataset[idx]['labels']
    predicted_label = test_predictions.predictions[idx].argmax().item()
    predicted_sentiment = "positive" if predicted_label == 1 else "negative"

    print(f"Text: {text}")
    print(f"True Label: {'positive' if true_label == 1 else 'negative'}")
    print(f"Predicted Sentiment: {predicted_sentiment}\n")


Text: she always lies and can't be trusted.
True Label: negative
Predicted Sentiment: negative

Text: this garden is a work of art.
True Label: positive
Predicted Sentiment: positive

Text: this situation is hopeless.
True Label: negative
Predicted Sentiment: negative

Text: you always show up late.
True Label: negative
Predicted Sentiment: positive

Text: this job is a nightmare.
True Label: negative
Predicted Sentiment: negative

Text: your positive attitude is infectious.
True Label: positive
Predicted Sentiment: positive

Text: this product is a complete waste of money.
True Label: negative
Predicted Sentiment: negative

Text: i'm so tired of your constant negativity.
True Label: negative
Predicted Sentiment: negative

Text: i'm fed up with your lies.
True Label: negative
Predicted Sentiment: negative

Text: your voice is grating.
True Label: negative
Predicted Sentiment: negative

Text: the customer service here is exceptional.
True Label: positive
Predicted Sentiment: positive

T

In [85]:
# Calculate metrics
def compute_metrics(p: EvalPrediction):
    preds = p.predictions.argmax(-1)
    accuracy = accuracy_score(p.label_ids, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(p.label_ids, preds, average='binary')
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

metrics = compute_metrics(test_predictions)
print(f"Metrics: {metrics}")

Metrics: {'accuracy': 0.95, 'precision': 0.8888888888888888, 'recall': 1.0, 'f1': 0.9411764705882353}
