In [1]:
import evaluate
import numpy as np
from datasets import load_dataset
from sklearn.metrics import classification_report
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import set_seed

# Fetch the Twitter financial-news sentiment dataset by its repository id.
dataset = load_dataset("zeroshot/twitter-financial-news-sentiment")

# DistilBERT is a smaller version of BERT; the tokenizer is loaded from the
# same checkpoint so its vocabulary matches the model built later.
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
def preprocess_function(examples):
    """Tokenize a batch of examples for the sequence classifier.

    Args:
        examples: Batch mapping supplied by ``Dataset.map(..., batched=True)``;
            only its ``'text'`` entry is read.

    Returns:
        The tokenizer output for ``examples['text']``, truncated to the
        model's maximum input length and left unpadded.
    """
    # Padding is deliberately not applied here. Padding inside ``map`` pads
    # every example to the longest text of its whole map batch and stores
    # those pad tokens in the dataset. The Trainer receives the tokenizer and
    # therefore pads each training/eval mini-batch dynamically instead.
    return tokenizer(examples['text'], truncation=True)

# Tokenize every split; ``batched=True`` hands preprocess_function whole
# batches of rows instead of one row at a time.
tokenized_dataset = dataset.map(preprocess_function, batched=True)
num_labels = 3  # Negative, Positive, Neutral

# Seed the RNGs *before* building the model: the classification head is not
# part of the pretrained checkpoint and is randomly initialized, so without a
# fixed seed every run would start from different weights.
SEED = 42
set_seed(SEED)

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

training_args = TrainingArguments(  # Hyperparameters
    output_dir='./results',
    # NOTE(review): newer transformers releases appear to rename this argument
    # to `eval_strategy` -- confirm against the installed version.
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    seed=SEED,  # same seed drives data shuffling inside the Trainer
)

# Metric objects are loaded once here and reused by compute_metrics on every
# evaluation pass.
metric_accuracy = evaluate.load("accuracy")
metric_f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy and macro-averaged F1.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        A single dict merging the accuracy result with the F1 result.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the highest score along the last axis.
    predicted = np.argmax(scores, axis=-1)

    results = {}
    results.update(metric_accuracy.compute(predictions=predicted, references=gold))
    results.update(
        metric_f1.compute(predictions=predicted, references=gold, average='macro')
    )
    return results

# Select the two splits used for fine-tuning and per-epoch evaluation.
train_split = tokenized_dataset["train"]
validation_split = tokenized_dataset["validation"]

# Bundle model, hyperparameters, data, tokenizer and metric callback.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=validation_split,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Run fine-tuning; as the cell's last expression, the returned training
# summary is displayed as the cell output.
trainer.train()


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.475,0.368073,0.860972,0.817622
2,0.2232,0.33023,0.879397,0.841436
3,0.1633,0.37221,0.883166,0.847572


TrainOutput(global_step=1791, training_loss=0.3414224500885927, metrics={'train_runtime': 2261.2005, 'train_samples_per_second': 12.661, 'train_steps_per_second': 0.792, 'total_flos': 604755765404802.0, 'train_loss': 0.3414224500885927, 'epoch': 3.0})

In [2]:
# Evaluate the model on the validation split and print the aggregate metrics.
eval_results = trainer.evaluate()
print(eval_results)

# Per-example predictions, used for the per-class breakdown below.
predictions = trainer.predict(tokenized_dataset['validation'])
pred_labels = np.argmax(predictions.predictions, axis=-1)
true_labels = predictions.label_ids

# NOTE(review): assumes label ids 0/1/2 correspond to these names in this
# order -- confirm against the dataset card.
target_names = ['Negative', 'Positive', 'Neutral']

# Pin the label ids explicitly so every name stays attached to its id even if
# a class happens to be absent from both the references and the predictions.
print(classification_report(
    true_labels,
    pred_labels,
    labels=list(range(len(target_names))),
    target_names=target_names,
))

{'eval_loss': 0.372209757566452, 'eval_accuracy': 0.8831658291457286, 'eval_f1': 0.8475722481149707, 'eval_runtime': 42.7179, 'eval_samples_per_second': 55.902, 'eval_steps_per_second': 0.89, 'epoch': 3.0}
              precision    recall  f1-score   support

    Negative       0.77      0.83      0.80       347
    Positive       0.84      0.82      0.83       475
     Neutral       0.93      0.92      0.92      1566

    accuracy                           0.88      2388
   macro avg       0.84      0.85      0.85      2388
weighted avg       0.88      0.88      0.88      2388

