In [1]:
import json

from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer

# Load the raw StockTwits export. The encoding is pinned so that decoding does
# not depend on the platform's locale default.
with open("/kaggle/input/stocktwitz/twits.json", 'r', encoding='utf-8') as f:
    twits = json.load(f)

messages = [twit['message_body'] for twit in twits['data']]
# Shift every sentiment score by +2 so the labels can be used as class indices
# (per the original author's note, this maps the discrete scores onto 0..4).
sentiments = [twit['sentiment'] + 2 for twit in twits['data']]

# A DataFrame is not needed here; plain Python lists are enough downstream.

# Hold out 20% of the messages for validation. random_state is fixed so the
# split (and therefore every metric computed on it) is reproducible across
# kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    messages, sentiments, test_size=0.2, random_state=42
)

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [2]:
# Tokenizer settings shared by the training and validation splits, kept in one
# place so the two encodings cannot drift apart.
_encode_kwargs = dict(
    add_special_tokens=True,
    return_attention_mask=True,
    padding='max_length',
    truncation=True,
    max_length=50,  # original author's note: change to 64
    return_tensors='pt',
)

encoded_data_train = tokenizer(X_train, **_encode_kwargs)
encoded_data_val = tokenizer(X_test, **_encode_kwargs)
# Original author's note: expect this cell to take about an hour to finish.

In [3]:
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset

# Training split: token ids and attention masks from the tokenizer output,
# plus the label list converted to a tensor.
input_ids_train, attention_masks_train = (
    encoded_data_train['input_ids'],
    encoded_data_train['attention_mask'],
)
labels_train = torch.tensor(y_train)

# Validation split, built the same way.
input_ids_val, attention_masks_val = (
    encoded_data_val['input_ids'],
    encoded_data_val['attention_mask'],
)
labels_val = torch.tensor(y_test)

# Each dataset item is an (input_ids, attention_mask, label) triple.
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

batch_size = 16

# Training batches are drawn in random order.
dataloader_train = DataLoader(
    dataset_train,
    batch_size=batch_size,
    sampler=RandomSampler(dataset_train),
)

# Validation batches are read in dataset order, with a batch size of 32.
dataloader_val = DataLoader(
    dataset_val,
    batch_size=32,
    sampler=SequentialSampler(dataset_val),
)


In [4]:
## Download BERT model and set up optimisation
from torch.optim import AdamW
from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup

# Pretrained encoder with a 5-way classification head on top.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=5,
    output_attentions=False,
    output_hidden_states=False,
)

## Optimizer
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

## Epochs
epochs = 6

## Linear scheduler: no warmup, one scheduler step per training batch.
total_training_steps = len(dataloader_train) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)


2025-09-07 02:29:18.039046: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757212158.231413      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757212158.279671      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
import numpy as np
def evaluate(dataloader_val):
    """Run the module-level ``model`` over ``dataloader_val`` in eval mode.

    Relies on the globals ``model``, ``device``, ``tqdm``, ``torch`` and ``np``.
    Every batch is indexed as ``(input_ids, attention_mask, labels)``.

    Returns:
        A tuple ``(loss_val_avg, predictions, true_vals)``: the summed batch
        loss divided by the number of batches, the per-batch logits
        concatenated along axis 0, and the matching labels concatenated
        along axis 0 (both as NumPy arrays).
    """
    model.eval()

    batch_losses = []
    logit_chunks = []
    label_chunks = []

    for batch in tqdm(dataloader_val):
        moved = tuple(tensor.to(device) for tensor in batch)

        # Forward pass only; no gradients are recorded.
        with torch.no_grad():
            outputs = model(
                input_ids=moved[0],
                attention_mask=moved[1],
                labels=moved[2],
            )

        # outputs[0] is the batch loss, outputs[1] the logits.
        batch_losses.append(outputs[0].item())
        logit_chunks.append(outputs[1].detach().cpu().numpy())
        label_chunks.append(moved[2].cpu().numpy())

    loss_val_avg = sum(batch_losses) / len(dataloader_val)

    return (
        loss_val_avg,
        np.concatenate(logit_chunks, axis=0),
        np.concatenate(label_chunks, axis=0),
    )

from sklearn.metrics import accuracy_score,f1_score
def accuracy(preds,labels):
    """Print and return the overall accuracy of the arg-max predictions.

    Args:
        preds: 2-D array of per-class scores; the predicted class of each row
            is its arg-max along axis 1.
        labels: Array-like of true class ids (flattened before comparison).

    Returns:
        The accuracy as a fraction, as returned by ``accuracy_score``. The
        value is also printed as a percentage with two decimals.
    """
    preds_flat = np.argmax(preds, axis=1).flatten()
    # np.asarray lets plain Python lists be passed as labels as well.
    labels_flat = np.asarray(labels).flatten()
    accuracy_val = accuracy_score(labels_flat, preds_flat)
    print(f'Accuracy: {accuracy_val*100:.2f}%')
    # Returned (in addition to being printed) so callers can log or compare it,
    # matching f1_score_func which also returns its metric.
    return accuracy_val

def accuracy_per_class(preds, labels, label_names=None):
    """Print, for every class present in ``labels``, how many samples were hit.

    Args:
        preds: 2-D array of per-class scores; the predicted class of each row
            is its arg-max along axis 1.
        labels: Array-like of true class ids (flattened before comparison).
        label_names: Optional mapping from class id to a display name. When
            omitted, a module-level ``label_dict`` (display name -> class id)
            is inverted and used if it exists; any class without a name is
            printed as its raw id.

    Returns:
        None. One ``Class:`` line and one ``Accuracy:correct/total`` line are
        printed per class.
    """
    if label_names is None:
        # The original code required a global `label_dict`, which is not defined
        # in this notebook and therefore raised NameError. Use it when present,
        # otherwise fall back to showing the numeric class id.
        label_names = {v: k for k, v in globals().get('label_dict', {}).items()}

    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = np.asarray(labels).flatten()

    for label in np.unique(labels_flat):
        class_mask = labels_flat == label
        n_correct = int(np.count_nonzero(preds_flat[class_mask] == label))
        n_total = int(np.count_nonzero(class_mask))
        print(f'Class: {label_names.get(label, label)}')
        print(f'Accuracy:{n_correct}/{n_total}\n')

def f1_score_func(preds, labels):
    """Return the weighted F1 score of the arg-max predictions.

    ``preds`` holds per-class scores (the arg-max over axis 1 is the predicted
    class); ``labels`` holds the true class ids. Both are flattened before
    being handed to ``f1_score`` with ``average='weighted'``.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    return f1_score(
        y_true=labels.flatten(),
        y_pred=predicted_classes,
        average='weighted',
    )

In [6]:
from tqdm import tqdm


def _set_bert_trainable(trainable):
    """Set ``requires_grad`` on every parameter of ``model.bert``."""
    for weight in model.bert.parameters():
        weight.requires_grad = trainable


# Start with the BERT encoder frozen; it is unfrozen at the start of epoch 3.
_set_bert_trainable(False)

# Run on the GPU when one is available to speed up training.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in tqdm(range(1, epochs + 1)):
    model.train()

    # From epoch 3 onwards the whole network is fine-tuned.
    if epoch == 3:
        _set_bert_trainable(True)

    # The per-batch bar is created but disabled, so it prints nothing.
    progress_bar = tqdm(
        dataloader_train,
        desc=f'Epoch {epoch:1d}',
        leave=False,
        disable=True,
    )

    loss_train_total = 0
    for batch in progress_bar:
        model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        inputs = dict(
            input_ids=batch[0],
            attention_mask=batch[1],
            labels=batch[2],
        )

        outputs = model(**inputs)
        loss = outputs[0]  # the loss is the first element of the model output
        loss_train_total += loss.item()
        loss.backward()

        # Clip the gradient norm to 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

    # End-of-epoch report: mean training loss, then validation metrics.
    print(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total / len(dataloader_train)
    print(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_val)
    val_f1 = f1_score_func(predictions, true_vals)
    print(f'Validation loss: {val_loss}')
    print(f'F1 Score (weighted): {val_f1}')
    accuracy(predictions, true_vals)

  0%|          | 0/6 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.393811985946016



  0%|          | 0/9676 [00:00<?, ?it/s][A
  0%|          | 3/9676 [00:00<07:17, 22.12it/s][A
  0%|          | 6/9676 [00:00<07:18, 22.06it/s][A
  0%|          | 9/9676 [00:00<07:19, 21.99it/s][A
  0%|          | 12/9676 [00:00<07:21, 21.91it/s][A
  0%|          | 15/9676 [00:00<07:20, 21.92it/s][A
  0%|          | 18/9676 [00:00<07:21, 21.88it/s][A
  0%|          | 21/9676 [00:00<07:20, 21.92it/s][A
  0%|          | 24/9676 [00:01<07:19, 21.98it/s][A
  0%|          | 27/9676 [00:01<07:18, 21.99it/s][A
  0%|          | 30/9676 [00:01<07:18, 22.00it/s][A
  0%|          | 33/9676 [00:01<07:17, 22.04it/s][A
  0%|          | 36/9676 [00:01<07:17, 22.04it/s][A
  0%|          | 39/9676 [00:01<07:18, 21.99it/s][A
  0%|          | 42/9676 [00:01<07:18, 21.98it/s][A
  0%|          | 45/9676 [00:02<07:18, 21.94it/s][A
  0%|          | 48/9676 [00:02<07:19, 21.92it/s][A
  1%|          | 51/9676 [00:02<07:17, 22.00it/s][A
  1%|          | 54/9676 [00:02<07:16, 22.02it/s][A
  1%

Validation loss: 1.3758221730472826
F1 Score (weighted): 0.2853831548216529
Accuracy: 45.39%

Epoch 2
Training loss: 1.3758557225785804



  0%|          | 0/9676 [00:00<?, ?it/s][A
  0%|          | 3/9676 [00:00<07:11, 22.40it/s][A
  0%|          | 6/9676 [00:00<07:15, 22.21it/s][A
  0%|          | 9/9676 [00:00<07:17, 22.10it/s][A
  0%|          | 12/9676 [00:00<07:18, 22.02it/s][A
  0%|          | 15/9676 [00:00<07:19, 21.96it/s][A
  0%|          | 18/9676 [00:00<07:20, 21.95it/s][A
  0%|          | 21/9676 [00:00<07:20, 21.90it/s][A
  0%|          | 24/9676 [00:01<07:20, 21.92it/s][A
  0%|          | 27/9676 [00:01<07:18, 21.98it/s][A
  0%|          | 30/9676 [00:01<07:18, 22.02it/s][A
  0%|          | 33/9676 [00:01<07:17, 22.04it/s][A
  0%|          | 36/9676 [00:01<07:16, 22.07it/s][A
  0%|          | 39/9676 [00:01<07:17, 22.04it/s][A
  0%|          | 42/9676 [00:01<07:17, 22.00it/s][A
  0%|          | 45/9676 [00:02<07:18, 21.96it/s][A
  0%|          | 48/9676 [00:02<07:18, 21.95it/s][A
  1%|          | 51/9676 [00:02<07:18, 21.96it/s][A
  1%|          | 54/9676 [00:02<07:17, 21.99it/s][A
  1%

Validation loss: 1.365261612068084
F1 Score (weighted): 0.28983009984629715
Accuracy: 45.49%

Epoch 3
Training loss: 0.6191645822671158



  0%|          | 0/9676 [00:00<?, ?it/s][A
  0%|          | 2/9676 [00:00<09:34, 16.85it/s][A
  0%|          | 5/9676 [00:00<08:01, 20.08it/s][A
  0%|          | 8/9676 [00:00<07:40, 20.99it/s][A
  0%|          | 11/9676 [00:00<07:32, 21.34it/s][A
  0%|          | 14/9676 [00:00<07:28, 21.55it/s][A
  0%|          | 17/9676 [00:00<07:25, 21.70it/s][A
  0%|          | 20/9676 [00:00<07:22, 21.82it/s][A
  0%|          | 23/9676 [00:01<07:21, 21.89it/s][A
  0%|          | 26/9676 [00:01<07:19, 21.97it/s][A
  0%|          | 29/9676 [00:01<07:18, 22.01it/s][A
  0%|          | 32/9676 [00:01<07:18, 22.01it/s][A
  0%|          | 35/9676 [00:01<07:18, 21.99it/s][A
  0%|          | 38/9676 [00:01<07:18, 21.98it/s][A
  0%|          | 41/9676 [00:01<07:18, 22.00it/s][A
  0%|          | 44/9676 [00:02<07:17, 22.01it/s][A
  0%|          | 47/9676 [00:02<07:16, 22.06it/s][A
  1%|          | 50/9676 [00:02<07:15, 22.10it/s][A
  1%|          | 53/9676 [00:02<07:15, 22.11it/s][A
  1%

Validation loss: 0.5285645416603201
F1 Score (weighted): 0.8015222590008192
Accuracy: 80.72%

Epoch 4
Training loss: 0.5124528133843355



  0%|          | 0/9676 [00:00<?, ?it/s][A
  0%|          | 2/9676 [00:00<09:34, 16.83it/s][A
  0%|          | 5/9676 [00:00<08:00, 20.11it/s][A
  0%|          | 8/9676 [00:00<07:38, 21.09it/s][A
  0%|          | 11/9676 [00:00<07:29, 21.51it/s][A
  0%|          | 14/9676 [00:00<07:25, 21.66it/s][A
  0%|          | 17/9676 [00:00<07:23, 21.76it/s][A
  0%|          | 20/9676 [00:00<07:22, 21.81it/s][A
  0%|          | 23/9676 [00:01<07:21, 21.84it/s][A
  0%|          | 26/9676 [00:01<07:20, 21.92it/s][A
  0%|          | 29/9676 [00:01<07:19, 21.95it/s][A
  0%|          | 32/9676 [00:01<07:18, 22.01it/s][A
  0%|          | 35/9676 [00:01<07:17, 22.04it/s][A
  0%|          | 38/9676 [00:01<07:16, 22.07it/s][A
  0%|          | 41/9676 [00:01<07:17, 22.04it/s][A
  0%|          | 44/9676 [00:02<07:17, 22.00it/s][A
  0%|          | 47/9676 [00:02<07:17, 22.00it/s][A
  1%|          | 50/9676 [00:02<07:17, 22.00it/s][A
  1%|          | 53/9676 [00:02<07:16, 22.04it/s][A
  1%

Validation loss: 0.5153814432268585
F1 Score (weighted): 0.8079187649767038
Accuracy: 81.34%

Epoch 5
Training loss: 0.47385448569078414



  0%|          | 0/9676 [00:00<?, ?it/s][A
  0%|          | 2/9676 [00:00<09:32, 16.90it/s][A
  0%|          | 5/9676 [00:00<08:00, 20.11it/s][A
  0%|          | 8/9676 [00:00<07:39, 21.04it/s][A
  0%|          | 11/9676 [00:00<07:30, 21.47it/s][A
  0%|          | 14/9676 [00:00<07:25, 21.70it/s][A
  0%|          | 17/9676 [00:00<07:22, 21.83it/s][A
  0%|          | 20/9676 [00:00<07:20, 21.91it/s][A
  0%|          | 23/9676 [00:01<07:19, 21.99it/s][A
  0%|          | 26/9676 [00:01<07:18, 22.02it/s][A
  0%|          | 29/9676 [00:01<07:17, 22.06it/s][A
  0%|          | 32/9676 [00:01<07:16, 22.07it/s][A
  0%|          | 35/9676 [00:01<07:16, 22.07it/s][A
  0%|          | 38/9676 [00:01<07:16, 22.10it/s][A
  0%|          | 41/9676 [00:01<07:16, 22.09it/s][A
  0%|          | 44/9676 [00:02<07:15, 22.10it/s][A
  0%|          | 47/9676 [00:02<07:15, 22.12it/s][A
  1%|          | 50/9676 [00:02<07:15, 22.12it/s][A
  1%|          | 53/9676 [00:02<07:14, 22.12it/s][A
  1%

Validation loss: 0.49814368803446796
F1 Score (weighted): 0.8138543591945381
Accuracy: 81.84%

Epoch 6
Training loss: 0.44565397929303224



  0%|          | 0/9676 [00:00<?, ?it/s][A
  0%|          | 2/9676 [00:00<09:33, 16.86it/s][A
  0%|          | 5/9676 [00:00<08:02, 20.04it/s][A
  0%|          | 8/9676 [00:00<07:40, 20.99it/s][A
  0%|          | 11/9676 [00:00<07:30, 21.43it/s][A
  0%|          | 14/9676 [00:00<07:26, 21.65it/s][A
  0%|          | 17/9676 [00:00<07:23, 21.76it/s][A
  0%|          | 20/9676 [00:00<07:21, 21.86it/s][A
  0%|          | 23/9676 [00:01<07:20, 21.91it/s][A
  0%|          | 26/9676 [00:01<07:19, 21.94it/s][A
  0%|          | 29/9676 [00:01<07:19, 21.95it/s][A
  0%|          | 32/9676 [00:01<07:19, 21.94it/s][A
  0%|          | 35/9676 [00:01<07:19, 21.95it/s][A
  0%|          | 38/9676 [00:01<07:18, 21.97it/s][A
  0%|          | 41/9676 [00:01<07:18, 21.97it/s][A
  0%|          | 44/9676 [00:02<07:18, 21.96it/s][A
  0%|          | 47/9676 [00:02<07:18, 21.94it/s][A
  1%|          | 50/9676 [00:02<07:18, 21.97it/s][A
  1%|          | 53/9676 [00:02<07:17, 22.00it/s][A
  1%

Validation loss: 0.505106037692831
F1 Score (weighted): 0.8125869463654891
Accuracy: 81.77%





Next experiment: check whether undersampling the training data makes any difference to the results.