### Read files, merge dataframe, and split train/test:

In [1]:
import pandas as pd
import json
from pathlib import Path
from datetime import datetime
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

In [2]:
### training data
iden_df = pd.read_csv("../data/kaggle/data_identification.csv",
                         header=0)
emotion_df = pd.read_csv("../data/kaggle/emotion.csv",
                         header=0)

In [3]:
print(len(iden_df), len(emotion_df))

1867535 1455563


In [4]:
# Load the tweets raw data file
with open("../data/kaggle/tweets_DM.json", "r") as file:
    tweets_data = [json.loads(line) for line in file]

len(tweets_data)

1867535

In [5]:
# Extract relevant information from the tweets data
tweets_info = []
for tweet in tweets_data:
    tweet_id = tweet["_source"]["tweet"]["tweet_id"]
    hashtags = tweet["_source"]["tweet"]["hashtags"]
    text = tweet["_source"]["tweet"]["text"]
    tweets_info.append({"tweet_id": tweet_id, "hashtags": hashtags, "text": text})

# Create a dataframe from the tweets information
tweets_df = pd.DataFrame(tweets_info)


In [6]:
tweets_df

Unnamed: 0,tweet_id,hashtags,text
0,0x376b20,[Snapchat],"People who post ""add me on #Snapchat"" must be ..."
1,0x2d5350,"[freepress, TrumpLegacy, CNN]","@brianklaas As we see, Trump is dangerous to #..."
2,0x28b412,[bibleverse],"Confident of your obedience, I write to you, k..."
3,0x1cd5b0,[],Now ISSA is stalking Tasha 😂😂😂 <LH>
4,0x2de201,[],"""Trust is not the same as faith. A friend is s..."
...,...,...,...
1867530,0x316b80,"[mixedfeeling, butimTHATperson]",When you buy the last 2 tickets remaining for ...
1867531,0x29d0cb,[],I swear all this hard work gone pay off one da...
1867532,0x2a6a4f,[],@Parcel2Go no card left when I wasn't in so I ...
1867533,0x24faed,[],"Ah, corporate life, where you can date <LH> us..."


In [7]:
merged_df = pd.merge(iden_df, tweets_df, on="tweet_id")

train_df = merged_df[merged_df["identification"] == "train"]
test_df = merged_df[merged_df["identification"] == "test"]

train_df = pd.merge(train_df, emotion_df, on="tweet_id")

print(train_df.head())
print(len(train_df))

print(test_df.head())
print(len(test_df))

   tweet_id identification                                           hashtags  \
0  0x29e452          train                                                 []   
1  0x2b3819          train                                  [spateradio, app]   
2  0x2a2acc          train                                                 []   
3  0x2a8830          train  [PUBG, GamersUnite, twitch, BeHealthy, StayPos...   
4  0x20b21d          train                             [strength, bones, God]   

                                                text       emotion  
0  Huge Respect🖒 @JohnnyVegasReal talking about l...           joy  
1  Yoooo we hit all our monthly goals with the ne...           joy  
2  @KIDSNTS @PICU_BCH @uhbcomms @BWCHBoss Well do...         trust  
3  Come join @ambushman27 on #PUBG while he striv...           joy  
4  @fanshixieen2014 Blessings!My #strength little...  anticipation  
1455563
    tweet_id identification          hashtags  \
0   0x28cc61           test               

In [8]:
train_df = train_df.sample(frac=0.01, replace=True, random_state=1)

print(len(train_df))

14556


In [9]:
import sys
sys.path.append('../helpers')
import data_mining_helpers as dmh

In [10]:
train_df.isnull().apply(lambda x: dmh.check_missing_values(x))

Unnamed: 0,tweet_id,identification,hashtags,text,emotion
0,The amoung of missing records is:,The amoung of missing records is:,The amoung of missing records is:,The amoung of missing records is:,The amoung of missing records is:
1,0,0,0,0,0


In [11]:
test_df.isnull().apply(lambda x: dmh.check_missing_values(x))

Unnamed: 0,tweet_id,identification,hashtags,text
0,The amoung of missing records is:,The amoung of missing records is:,The amoung of missing records is:,The amoung of missing records is:
1,0,0,0,0


### Roberta

In [12]:
import torch
from transformers import RobertaTokenizer, RobertaTokenizerFast, RobertaForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import pickle

In [13]:
target_list = ['anger', 'anticipation', 'disgust', 'fear', 'sadness', 'surprise', 'trust', 'joy']

In [14]:
# tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
# encoded_data = torch.load('encoded_data.pth')

In [15]:

# try:
#     # Try to load the encoded data
#     encoded_data = torch.load('encoded_data.pth')
# except FileNotFoundError:
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
encoded_data = tokenizer(list(train_df['text']), truncation=True, padding=True, return_tensors='pt', max_length=256)
    # torch.save(encoded_data, 'encoded_data.pth')


In [16]:
input_ids = encoded_data['input_ids'][0]

# Decode the token IDs to obtain the original sentence
decoded_sentence = tokenizer.decode(input_ids, skip_special_tokens=True)

# Print the results
print("Original Sentence:", train_df['text'].iloc[0])
print("Tokenized Sentence:", decoded_sentence)

decoded_tokens = tokenizer.convert_ids_to_tokens(input_ids)
print(decoded_tokens)
# Print each token along with its token ID
for token, token_id in zip(decoded_tokens, input_ids):
    print(f"Token: {token}\tToken ID: {token_id}")

Original Sentence: @TheFostersTV @TeriPolo1 Has one of the best "What the Hell?" looks ever!!! <LH> #FostersProm
Tokenized Sentence: @TheFostersTV @TeriPolo1 Has one of the best "What the Hell?" looks ever!!! <LH> #FostersProm
['<s>', '@', 'The', 'F', 'ost', 'ers', 'TV', 'Ġ@', 'Ter', 'i', 'Pol', 'o', '1', 'ĠHas', 'Ġone', 'Ġof', 'Ġthe', 'Ġbest', 'Ġ"', 'What', 'Ġthe', 'ĠHell', '?"', 'Ġlooks', 'Ġever', '!!!', 'Ġ<', 'L', 'H', '>', 'Ġ#', 'F', 'ost', 'ers', 'Prom', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>

In [17]:
unique_elements = torch.unique(encoded_data['input_ids'])
num_unique_elements = unique_elements.numel()
num_unique_elements

23084

In [18]:

# Map emotion labels to each tweet's tensor
label_map = {label: i for i, label in enumerate(target_list)}
encoded_data['labels'] = torch.tensor([label_map[label] for label in train_df['emotion']])


In [19]:
encoded_data['input_ids'].shape

torch.Size([14556, 244])

In [20]:
print(encoded_data['labels'])
print(len(encoded_data['input_ids']), len(encoded_data['labels']))

tensor([7, 5, 7,  ..., 4, 6, 6])
14556 14556


In [21]:
# Split into train and valid, 9:1
# train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(encoded_data['input_ids'],
#                                                                                     encoded_data['labels'],
#                                                                                     random_state=42,
#                                                                                     test_size=0.1)

model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=len(label_map))
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-2)


# print(len(validation_inputs))

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
print(model)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [23]:
"cuda" if torch.cuda.is_available() else "cpu"

'cuda'

In [24]:
import gc
torch.cuda.empty_cache()
gc.collect()

6

In [25]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [26]:
# train_dataset = TensorDataset(train_inputs, train_labels)
# train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# validation_dataset = TensorDataset(validation_inputs, validation_labels)
# validation_dataloader = DataLoader(validation_dataset, batch_size=4, shuffle=False)

# # Define loss function
# criterion = torch.nn.CrossEntropyLoss()

# # Training loop
# num_epochs = 5  # Adjust as needed
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

In [27]:
class Trainer:
    def __init__(self, log_dir, model, encoded_data):
        '''Initialize the varibles for training
        Args:
            log_dir: (pathlib.Path) the direction used for logging
        '''
        self.log_dir = log_dir
        print(self.log_dir)
        # Split into train and valid, 9:1
        self.train_inputs, self.validation_inputs, self.train_labels, self.validation_labels = train_test_split(encoded_data['input_ids'],
                                                                                            encoded_data['labels'],
                                                                                            random_state=42,
                                                                                            test_size=0.1)
        self.train_dataset = TensorDataset(self.train_inputs, self.train_labels)
        self.train_loader = DataLoader(self.train_dataset, batch_size=4, shuffle=True, num_workers=0)

        self.validation_dataset = TensorDataset(self.validation_inputs, self.validation_labels)
        self.valid_loader = DataLoader(self.validation_dataset, batch_size=4, shuffle=False, num_workers=0)



        # Datasets and dataloaders
        # 1. Split the whole training data into train and valid (validation)
        # 2. Make the corresponding dataloaders

        # self.train_loader = DataLoader(self.train_set, 16, shuffle=True, num_workers=0)
        # self.valid_loader = DataLoader(self.valid_set, 16, shuffle=False, num_workers=0)

        # model, loss function, optimizer
        self.device = 'cuda'
        self.model = model.to(self.device)
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.AdamW(model.parameters(), lr=5e-3)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 10, gamma=0.95, last_epoch=-1, verbose=True)
        # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min', verbose=True)
        self.max_epoch = 1 

    def run(self):
        training_result_dir = self.log_dir / 'training_result'
        training_result_dir.mkdir(parents=True)
        metrics = {'train_loss': [], 'valid_loss': []}
        lrs = []
        for self.epoch in range(self.max_epoch): # epochs
            train_loss = self.train() # train 1 epoch
            valid_loss = self.valid() # valid 1 epoch
            print('lr:',get_lr(self.optimizer))
            lrs.append(get_lr(self.optimizer))
            print(f'Epoch {self.epoch:03d}:')
            print('train loss:', train_loss)
            print('valid loss:', valid_loss)
            metrics['train_loss'].append(train_loss)
            metrics['valid_loss'].append(valid_loss)
            # Save the parameters(weights) of the model to disk
            if torch.tensor(metrics['valid_loss']).argmin() == self.epoch:
                torch.save(self.model.state_dict(), str(training_result_dir / 'model.pth'))
        
        # Plot the loss curve against epoch
        fig, ax = plt.subplots(1, 1, figsize=(10, 10), dpi=100)
        ax.set_title('Loss')
        ax.plot(range(self.epoch + 1), metrics['train_loss'], label='Train')
        ax.plot(range(self.epoch + 1), metrics['valid_loss'], label='Valid')
        ax.legend()
        plt.show()
        fig.savefig(str(training_result_dir / 'metrics.jpg'))
        plt.close()

        fig, ax = plt.subplots(1, 1, figsize=(10, 10), dpi=100)
        ax.set_title('Learning rate')
        ax.plot(range(self.epoch + 1), lrs)
        ax.legend()
        plt.show()
        fig.savefig(str(training_result_dir / 'lr.jpg'))
        plt.close()

    def train(self):
        '''Train one epoch
        1. Switch model to training mode
        2. Iterate mini-batches and do:
            a. clear gradient
            b. forward to get loss
            c. loss backward
            d. update parameters
        3. Return the average loss in this epoch
        '''
        self.model.train()
        loss_steps = []

        for batch_inputs, batch_labels in tqdm(self.train_loader):
            batch_inputs, batch_labels = batch_inputs.to(self.device), batch_labels.to(self.device).unsqueeze(0)
            # Forward pass
            outputs = model(input_ids=batch_inputs, labels=batch_labels)
            loss = outputs.loss
            predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
            predictions = torch.argmax(predictions, dim=1)
            print(batch_labels, predictions)
            print(loss)
            # Backward pass and optimization
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            loss_steps.append(loss.detach().item())
            # total_loss += loss.item()
            self.scheduler.step()
        
        # Calculate average training loss for the epoch
        avg_train_loss = sum(loss_steps) / len(self.train_loader)
        print(f"Epoch {self.epoch + 1}, Avg. Training Loss: {avg_train_loss:.4f}")
        
        return avg_train_loss

    @torch.no_grad()
    def valid(self):
        '''Validate one epoch
        1. Switch model to evaluation mode and turn off gradient (by @torch.no_grad() or with torch.no_grad())
        2. Iterate mini-batches and do forwarding to get loss
        3. Return average loss in this epoch
        '''
        self.model.eval()
        loss_steps = []
        all_predictions = []
        all_true_labels = []

        for batch_inputs, batch_labels in self.valid_loader:
            batch_inputs, batch_labels = batch_inputs.to(self.device), batch_labels.to(self.device)
            # Forward pass
            outputs = self.model(input_ids=batch_inputs)
            logits = outputs.logits.cpu()
            print(logits)
            # Predictions
            predictions = torch.nn.functional.softmax(logits, dim=1)
            predictions = torch.argmax(predictions, dim=1).cpu().numpy()
            true_labels = batch_labels.cpu()
            print(predictions, true_labels)
            loss_steps.append(self.criterion(logits, true_labels))
            true_labels = true_labels.numpy()
            all_predictions.extend(predictions)
            all_true_labels.extend(true_labels)

            

        # Calculate accuracy on the validation set
        avg_valid_loss = sum(loss_steps) / len(self.valid_loader)
        print(f"Epoch {self.epoch + 1}, Validation Accuracy: {avg_valid_loss:.4f}")
        return avg_valid_loss

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']


log_dir = Path('./runs/') / f'{datetime.now():%b%d_%H_%M_%S}'
log_dir.mkdir(parents=True, exist_ok=True)
Trainer(log_dir, model, encoded_data).run()

runs\Dec13_15_33_47
Adjusting learning rate of group 0 to 5.0000e-03.


  0%|          | 0/3275 [00:00<?, ?it/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


tensor([[7, 6, 7, 6]], device='cuda:0') tensor([3, 4, 3, 3], device='cuda:0')
tensor(2.1254, device='cuda:0', grad_fn=<NllLossBackward0>)
Adjusting learning rate of group 0 to 5.0000e-03.
tensor([[4, 0, 4, 7]], device='cuda:0') tensor([7, 7, 7, 7], device='cuda:0')
tensor(3.0943, device='cuda:0', grad_fn=<NllLossBackward0>)
Adjusting learning rate of group 0 to 5.0000e-03.
tensor([[1, 2, 4, 1]], device='cuda:0') tensor([6, 6, 6, 6], device='cuda:0')
tensor(7.8632, device='cuda:0', grad_fn=<NllLossBackward0>)
Adjusting learning rate of group 0 to 5.0000e-03.
tensor([[2, 7, 2, 6]], device='cuda:0') tensor([7, 6, 7, 7], device='cuda:0')
tensor(2.2995, device='cuda:0', grad_fn=<NllLossBackward0>)
Adjusting learning rate of group 0 to 5.0000e-03.
tensor([[6, 2, 0, 7]], device='cuda:0') tensor([2, 2, 2, 2], device='cuda:0')
tensor(5.8015, device='cuda:0', grad_fn=<NllLossBackward0>)
Adjusting learning rate of group 0 to 5.0000e-03.
tensor([[7, 3, 1, 6]], device='cuda:0') tensor([2, 2, 2, 2],

RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`

In [45]:
predictions = torch.tensor([[-2.1727,  0.2177, -0.5744, -1.7016, -0.0799, -1.4667, -0.0843,  0.8515],
        [-2.1727,  0.2177, -0.5744, -1.7016, -0.0799, -1.4667, -0.0843,  0.8515]])
true_labels = torch.tensor([7, 7])

# Ensure true_labels is of type LongTensor
true_labels = true_labels.type(torch.LongTensor)
print(predictions, true_labels)
# Instantiate CrossEntropyLoss
criterion = torch.nn.CrossEntropyLoss()

# Calculate the loss for the current batch
batch_loss = criterion(predictions, true_labels)
print(batch_loss)

tensor([[-2.1727,  0.2177, -0.5744, -1.7016, -0.0799, -1.4667, -0.0843,  0.8515],
        [-2.1727,  0.2177, -0.5744, -1.7016, -0.0799, -1.4667, -0.0843,  0.8515]]) tensor([7, 7])
tensor(1.0232)


In [52]:
torch.tensor([1,0]).unsqueeze(0)


tensor([[6, 0]])

In [None]:



test_encoded_data = tokenizer(list(test_df['text']), truncation=True, padding=True, return_tensors='pt', max_length=256)
# test_dataset = TensorDataset(test_encoded_data['input_ids'])
# test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)

class CustomTestDataset(TensorDataset):
    def __init__(self, tweet_ids, input_ids):
        super(CustomTestDataset, self).__init__(input_ids)
        self.tweet_ids = tweet_ids

    def __getitem__(self, index):
        # Return both input_ids tensor and tweet_id
        return (self.tweet_ids[index], super(CustomTestDataset, self).__getitem__(index))
    
test_dataset = CustomTestDataset(test_df["tweet_id"].values, test_encoded_data['input_ids'])

# Create the data loader
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

device = 'cuda'
model = model.to(device)
model.load_state_dict(torch.load(str(log_dir / 'training_result/model.pth')))
model.eval()
print(str(log_dir / 'training_result/model.pth'))

log_dir_test = Path('./testing_result') / f'{datetime.now():%b%d_%H_%M_%S}'
log_dir_test.mkdir(parents=True)
print(log_dir_test)

def test(pbar):
    model.eval()
    all_predictions = []
    for batch_inputs in test_loader:
        batch_inputs = batch_inputs.to(device)

        # Forward pass
        outputs = model(input_ids=batch_inputs)
        logits = outputs.logits

        # Predictions
        predictions = torch.argmax(logits, dim=1).cpu().numpy()
        predicted_labels = [target_list[idx] for idx in predictions]
        all_predictions.append([f'{pbar.n:04d}.jpg', predicted_labels])
        pbar.update()


    return all_predictions


with torch.no_grad():
    with tqdm(total=len(test_dataset)) as pbar:
        df_pred = test(pbar)
    df_pred.columns = ['name', 'BR_x', 'BR_y', 'BL_x', 'BL_y', 'TL_x', 'TL_y', 'TR_x', 'TR_y']
    df_pred.to_csv(str(log_dir_test / 'test_pred.csv'), float_format='%.5f', index=False)

In [None]:
from tqdm.auto import tqdm

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch_inputs, batch_labels in tqdm(train_dataloader):
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

        # Forward pass
        outputs = model(input_ids=batch_inputs, labels=batch_labels)
        loss = outputs.loss

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Calculate average training loss for the epoch
    avg_train_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Avg. Training Loss: {avg_train_loss:.4f}")

    # Validation
    model.eval()
    all_predictions = []
    all_true_labels = []

    with torch.no_grad():
        for batch_inputs, batch_labels in validation_dataloader:
            batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

            # Forward pass
            outputs = model(input_ids=batch_inputs)
            logits = outputs.logits

            # Predictions
            predictions = torch.argmax(logits, dim=1).cpu().numpy()
            true_labels = batch_labels.cpu().numpy()

            all_predictions.extend(predictions)
            all_true_labels.extend(true_labels)

    # Calculate accuracy on the validation set
    accuracy = accuracy_score(all_true_labels, all_predictions)
    print(f"Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {accuracy:.4f}")

In [None]:
train_dataset = TensorDataset(train_inputs, train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# Initialize XGBoost model
xgb_model = XGBClassifier()

epochs = 10
label_encoder = LabelEncoder()



# Training loop
for epoch in range(epochs):
    for batch_inputs, batch_labels in train_dataloader:
        # Flatten the input tensors if needed
        flat_inputs = batch_inputs.view(batch_inputs.size(0), -1).numpy()
        flat_labels = batch_labels.numpy()

        flat_labels_encoded = label_encoder.fit_transform(flat_labels)

        # Train the XGBoost model on the batch
        xgb_model.fit(flat_inputs, flat_labels)

# Validation
xgb_model.eval()
with torch.no_grad():
    # Flatten the validation inputs
    flat_validation_inputs = validation_inputs.view(validation_inputs.size(0), -1).numpy()

    # Make predictions
    predictions = xgb_model.predict(flat_validation_inputs)



ValueError: Invalid classes inferred from unique values of `y`.  Expected: [0 1 2], got [2 5 7]

In [None]:
# Map predictions back to emotion labels
predicted_labels = [label_map_inv[pred] for pred in predictions]

# Calculate accuracy
accuracy = accuracy_score(validation_labels.numpy(), predictions)
print(f"Validation Accuracy: {accuracy}")

In [None]:
# Evaluate on validation set
model.eval()
with torch.no_grad():
    logits = model(validation_inputs)['logits']
    predictions = torch.argmax(logits, dim=1).tolist()

RuntimeError: [enforce fail at alloc_cpu.cpp:80] data. DefaultCPUAllocator: not enough memory: you tried to allocate 114470682624 bytes.

In [None]:
label_map_inv = {i: label for label, i in label_map.items()}
# Map predictions back to emotion labels
predicted_labels = [label_map_inv[pred] for pred in predictions]

# Print classification report
print(classification_report(validation_labels.tolist(), predictions, target_names=label_map_inv.keys()))