In [1]:
import torch
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import classification_report
import pandas as pd
import matplotlib.pyplot as plt

Using device: cuda:1


In [2]:
import os
from tqdm import tqdm
os.environ["WANDB_DISABLED"] = "true"

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [4]:
seed = 25
# random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [5]:
train_data = pd.read_csv('datasets/subtask_2/es/train.tsv',sep='\t')
train_data = train_data.reset_index(drop=True)
print(train_data.head())

      id                                               text label
0  12786  Sin embargo, los jóvenes son capaces de recono...     B
1  12361  ¿Hay algo más que quieras compartir? ¿Algo sob...     B
2   1662  El servicio de sala es bueno, rápido y amabilí...     B
3  14729  Para concentrarse en el hablante, trata de des...     F
4   9312  Los responsables locales tendrán ahora que esp...     F


In [29]:
from sklearn.model_selection import train_test_split
train_data_texts = train_data['text'].to_list()
train_data_labels = train_data['label'].to_list()
train_data_labels = [ord(label)-ord('A') for label in train_data_labels]
train_texts, test_texts, train_labels, test_labels = train_test_split(train_data_texts, train_data_labels, test_size=0.1, random_state=25)
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=25)
print('train data size: ', len(train_texts))
print('validation data size: ', len(val_texts))
print('test data size: ', len(test_texts))

train data size:  17766
validation data size:  1975
test data size:  2194


In [7]:
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding
from transformers import BertTokenizer, BertModel
from transformers import TrainingArguments, Trainer
bert_tokenizer = BertTokenizer.from_pretrained("dccuchile/bert-base-spanish-wwm-cased")
bert_model = BertModel.from_pretrained("dccuchile/bert-base-spanish-wwm-cased").to(device)
print("Model Configurations")
print()
print(bert_model.config)

Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.we

Model Configurations

BertConfig {
  "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 31002
}



In [8]:
def get_bert_embeddings(text):
    # Tokenize input text
    encoded_input = bert_tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt').to(device)
    #get bert embeddings
    with torch.no_grad():
        bert_output = bert_model(**encoded_input)
    bert_embeddings = bert_output.last_hidden_state[:,0,:].cpu().numpy()
    return bert_embeddings

#get train embeddings
train_embeddings = []
for text in tqdm(train_texts):
    train_embeddings.append(get_bert_embeddings(text))
train_embeddings = np.array(train_embeddings)
train_embeddings = np.squeeze(train_embeddings, axis=1)
print('train embeddings shape: ', train_embeddings.shape)

100%|████████████████████████████████████| 17766/17766 [02:52<00:00, 103.27it/s]

train embeddings shape:  (17766, 768)





In [9]:
#get validation embeddings
val_embeddings = []
for text in tqdm(val_texts):
    val_embeddings.append(get_bert_embeddings(text))
val_embeddings = np.array(val_embeddings)
val_embeddings = np.squeeze(val_embeddings, axis=1)
print('validation embeddings shape: ', val_embeddings.shape) #shape: (num_samples, 1, 768)


#get test embeddings
test_embeddings = []
for text in tqdm(test_texts):
    test_embeddings.append(get_bert_embeddings(text))
test_embeddings = np.array(test_embeddings)
test_embeddings = np.squeeze(test_embeddings, axis=1)
print('test embeddings shape: ', test_embeddings.shape) #shape: (num_samples, 1, 768)

100%|██████████████████████████████████████| 1975/1975 [00:18<00:00, 106.14it/s]


validation embeddings shape:  (1975, 768)


100%|██████████████████████████████████████| 2194/2194 [00:20<00:00, 106.31it/s]

test embeddings shape:  (2194, 768)





In [10]:
import string
def count_punctuations(text):
    count = sum([1 for char in text if char in string.punctuation])
    return count

train_punc = []
for text in train_texts:
    train_punc.append(count_punctuations(text))
train_punc = np.array(train_punc)

val_punc = []
for text in val_texts:
    val_punc.append(count_punctuations(text))
val_punc = np.array(val_punc)

test_punc = []
for text in test_texts:
    test_punc.append(count_punctuations(text))
test_punc = np.array(test_punc)
print('train punc shape: ', train_punc.shape) #shape: (num_samples, 1)

train punc shape:  (17766,)


In [11]:
def count_capital_letters(text):
    count = sum([1 for char in text if char.isupper()])
    return count

train_capital = []
for text in train_texts:
    train_capital.append(count_capital_letters(text))
train_capital = np.array(train_capital)

val_capital = []
for text in val_texts:
    val_capital.append(count_capital_letters(text))
val_capital = np.array(val_capital)

test_capital = []
for text in test_texts:
    test_capital.append(count_capital_letters(text))
test_capital = np.array(test_capital)
print('train capital shape: ', train_capital.shape) #shape: (num_samples, 1)

train capital shape:  (17766,)


In [12]:
#function to perform sentiment analysis on a spanish text
from transformers import pipeline
sentiment_analysis = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

In [15]:
def get_sentiment(text):
    sentiment = sentiment_analysis(text)[0]['label']
    #remove stars
    #if its 1 star return 1
    if sentiment == '1 star':
        sentiment = 1
    #if its 2 stars return 2
    elif sentiment == '2 stars':
        sentiment = 2
    #if its 3 stars return 3
    elif sentiment == '3 stars':
        sentiment = 3
    #if its 4 stars return 4
    elif sentiment == '4 stars':
        sentiment = 4
    #if its 5 stars return 5
    elif sentiment == '5 stars':
        sentiment = 5
    return sentiment #dim: (num_samples, 1) range: [1,5]

train_sentiment = []
for text in tqdm(train_texts):
    train_sentiment.append(get_sentiment(text))
train_sentiment = np.array(train_sentiment)

val_sentiment = []
for text in tqdm(val_texts):
    val_sentiment.append(get_sentiment(text))
val_sentiment = np.array(val_sentiment)

test_sentiment = []
for text in tqdm(test_texts):
    test_sentiment.append(get_sentiment(text))
test_sentiment = np.array(test_sentiment)
print('train sentiment shape: ', train_sentiment.shape) #shape: (num_samples, 1)

100%|█████████████████████████████████████| 17766/17766 [14:01<00:00, 21.12it/s]
100%|███████████████████████████████████████| 1975/1975 [01:33<00:00, 21.09it/s]
100%|███████████████████████████████████████| 2194/2194 [01:42<00:00, 21.42it/s]

train sentiment shape:  (17766,)





In [16]:
import spacy
nlp = spacy.load("es_core_news_sm")
#function to get pos tags for each category
def get_pos(text):
    doc = nlp(text)
    adj_count = 0
    noun_count = 0
    verb_count = 0
    adp_count = 0
    det_count = 0
    for token in doc:
        if token.pos_ == 'ADJ':
            adj_count += 1
        elif token.pos_ == 'NOUN':
            noun_count += 1
        elif token.pos_ == 'VERB':
            verb_count += 1
        elif token.pos_ == 'ADP':
            adp_count += 1
        elif token.pos_ == 'DET':
            det_count += 1
    return [adj_count, noun_count, verb_count, adp_count, det_count] #dim: (num_samples, 5) 

train_pos = []
for text in tqdm(train_texts):
    train_pos.append(get_pos(text))
train_pos = np.array(train_pos)

val_pos = []
for text in tqdm(val_texts):
    val_pos.append(get_pos(text))
val_pos = np.array(val_pos)

test_pos = []
for text in tqdm(test_texts):
    test_pos.append(get_pos(text))
test_pos = np.array(test_pos)
print('train pos shape: ', train_pos.shape) #shape: (num_samples, 5)

100%|████████████████████████████████████| 17766/17766 [02:46<00:00, 106.97it/s]
100%|██████████████████████████████████████| 1975/1975 [00:17<00:00, 109.91it/s]
100%|██████████████████████████████████████| 2194/2194 [00:20<00:00, 108.29it/s]

train pos shape:  (17766, 5)





In [17]:
from transformers import pipeline
ner_analysis = pipeline("ner", model="mrm8488/bert-spanish-cased-finetuned-ner")

In [18]:
def get_ner(text):
    ner = ner_analysis(text)
    loc_count = 0
    org_count = 0
    per_count = 0
    misc_count = 0
    for item in ner:
        if item['entity'] == 'B-LOC' or item['entity'] == 'I-LOC':
            loc_count += 1
        elif item['entity'] == 'B-ORG' or item['entity'] == 'I-ORG':
            org_count += 1
        elif item['entity'] == 'B-PER' or item['entity'] == 'I-PER':
            per_count += 1
        elif item['entity'] == 'B-MISC' or item['entity'] == 'I-MISC':
            misc_count += 1
    return [loc_count, org_count, per_count, misc_count] #dim: (num_samples, 4)

train_ner = []
for text in tqdm(train_texts):
    train_ner.append(get_ner(text))
train_ner = np.array(train_ner)

val_ner = []
for text in tqdm(val_texts):
    val_ner.append(get_ner(text))
val_ner = np.array(val_ner)

test_ner = []
for text in tqdm(test_texts):
    test_ner.append(get_ner(text))
test_ner = np.array(test_ner)
print('train ner shape: ', train_ner.shape) #shape: (num_samples, 4)

  0%|                                                 | 0/17766 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|█████████████████████████████████████| 17766/17766 [13:07<00:00, 22.56it/s]
100%|███████████████████████████████████████| 1975/1975 [01:28<00:00, 22.40it/s]
100%|███████████████████████████████████████| 2194/2194 [01:38<00:00, 22.17it/s]

train ner shape:  (17766, 4)





In [19]:
train_punc = train_punc.reshape(-1,1)
val_punc = val_punc.reshape(-1,1)
test_punc = test_punc.reshape(-1,1)

train_capital = train_capital.reshape(-1,1)
val_capital = val_capital.reshape(-1,1)
test_capital = test_capital.reshape(-1,1)

train_sentiment = train_sentiment.reshape(-1,1)
val_sentiment = val_sentiment.reshape(-1,1)
test_sentiment = test_sentiment.reshape(-1,1)

In [20]:
#concatenate all features
train_features = np.concatenate((train_punc, train_capital,train_sentiment, train_pos, train_ner), axis=1) #dim: (num_samples, 11)
val_features = np.concatenate((val_punc, val_capital,val_sentiment, val_pos, val_ner), axis=1) #dim: (num_samples, 11)
test_features = np.concatenate((test_punc, test_capital,test_sentiment, test_pos, test_ner), axis=1) #dim: (num_samples, 11)
print('train features shape: ', train_features.shape) #shape: (num_samples, 11)
print(train_features[0])
#concatenate all features
# train_features = np.concatenate((train_pos, train_ner), axis=1) #dim: (num_samples, 11)
# val_features = np.concatenate((val_pos, val_ner), axis=1) #dim: (num_samples, 11)
# test_features = np.concatenate((test_pos, test_ner), axis=1) #dim: (num_samples, 11)
# print('train features shape: ', train_features.shape) #shape: (num_samples, 11)
# print(train_features[0])

train features shape:  (17766, 12)
[12 13  1  8 15  4 11 13  0  1  0  1]


In [22]:
#save these features in a file
np.save('train_features.npy', train_features)
np.save('val_features.npy', val_features)
np.save('test_features.npy', test_features)

In [23]:
# from sklearn.decomposition import PCA
# # Set the number of components you want to keep
# n_components = 15
# # Fit PCA on the validation embeddings and transform them
# pca = PCA(n_components=n_components)

In [24]:
# train_embeddings_pca = pca.fit_transform(train_embeddings)
# print('train embeddings pca shape: ', train_embeddings_pca.shape) #shape: (num_samples, n_components)

# val_embeddings_pca = pca.transform(val_embeddings)
# print('validation embeddings pca shape: ', val_embeddings_pca.shape) #shape: (num_samples, n_components)

# test_embeddings_pca = pca.transform(test_embeddings)
# print('test embeddings pca shape: ', test_embeddings_pca.shape) #shape: (num_samples, n_components)

In [25]:
# from pysentimiento import create_analyzer
# analyzer = create_analyzer(task="sentiment", lang="es")
# text = "Este es un ejemplo de texto con sentimiento."

# result = analyzer.predict(text)

# pos_prob = result.prob_pos
# neg_prob = result.prob_neg
# neu_prob = result.prob_neu

# print("Positive Probability:", pos_prob)
# print("Negative Probability:", neg_prob)
# print("Neutral Probability:", neu_prob)

In [115]:
batch_size = 32
train_dataset = torch.utils.data.TensorDataset(torch.tensor(train_embeddings), torch.tensor(train_labels), torch.tensor(train_features))
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

valid_dataset = torch.utils.data.TensorDataset(torch.tensor(val_embeddings), torch.tensor(val_labels), torch.tensor(val_features))
val_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False)

test_dataset = torch.utils.data.TensorDataset(torch.tensor(test_embeddings), torch.tensor(test_labels), torch.tensor(test_features))
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

for embeddings, labels, f in train_loader:
    print(embeddings)
    print(labels)
    print(f)
    break

tensor([[-0.0497, -0.4041, -0.3824,  ...,  0.0385,  0.6834,  0.2987],
        [ 0.0174,  0.5261, -0.3379,  ..., -0.5308,  0.4677,  0.5002],
        [ 0.2951,  0.7370,  0.2409,  ..., -1.1159,  0.2275,  0.1440],
        ...,
        [ 0.5721,  0.0393,  0.3432,  ..., -0.1891,  0.1854,  0.8616],
        [ 0.3103,  0.5615,  0.0251,  ..., -1.0129,  0.2394,  0.2624],
        [ 0.3008,  0.5643,  0.3183,  ..., -0.9900,  0.0948,  0.5841]])
tensor([3, 1, 5, 5, 3, 0, 0, 5, 2, 0, 4, 1, 4, 0, 2, 0, 4, 4, 1, 0, 0, 4, 1, 5,
        0, 0, 4, 3, 1, 0, 4, 5])
tensor([[ 8, 14,  2,  8, 13,  4, 12, 14,  2, 14,  0,  0],
        [11,  3,  2,  5, 17, 10, 10, 13,  0,  0,  0,  0],
        [ 7,  4,  5, 10,  9,  5,  7, 10,  0,  0,  0,  0],
        [ 8,  6,  3,  0, 23,  9, 17, 13,  0,  0,  0,  1],
        [ 8,  8,  1,  2, 13, 13,  6, 13,  0,  0,  0,  0],
        [ 7,  4,  4, 12, 22,  8, 14,  9,  0,  0,  0,  0],
        [ 6,  2,  4,  5, 13,  5,  6, 10,  0,  0,  0,  0],
        [10, 15,  3,  3, 14,  8, 15, 14,  3,  0

In [116]:
# # Define neural network architecture
# import torch.nn as nn
# import torch.nn.functional as F

# #create a neural network to use the embeddings and do classification
# class Net(nn.Module):
#     def __init__(self, input_size, hidden_size, num_classes):
#         super(Net, self).__init__()
#         self.fc1 = nn.Linear(input_size, hidden_size) 
#         self.dropout1 = nn.Dropout(0.1)
#         self.fc2 = nn.Linear(hidden_size, num_classes)  

#     def forward(self, x):
#         # out = F.relu(self.bn1(self.fc1(x)))
#         out = F.relu(self.fc1(x))
#         out = self.dropout1(out)
#         out = self.fc2(out)
#         return out
    
# # Hyperparameters
# input_size = 768
# hidden_size = 128
# num_classes = 2
# num_epochs = 20
# learning_rate = 0.001

In [119]:
# Define neural network architecture
import torch.nn as nn
import torch.nn.functional as F

#create a neural network to use the embeddings and do classification
class Net(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1) 
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2+12, num_classes) #11 is the number of features
        self.dropout = nn.Dropout(0.2)

    def forward(self, x, f):
        out = F.relu(self.fc1(x))
        out = self.dropout(out)
        out = F.relu(self.fc2(out))
        out = torch.cat((out, f), dim=1)    
        out = self.fc3(out)
        return out
    
# Hyperparameters
input_size = 768
hidden_size1 = 256
hidden_size2 = 38
num_classes = 6
num_epochs = 20
learning_rate = 0.001

In [120]:
# Create a model from the neural network
# model = Net(input_size, hidden_size, num_classes).to(device)
model = Net(input_size, hidden_size1, hidden_size2, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [121]:
# from tqdm import tqdm

# best_val_acc = 0.0
# total_step = len(train_loader)
# half_epoch_step = total_step // 2

# for epoch in range(num_epochs):
#     running_loss = 0.0
#     for i, (embeddings, labels) in tqdm(enumerate(train_loader), total=total_step, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch"):
#         # Move tensors to the configured device
#         embeddings = embeddings.to(device)
#         labels = labels.to(device)

#         # Forward pass
#         outputs = model(embeddings)
#         loss = criterion(outputs, labels)

#         # Backward and optimize
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         running_loss += loss.item()

#         # Print loss every half epoch
#         if (i+1) % half_epoch_step == 0:
#             avg_loss = running_loss / half_epoch_step
#             print(f"Epoch {epoch+1}/{num_epochs} Loss after {i+1} batches: {avg_loss:.4f}")
#             running_loss = 0.0
            
#     # Validate the model
#     with torch.no_grad():
#         correct = 0
#         total = 0
#         for embeddings, labels in val_loader:
#             embeddings = embeddings.to(device)
#             labels = labels.to(device)
#             outputs = model(embeddings)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()

#         # Print validation stats
#         val_acc = 100 * correct / total
#         print(f'Epoch {epoch+1}/{num_epochs} Validation Accuracy: {val_acc:.2f} %')

#         # Save the model if the validation accuracy is better than the previous best
#         if val_acc > best_val_acc:
#             best_val_acc = val_acc
#             torch.save(model.state_dict(), 'best_model.pt')
#             print(f'Saved model with validation accuracy: {best_val_acc:.2f} %')

In [122]:
from tqdm import tqdm

best_val_loss = float('inf') # initialize best validation loss to infinity
total_step = len(train_loader)
half_epoch_step = total_step // 2

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (embeddings, labels, f) in tqdm(enumerate(train_loader), total=total_step, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch"):
        # Move tensors to the configured device
        embeddings = embeddings.to(device)
        labels = labels.to(device)
        f = f.to(device)
        
        # Forward pass
        outputs = model(embeddings, f)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print loss every half epoch
        if (i+1) % half_epoch_step == 0:
            avg_loss = running_loss / half_epoch_step
            print(f"Epoch {epoch+1}/{num_epochs} Loss after {i+1} batches: {avg_loss:.4f}")
            running_loss = 0.0
            
    # Validate the model
    with torch.no_grad():
        val_loss = 0.0
        correct = 0
        total = 0
        for embeddings, labels, f in val_loader:
            embeddings = embeddings.to(device)
            labels = labels.to(device)
            f = f.to(device)
            outputs = model(embeddings, f)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Print validation stats
        val_acc = 100 * correct / total
        val_loss = val_loss / len(val_loader)
        print(f'Epoch {epoch+1}/{num_epochs} Validation Accuracy: {val_acc:.2f} %, Validation Loss: {val_loss:.4f}')

        # Save the model if the validation loss is better than the previous best
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pt')
            print(f'Saved model with validation loss: {best_val_loss:.4f}')


Epoch 1/20:  58%|██████████████▌          | 324/556 [00:01<00:00, 266.68batch/s]

Epoch 1/20 Loss after 278 batches: 1.6712


Epoch 1/20: 100%|█████████████████████████| 556/556 [00:01<00:00, 279.89batch/s]


Epoch 1/20 Loss after 556 batches: 1.3845
Epoch 1/20 Validation Accuracy: 42.48 %, Validation Loss: 1.3152
Saved model with validation loss: 1.3152


Epoch 2/20:  60%|███████████████          | 334/556 [00:01<00:00, 308.22batch/s]

Epoch 2/20 Loss after 278 batches: 1.2823


Epoch 2/20: 100%|█████████████████████████| 556/556 [00:01<00:00, 308.91batch/s]


Epoch 2/20 Loss after 556 batches: 1.2456
Epoch 2/20 Validation Accuracy: 44.96 %, Validation Loss: 1.2302
Saved model with validation loss: 1.2302


Epoch 3/20:  54%|█████████████▍           | 300/556 [00:01<00:00, 269.97batch/s]

Epoch 3/20 Loss after 278 batches: 1.1896


Epoch 3/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 270.40batch/s]


Epoch 3/20 Loss after 556 batches: 1.1703
Epoch 3/20 Validation Accuracy: 46.08 %, Validation Loss: 1.2254
Saved model with validation loss: 1.2254


Epoch 4/20:  56%|█████████████▉           | 311/556 [00:01<00:01, 228.13batch/s]

Epoch 4/20 Loss after 278 batches: 1.1137


Epoch 4/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 246.27batch/s]


Epoch 4/20 Loss after 556 batches: 1.1287
Epoch 4/20 Validation Accuracy: 46.58 %, Validation Loss: 1.1662
Saved model with validation loss: 1.1662


Epoch 5/20:  59%|██████████████▊          | 330/556 [00:01<00:00, 272.87batch/s]

Epoch 5/20 Loss after 278 batches: 1.0630


Epoch 5/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 277.16batch/s]


Epoch 5/20 Loss after 556 batches: 1.0712
Epoch 5/20 Validation Accuracy: 47.09 %, Validation Loss: 1.1909


Epoch 6/20:  55%|█████████████▋           | 304/556 [00:01<00:00, 269.85batch/s]

Epoch 6/20 Loss after 278 batches: 1.0127


Epoch 6/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 257.38batch/s]


Epoch 6/20 Loss after 556 batches: 1.0336
Epoch 6/20 Validation Accuracy: 47.65 %, Validation Loss: 1.1709


Epoch 7/20:  57%|██████████████▏          | 316/556 [00:01<00:00, 265.13batch/s]

Epoch 7/20 Loss after 278 batches: 0.9737


Epoch 7/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 256.12batch/s]


Epoch 7/20 Loss after 556 batches: 0.9854
Epoch 7/20 Validation Accuracy: 48.15 %, Validation Loss: 1.1861


Epoch 8/20:  57%|██████████████▏          | 315/556 [00:01<00:00, 278.69batch/s]

Epoch 8/20 Loss after 278 batches: 0.9204


Epoch 8/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 267.63batch/s]


Epoch 8/20 Loss after 556 batches: 0.9471
Epoch 8/20 Validation Accuracy: 48.10 %, Validation Loss: 1.2235


Epoch 9/20:  57%|██████████████▏          | 315/556 [00:01<00:00, 282.56batch/s]

Epoch 9/20 Loss after 278 batches: 0.8769


Epoch 9/20: 100%|█████████████████████████| 556/556 [00:02<00:00, 277.65batch/s]


Epoch 9/20 Loss after 556 batches: 0.9026
Epoch 9/20 Validation Accuracy: 45.42 %, Validation Loss: 1.2733


Epoch 10/20:  63%|███████████████         | 350/556 [00:01<00:00, 336.58batch/s]

Epoch 10/20 Loss after 278 batches: 0.8233


Epoch 10/20: 100%|████████████████████████| 556/556 [00:02<00:00, 270.51batch/s]


Epoch 10/20 Loss after 556 batches: 0.8784
Epoch 10/20 Validation Accuracy: 47.19 %, Validation Loss: 1.2710


Epoch 11/20:  58%|█████████████▊          | 321/556 [00:01<00:01, 231.31batch/s]

Epoch 11/20 Loss after 278 batches: 0.7881


Epoch 11/20: 100%|████████████████████████| 556/556 [00:02<00:00, 243.42batch/s]


Epoch 11/20 Loss after 556 batches: 0.8213
Epoch 11/20 Validation Accuracy: 45.22 %, Validation Loss: 1.3727


Epoch 12/20:  58%|██████████████          | 325/556 [00:01<00:00, 261.67batch/s]

Epoch 12/20 Loss after 278 batches: 0.7604


Epoch 12/20: 100%|████████████████████████| 556/556 [00:02<00:00, 240.81batch/s]


Epoch 12/20 Loss after 556 batches: 0.7820
Epoch 12/20 Validation Accuracy: 46.84 %, Validation Loss: 1.3908


Epoch 13/20:  60%|██████████████▍         | 335/556 [00:01<00:00, 265.31batch/s]

Epoch 13/20 Loss after 278 batches: 0.7147


Epoch 13/20: 100%|████████████████████████| 556/556 [00:02<00:00, 243.91batch/s]


Epoch 13/20 Loss after 556 batches: 0.7347
Epoch 13/20 Validation Accuracy: 46.03 %, Validation Loss: 1.4354


Epoch 14/20:  61%|██████████████▋         | 339/556 [00:01<00:00, 304.64batch/s]

Epoch 14/20 Loss after 278 batches: 0.6668


Epoch 14/20: 100%|████████████████████████| 556/556 [00:02<00:00, 261.55batch/s]


Epoch 14/20 Loss after 556 batches: 0.7117
Epoch 14/20 Validation Accuracy: 46.38 %, Validation Loss: 1.4505


Epoch 15/20:  58%|█████████████▉          | 323/556 [00:01<00:00, 260.05batch/s]

Epoch 15/20 Loss after 278 batches: 0.6387


Epoch 15/20: 100%|████████████████████████| 556/556 [00:02<00:00, 244.24batch/s]


Epoch 15/20 Loss after 556 batches: 0.6749
Epoch 15/20 Validation Accuracy: 46.94 %, Validation Loss: 1.5611


Epoch 16/20:  58%|█████████████▉          | 322/556 [00:01<00:00, 274.85batch/s]

Epoch 16/20 Loss after 278 batches: 0.5998


Epoch 16/20: 100%|████████████████████████| 556/556 [00:02<00:00, 265.42batch/s]


Epoch 16/20 Loss after 556 batches: 0.6407
Epoch 16/20 Validation Accuracy: 45.87 %, Validation Loss: 1.6362


Epoch 17/20:  56%|█████████████▍          | 311/556 [00:01<00:00, 264.31batch/s]

Epoch 17/20 Loss after 278 batches: 0.5687


Epoch 17/20: 100%|████████████████████████| 556/556 [00:02<00:00, 275.57batch/s]


Epoch 17/20 Loss after 556 batches: 0.6140
Epoch 17/20 Validation Accuracy: 47.14 %, Validation Loss: 1.6332


Epoch 18/20:  55%|█████████████           | 304/556 [00:01<00:00, 266.98batch/s]

Epoch 18/20 Loss after 278 batches: 0.5521


Epoch 18/20: 100%|████████████████████████| 556/556 [00:02<00:00, 254.96batch/s]


Epoch 18/20 Loss after 556 batches: 0.5710
Epoch 18/20 Validation Accuracy: 46.53 %, Validation Loss: 1.6907


Epoch 19/20:  59%|██████████████          | 327/556 [00:01<00:00, 234.48batch/s]

Epoch 19/20 Loss after 278 batches: 0.5137


Epoch 19/20: 100%|████████████████████████| 556/556 [00:02<00:00, 231.76batch/s]


Epoch 19/20 Loss after 556 batches: 0.5633
Epoch 19/20 Validation Accuracy: 45.97 %, Validation Loss: 1.7297


Epoch 20/20:  54%|████████████▉           | 299/556 [00:01<00:00, 271.62batch/s]

Epoch 20/20 Loss after 278 batches: 0.4913


Epoch 20/20: 100%|████████████████████████| 556/556 [00:02<00:00, 262.15batch/s]


Epoch 20/20 Loss after 556 batches: 0.5415
Epoch 20/20 Validation Accuracy: 45.32 %, Validation Loss: 1.8432


In [123]:
with torch.no_grad():
    correct = 0
    total = 0
    predicted_labels = []
    true_labels = []
    for embeddings, labels, f in tqdm(test_loader):
        embeddings = embeddings.to(device)
        labels = labels.to(device)
        f = f.to(device)
        outputs = model(embeddings,f)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        predicted_labels.extend(predicted.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
    #generate classification report
    test_report = classification_report(true_labels, predicted_labels)

100%|██████████████████████████████████████████| 69/69 [00:00<00:00, 559.19it/s]


In [124]:
print(test_report)

              precision    recall  f1-score   support

           0       0.45      0.51      0.48       317
           1       0.31      0.40      0.35       354
           2       0.37      0.24      0.29       370
           3       0.47      0.50      0.49       378
           4       0.39      0.39      0.39       369
           5       0.75      0.70      0.73       406

    accuracy                           0.46      2194
   macro avg       0.46      0.46      0.45      2194
weighted avg       0.46      0.46      0.46      2194

