# Training notebook

In [40]:
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
from datasets import load_dataset, DatasetDict
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_scheduler
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataloader import default_collate
import torch.nn as nn
from tqdm import tqdm
import torch
import pandas as pd

Read from the .csv file, remove all non string and non space characters

In [41]:
# Load the dataset
df = pd.read_csv('./emotions.csv')

# Preprocess the text data (e.g., lowercasing, removing punctuation)
df['text'] = df['text'].str.lower().str.replace('[^\w\s]', '')


Initialize Word2Vec, tokenize sentences

In [42]:
from gensim.models import Word2Vec

# Tokenize the text data
sentences = [text.split() for text in df['text']]

# Train the Word2Vec model
word2vec_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)

Convert sentences to vectors

In [43]:
import numpy as np

def sentence_to_vector(sentence, model):
    words = sentence.split()
    word_vectors = [model.wv[word] for word in words if word in model.wv]
    if len(word_vectors) == 0:
        return np.zeros(model.vector_size)
    return np.mean(word_vectors, axis=0)

# Apply the function to the dataset
df['vector'] = df['text'].apply(lambda x: sentence_to_vector(x, word2vec_model))

Convert vectors to tensors

In [44]:
from sklearn.model_selection import train_test_split

# Split the dataset
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
# Convert vectors and labels to tensors
X_train = torch.tensor(np.stack(train_df['vector'].values))
y_train = torch.tensor(train_df['label'].values)
X_val = torch.tensor(np.stack(val_df['vector'].values))
y_val = torch.tensor(val_df['label'].values)
# Create a TensorDataset and DataLoader
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=16)

Create ML model, move data to computing device (GPU or CPU, train model)

In [45]:
def save_checkpoint(state, filename="best_model.pth.tar"):
    torch.save(state, filename)

In [46]:
from SimpleNN import SimpleNN

# Initialize the model, loss function, and optimizer
input_size = word2vec_model.vector_size
num_classes = len(df['label'].unique())
model = SimpleNN(input_size, 128, 3, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
model.to(device)
best_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    for batch in train_dataloader:
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation phase
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_dataloader:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    val_loss /= len(val_dataloader)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {val_loss}")
    
    if val_loss < best_loss:
        best_loss = val_loss
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_loss': best_loss,
        }, filename="best_model.pth.tar")

cuda
Epoch 1/50, Loss: 0.8998445027724632
Epoch 2/50, Loss: 0.8438431063555775
Epoch 3/50, Loss: 0.8337533130335318
Epoch 4/50, Loss: 0.8145224944835111
Epoch 5/50, Loss: 0.7945458711629133
Epoch 6/50, Loss: 0.7844093910358936
Epoch 7/50, Loss: 0.7830276697147741
Epoch 8/50, Loss: 0.7674169833452865
Epoch 9/50, Loss: 0.7697741696677174
Epoch 10/50, Loss: 0.7644101192227848
Epoch 11/50, Loss: 0.7631123577037441
Epoch 12/50, Loss: 0.7525854059188523
Epoch 13/50, Loss: 0.7982363381539762
Epoch 14/50, Loss: 0.7528124732932072
Epoch 15/50, Loss: 0.7934724068491822
Epoch 16/50, Loss: 0.7475995855948171
Epoch 17/50, Loss: 0.7355874576385134
Epoch 18/50, Loss: 0.748467768612349
Epoch 19/50, Loss: 0.7284003380529483
Epoch 20/50, Loss: 0.7377614253365908
Epoch 21/50, Loss: 0.7279405592327909
Epoch 22/50, Loss: 0.7325739142156292
Epoch 23/50, Loss: 0.754358298300187
Epoch 24/50, Loss: 0.7259057931354649
Epoch 25/50, Loss: 0.7225862524058078
Epoch 26/50, Loss: 0.7274578026991114
Epoch 27/50, Loss:

Save pretrained model to storage

In [47]:
def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = SimpleNN(input_size, 128, 3, num_classes)
    model.load_state_dict(checkpoint['state_dict'])
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer.load_state_dict(checkpoint['optimizer'])
    
    epoch = checkpoint['epoch']
    best_loss = checkpoint['best_loss']
    
    return model, optimizer, epoch, best_loss

In [48]:
model, optimizer, start_epoch, best_loss = load_checkpoint('best_model.pth.tar')
print(best_loss)
torch.save(model, "pretrained_emotion_text_detector.pt")

0.7097153176537998


  checkpoint = torch.load(filepath)


Draw model graphic

In [49]:
from torchviz import make_dot
model.to(device)

# Create a dummy input tensor and move it to the same device as the model
dummy_input = torch.randn(1, input_size).to(device)

# Forward pass to get the computational graph
output = model(dummy_input)

# Visualize the model
dot = make_dot(output, params=dict(model.named_parameters()))
dot.format = 'png'
dot.render('model_visualization')

'model_visualization.png'

In [50]:
sentence = "I am mad at you"

vec = sentence_to_vector(sentence=sentence, model=word2vec_model)
model.to(device)
model.eval()
input = torch.tensor(vec).unsqueeze(0).to(device)

with torch.no_grad():
    pred = model(input)

print(pred)

tensor([[ 0.3545, -0.3080, -2.0409,  2.5661, -0.0327, -3.7139]],
       device='cuda:0')


In [51]:
import torch.nn.functional as F
# Assuming pred is the raw output from your model
# Apply softmax to get probabilities
probabilities = F.softmax(pred, dim=1)

# Get the index of the highest probability
predicted_class_idx = torch.argmax(probabilities, dim=1).item()

# Define your label mapping
label_mapping = {0: 'Sadness', 1: 'Joy', 2: 'Love', 3: 'Anger', 4: 'Fear', 5: 'Surprise'}

# Map the predicted index to the corresponding label
predicted_label = label_mapping[predicted_class_idx]

print(f'Predicted label: {predicted_label}')

Predicted label: Anger


In [52]:
word2vec_model.save('word2vec.model')