In [1]:
!pip install -U pip
!pip install -qU nltk transformers lightning scikit-learn pandas numpy matplotlib jupyter ipywidgets



In [2]:
import torch
device = "cuda" if torch.cuda.is_available else "cpu"
print(device)

cuda


In [3]:
import pandas as pd
import json
df = pd.read_csv('text_emotion.csv')

In [9]:
%%time
##### VERY SLOW
file_name = "data/data.json"

##### Open the file in write mode and store the data
with open(file_name, "r") as json_file:
    trainData_padded = json.load(json_file)

CPU times: user 5min 57s, sys: 1min 38s, total: 7min 36s
Wall time: 7min 36s


In [10]:
##%%
import pandas as pd
from sklearn.preprocessing import OneHotEncoder


# Extract the 'sentiment' column as the target variable
sentiments = df['sentiment'].values

sentiments = [[label] for label in sentiments]

# Initialize the OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)

# Fit and transform the encoder on the target variable
y = encoder.fit_transform(sentiments)

# y_train is now an array of one-hot encoded vectors
y
##%%

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [11]:
import torch.nn.functional as F
import torch.optim as optim
import torch
import torch.nn as nn
import lightning as L

class DualCnnBiLsmtModel(L.LightningModule):
    def __init__(self, embedding_dim, hidden_dim, tagset_size,senLen,lr):
        super(DualCnnBiLsmtModel, self).__init__()
        torch.manual_seed(seed=42)
        self.hidden_dim = hidden_dim
        self.tagset_size = tagset_size
        self.cnn = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(senLen, 1))
        self.fc1 = nn.Linear(1, hidden_dim)  # Adjust the hidden_dim as needed
        self.fc2 = nn.Linear(hidden_dim, self.tagset_size)
        self.lstm = nn.LSTM(embedding_dim, 128//2, num_layers=1, bidirectional=True)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.dropout = nn.Dropout(p=0.35)
        self.relu = nn.ReLU()
        self.learning_rate =lr

    def forward(self, x):
        lstm_output1, _ = self.lstm(x)

        cnn_output1 = F.relu(self.cnn(lstm_output1))

        cnn_output1 = self.dropout(cnn_output1)
        # output1 = F.max_pool1d(lstm_output1, lstm_output1.size(2)).squeeze(2)

        cnn_output2 = F.relu(self.cnn(x))
        # cnn_output1 = F.max_pool1d(cnn_output1, cnn_output1.size(2)).squeeze(2)
        cnn_output2 = self.dropout(cnn_output2)
        lstm_output2, _ = self.lstm(cnn_output2)

        combined_output = torch.cat((lstm_output2, cnn_output1),dim=2)

        output = F.max_pool1d(combined_output, combined_output.size(2))

        output = output.view(combined_output.size(0), -1)

        # Apply the first dense (fully connected) layer
        output = self.fc1(output)
        # Apply the second dense layer
        output = self.fc2(output)

        output = F.softmax(output, dim=1)



        # print(output.size())

        # output2 = self.relu(lstm_output)  # Apply ReLU activation
        # output2 = self.hidden2tag(lstm_output)
        return output

    def loss(self, x, y):
      logits = self.forward(x)

      criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for softmax loss
      loss = criterion(logits, y)

      return loss

    # def decode(self, x):
    #     logits = self.forward(x)
    #     predicted_tags = self.crf.decode(logits)
    #     return predicted_tags

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = self.loss(x, y)
        print(loss)
        return loss
    #0.0031622776601683794
    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr= self.learning_rate)
        return optimizer

In [12]:
suggested_lr = 0.015848931924611148

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
from torch.utils.data.dataset import random_split

# Define the dimensions and size of your dataset
embedding_dim = 768  # Change this to match your word embedding dimension
hidden_dim = 128  # Change this to match your model architecture
tagset_size = len(y[0].tolist())  # The number of classes: happy, sad, anger
batch_size = 32

trainData= torch.tensor(trainData_padded)
y = torch.tensor(y.tolist()).to(float)

# Create a PyTorch dataset
dataset = TensorDataset(trainData, y)

# Define the number of folds for cross-validation
num_folds = 10

# Initialize a PyTorch Lightning trainer with your desired settings
trainer = L.Trainer(max_epochs=10)  # Adjust max_epochs as needed

# Initialize KFold cross-validation
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
# model = BiLSTMCRFModel(embedding_dim, hidden_dim, tagset_size, len(trainData[0]))

# Lists to store performance metrics for each fold
fold_metrics = []
accuracy = 0
# Perform 10-fold cross-validation
for fold, (train_indices, test_indices) in enumerate(kf.split(dataset)):
    # Create data loaders for the current fold


    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    test_sampler = torch.utils.data.SubsetRandomSampler(test_indices)


    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, drop_last=True)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler, drop_last=True)

    # Create a new model for each fold
    model = DualCnnBiLsmtModel(embedding_dim, hidden_dim, tagset_size, len(trainData[0]),suggested_lr)

    # Train the model
    trainer.fit(model, train_loader, test_loader)

    correct_predictions = 0
    total_predictions = 0


    for batch in test_loader:  # Assuming you have a DataLoader for the test set
      x_test, y_test = batch

      # Forward pass to get model predictions
      with torch.no_grad():  # Disable gradient tracking
          predictions = model(x_test)



      # Convert predictions to class labels by selecting the class with the highest probability
      predicted_labels = torch.argmax(predictions, dim=1)
      true_labels = torch.argmax(y_test, dim=1)

      # Compare predicted labels with true labels
      correct_predictions += (predicted_labels == true_labels).sum().item()
      total_predictions += len(y_test)

    # accuracy = correct_predictions / total_predictions
    print(correct_predictions)
    print(total_predictions)
    accuracy += correct_predictions/total_predictions

print("Model 10-Fold accuracy = ",accuracy/10)

/home/UG/dhruv015/.conda/envs/sc4001_env/lib/python3.11/site-packages/lightning/fabric/plugins/environments/slurm.py:191: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/UG/dhruv015/.conda/envs/sc4001_env/lib/python3 ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/UG/dhruv015/.conda/envs/sc4001_env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[

Training: |          | 0/? [00:00<?, ?it/s]

  output = F.softmax(output)


tensor(2.5691, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.5661, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.6110, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.4452, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.5953, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.4078, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.5013, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.5328, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.4703, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.3453, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.3766, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.5328, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(2.5641, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward1>)