Malayalam English Dataset

### Imports

In [1]:
!pip install torchtext

You should consider upgrading via the '/home/ckm/python3.7/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
import os
import math
import numpy as np
import pandas as pd
from torchtext.data.utils import get_tokenizer
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
from time import sleep


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [4]:
!nvidia-smi

Mon Dec 13 21:42:00 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.00    Driver Version: 470.82.00    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:17:00.0 Off |                    0 |
| N/A   52C    P0    31W / 250W |      8MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Load embedding

In [5]:
datapath = "/home/ckm/ck_project/data/MalayalmEnglishSentiments/"
df_embedding = pd.read_csv("./spaneng.txt", sep=" ", quoting=3, header=None, index_col=0,skiprows=1)

In [6]:
df_embedding.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
.,-0.371781,-0.275541,0.168219,0.400184,1.009334,-0.538346,0.762423,0.771619,-0.43137,-0.854382,...,0.467028,0.176614,-0.284068,0.709805,-0.282179,0.593814,-0.879223,-0.541967,-0.160379,-0.035693
de,-0.413273,-1.874861,0.852767,1.103241,2.159938,-1.389717,1.762775,-0.631351,-0.821627,-1.614193,...,0.939013,0.627162,0.044463,1.053295,-0.221535,0.943178,-1.053505,0.416619,-0.333015,0.415753
:,0.082906,-0.318736,-0.021506,0.095056,1.907815,-0.799135,1.181208,0.796357,-0.880041,-0.798561,...,0.692552,0.178389,0.010945,0.772131,-0.4758,1.436996,-0.402479,-0.825933,0.657984,-0.833658
",",-0.281618,-0.183099,-0.146602,0.670847,1.100933,-0.489237,1.128854,0.176842,-0.082408,-0.795335,...,0.336128,0.338131,0.009082,0.544464,-0.387602,-0.021935,-0.826885,-0.5524,-0.237138,-0.088589
!,-1.342119,-0.25364,0.183346,0.071576,1.888788,-0.654009,0.385222,0.421549,-0.134963,-1.107106,...,1.085194,-0.592844,-0.545974,1.290254,0.088172,1.115099,-0.945343,0.143484,-0.096425,-0.202514


### Create vocab dictionary

In [7]:
m = df_embedding.to_numpy()
m.shape

(4190, 100)

In [8]:
embedding_matrix = df_embedding.to_numpy()

vocab = []

for word in list(df_embedding.index):
  vocab.append(str(word))

vocab_size , vocab_dim = embedding_matrix.shape
vocab_size, vocab_dim

(4190, 100)

In [9]:
len(vocab)

4190

In [10]:
word2idx = {w: idx for (idx, w) in enumerate(vocab)}
idx2word = {idx: w for (idx, w) in enumerate(vocab)}

In [11]:
len(word2idx) , len(idx2word)

(4190, 4190)

In [12]:
import itertools
dict(itertools.islice(word2idx.items(), 10))

{'.': 0,
 'de': 1,
 ':': 2,
 ',': 3,
 '!': 4,
 'a': 5,
 'que': 6,
 'y': 7,
 'me': 8,
 'en': 9}

### Read data

In [13]:
df = pd.read_csv('/home/ckm/ck_project/data/SpanishEnglishSentiments/SP_ENG_Sentiments.csv')

df= df.sample(frac = 1)

train_df = df[:8000]
val_df = df[8000:10000]
test_df = df[10000:]

train_df.head()

train_data =  train_df['Sentence'].values
train_labels = train_df['Label'].values

val_data =  val_df['Sentence'].values
val_labels = val_df['Label'].values

test_data =  test_df['Sentence'].values
test_labels = test_df['Label'].values

print(len(train_data), len(train_labels))
print(len(val_data), len(val_labels))
print(len(test_data), len(test_labels))


8000 8000
2000 2000
2002 2002


In [15]:
df.head()

Unnamed: 0,Sentence,Label
8340,Buenos dias !! ☀️☀️ Ultimamente me siento muy ...,0
10069,El que me pida matrimonio con un cabron dance ...,2
1742,Yo me aguanto lol,0
7784,Yo los otros dias me fui a jangiar con el saur...,1
6550,X men esta muy cabrona .,0


In [14]:

# post = pd.read_csv("positive_add1and2.csv",sep=',')
# neg = pd.read_csv("negative_add_1and2.csv",sep=',')

# data = []
# labels = []

# for index, row in post.iterrows():
#   if index >= 30000:
#     break
  
#   comment = str(row['tweet'])
#   data.append(comment)
#   labels.append(1)


# for index, row in neg.iterrows():
#   if index >= 30000:
#     break
  
#   comment = str(row['tweet'])
#   data.append(comment)
#   labels.append(0)
  

# len(data), len(labels)

# data, labels  = shuffle(data, labels, random_state = 40)

# data[0], labels[0]

### Tokenizer

In [16]:
tokenizer = get_tokenizer('basic_english')


def tokenized_tensor(data):

  output_tokenized = []

  for sentence in data:
    output = []
    tokenized = tokenizer(sentence)
    
    for word in tokenized:
      if word in word2idx:
        id = word2idx[word]
        output.append(id)
      else:
        word2idx[word] = len(word2idx)
        id = word2idx[word]
        output.append(id)

    output = torch.tensor(output)


    output_tokenized.append(output)

  return output_tokenized

In [17]:
# tokenized_sequences = tokenized_tensor(data)

train_tokenized_sequences = tokenized_tensor(train_data)

test_tokenized_seuqences = tokenized_tensor(test_data)

val_tokenized_seuquences = tokenized_tensor(val_data)


In [18]:
print(train_tokenized_sequences[1])

tensor([  10,    6,    8, 4193, 4194,   20,   18,  337, 1157,  269,    7,   11,
        1677,    1, 4195, 4196,    3,   10,   13,  172,    0])


In [19]:
word2idx['<PAD>'] = len(word2idx)
word2idx['<PAD>']

28442

In [52]:
len(word2idx)

13511

In [20]:
## Create embedding matrix

random_init = torch.nn.Parameter(torch.Tensor( (len(word2idx) - vocab_size), vocab_dim))
torch.nn.init.kaiming_uniform_(random_init, a=math.sqrt(5))


new_matrix = np.zeros( (len(word2idx), vocab_dim) )

new_matrix[:vocab_size, :] = embedding_matrix

embedding_matrix = new_matrix

embedding_matrix[vocab_size:, :] = random_init.detach().numpy()

In [21]:
embedding_matrix.shape

(28443, 100)

In [22]:
# padded_sequences = pad_sequence(tokenized_sequences, batch_first= True, padding_value=107512)

train_padded_sequences = pad_sequence(train_tokenized_sequences, batch_first= True, padding_value=word2idx['<PAD>'])

val_padded_sequences = pad_sequence(val_tokenized_seuquences, batch_first= True, padding_value=word2idx['<PAD>'])

test_padded_sequences = pad_sequence(test_tokenized_seuqences, batch_first= True, padding_value=word2idx['<PAD>'])

In [23]:
(train_padded_sequences[1])

tensor([   10,     6,     8,  4193,  4194,    20,    18,   337,  1157,   269,
            7,    11,  1677,     1,  4195,  4196,     3,    10,    13,   172,
            0, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442,
        28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442,
        28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442])

### Dataset and Data loader 

In [24]:
class Dataset(torch.utils.data.Dataset):
    """
    This is our custom dataset class which will load the text and their corresponding labels into Pytorch tensors
    """
    def __init__(self, sequences, labels):
        self.labels = labels
        self.sequences = sequences

    def __getitem__(self, idx):
        sample = {}
        sequence = self.sequences[idx]
        label = torch.tensor(self.labels[idx])

        try:
            sample["label"] = label
            sample["token"] = sequence
        except Exception as e:
            print(e)
        
        return sample
    
    def __len__(self):
        return len(self.labels)
        

In [25]:
train_dataset = Dataset(train_padded_sequences, train_labels)

val_dataset = Dataset(val_padded_sequences, val_labels)

test_dataset = Dataset(test_padded_sequences, test_labels)

### Hyper parameters

In [27]:
## Hyper parameter

vocab_size = len(word2idx)
embed_dim = vocab_dim
seq_len = 50
hidden_size = 512
num_layer = 3
num_class = 7
batch_size = 2

LEARNING_RATE = 1e-3
EPOCHS = 30
CLIP = 0.3

In [60]:
# # Create datasets
# dataset = Dataset(padded_sequences, labels)

# split = 0.85
# train_size = int(split*len(dataset))
# val_size = len(dataset) - train_size

# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

In [28]:

## We call the dataloader class
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )

## For testing
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )


dataloaders = {'Train': train_loader, 'Val': val_loader}



In [29]:
len(train_loader), len(val_loader), len(test_loader)

(250, 62, 62)

In [30]:
max_len = -1
for comment in val_dataset:
  sentence = comment['token']
  max_len = max(max_len,sentence.shape[0])

print(max_len)

40


In [31]:
train_dataset[0]

{'label': tensor(0),
 'token': tensor([ 1288,   402,     4,     4,  4190,  4191,     8,   597,   202,  3946,
            32,    91,   380,     9,   399,    34,  4186,     7,   116,    19,
             1,  4192,   239, 28442, 28442, 28442, 28442, 28442, 28442, 28442,
         28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442,
         28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442, 28442])}

### Model

In [33]:
class SelfMatchingLayer(nn.Module):

    def __init__(self,  seq_length, embed_dim, **kwargs):

      super(SelfMatchingLayer, self).__init__()

      self.seq_length = seq_length
      self.embed_dim  = embed_dim

      self.P = torch.nn.Parameter(torch.Tensor(self.embed_dim, self.embed_dim))

      self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.kaiming_uniform_(self.P, a=math.sqrt(5))

      
    def forward(self, x):  
      
      # input shape: [batch, seq_len, embed_dim]


      #---------------------------------------------#
      # calculate weight vector a = {e_i . P.Q . e_j}
      #---------------------------------------------#

      out = torch.matmul(x,  self.P)   #out shape: [batch, seq_len, embed_dim]

      out = torch.matmul(out, torch.transpose(x, 1, 2))   #out shape: [batch, seq_len, seq_len]

      out = F.gelu(out)         # apply non linear activation

      #------------------------------------#
      # take row wise mean and apply softmax
      #------------------------------------#
      out = torch.mean(out, 2)  #out shape: [batch, seq_len, seq_len]

      out = torch.softmax(out, 0)     #out shape: [batch, seq_len, seq_len]

      out = out.unsqueeze(1)          #out shape: [batch, 1, seq_len]

      #-------------------------------------------#
      # calculate weighted embedding of every word
      #-------------------------------------------#
      out = torch.matmul(out, x)

      out = out.squeeze(1)

      return out      #out shape: [batch, seq_len]


In [34]:
word2idx['<PAD>']

28442

In [35]:
class SelfNet(nn.Module):

    def __init__(self, vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class):
        super(SelfNet, self).__init__()


        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx = word2idx['<PAD>'])
        self.embedding.load_state_dict({'weight': torch.from_numpy(embedding_matrix)})
        self.embedding.weight.requires_grad = True

        self.selfnet_layer = SelfMatchingLayer(seq_len, embed_dim)

        self.lstm = nn.LSTM(input_size = embed_dim, hidden_size = hidden_size, num_layers = num_layer, dropout = 0.3, bidirectional = True, batch_first = True )


        self.fc1 = nn.Linear(2* hidden_size + embed_dim , hidden_size//4)
        self.fc2 = nn.Linear(hidden_size//4, num_class)

        self.dropout = nn.Dropout(0.3)



    def forward(self, input):

        embedded = self.embedding(input)  #out shape = [batch, seq_len, embed_dim] 

        selfmatch_output = self.selfnet_layer(embedded)  #out shape = [batch, seq_len] 

        lstm_out, _ = self.lstm(embedded)     

        lstm_out = lstm_out[:, -1, :]      #out shape = [batch, 2 * hidden_size]      

        concat = torch.cat( (selfmatch_output, lstm_out), 1)     #out shape = [batch, 2 * hidden_size + seq_len ]      

        linear_out = self.dropout(F.relu(self.fc1(concat)))     #out shape = [batch, hidden_size]      

        final_out = self.fc2(linear_out)     #out shape = [batch, 2]      

        return final_out

In [36]:

### Test
model = SelfNet( vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class)
model = model.to(device)

# for batch in train_loader:
#   x = batch['token'].to(device)
#   out = model(x)
#   print(out.shape)
#   break



In [37]:
model

SelfNet(
  (embedding): Embedding(28443, 100, padding_idx=28442)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

### Optimizer and loss

In [38]:
#optimizer
optimizer = Adam(model.parameters(), lr = LEARNING_RATE, eps=1e-8)
#Loss function
criterion = nn.CrossEntropyLoss()

In [40]:
#to calculate accuracy

def get_accuracy(preds, labels):
  total_acc = 0.0
  
  for i in range(len(labels)):
    if labels[i] == preds[i]:
      total_acc+=1.0
  
  return total_acc / len(labels)

### Training

In [41]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
#scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.2, patience=5, threshold=0.0008, verbose = True)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5, verbose = True)


Adjusting learning rate of group 0 to 1.0000e-03.


In [42]:
PATH = '/home/ckm/ck_project/codemix/code/selfnet_mihir/models/selfnet_mean+gelu_2_mihir_spaneng_1.pt'

In [43]:
best_valid_f1 = 0.0000

for epoch in range(0, EPOCHS):
  

    print('-'*50)
    print('Epoch {}/{}'.format(epoch+1, EPOCHS))

    for phase in ['Train', 'Val']:

        loss = 0.0   #epoch loss
        accuracy = 0.0   #epoch accuracy

        y_true = []
        y_pred = []

        if phase == 'Train':
            model.train()
        else:
            model.eval()
        
        with tqdm(dataloaders[phase], unit="batch") as tepoch:

          for batch in tepoch:
            labels = batch["label"].to(device)
            text = batch["token"].to(device)
            

            output = model(text)

            loss = criterion(output, labels)

            if phase == 'Train':

                #zero gradients
                optimizer.zero_grad() 

                # Backward pass  (calculates the gradients)
                loss.backward()   

                # gradient clipping
                nn.utils.clip_grad_norm_(model.parameters(), CLIP)    

                optimizer.step()             # Updates the weights    

            sleep(0.1)
            _, preds = output.data.max(1)
            y_pred.extend(preds.tolist())
            y_true.extend(labels.tolist())
            
            batch_acc = get_accuracy(preds.tolist(), labels.tolist())
            
            
            loss += loss.item()
            accuracy+= batch_acc

              
          epoch_loss = loss / (len(dataloaders[phase]))
          epoch_acc = accuracy / (len(dataloaders[phase]))

          print(phase + ":")
          
          
          #print(confusion_matrix(y_true, y_pred))
          pre = precision_score(y_true, y_pred, average='weighted')
          recall = recall_score(y_true, y_pred, average='weighted')
          f1 = f1_score(y_true, y_pred, average='weighted')
          

          print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))
          # save best model
          print()
          
            
          if phase == 'Val':
                
                if f1 > best_valid_f1:
                    best_valid_f1 = f1
                    
                    torch.save(model.state_dict(), PATH)
                    print('Model Saved!')
                    
                scheduler.step()
                    


--------------------------------------------------
Epoch 1/30


100%|██████████| 250/250 [00:34<00:00,  7.19batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.3671, Precision: 0.3876, Recall : 0.4826, Accuracy: 0.4826, Loss: 0.0078.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3361, Precision: 0.2525, Recall : 0.5025, Accuracy: 0.5025, Loss: 0.0299.

Model Saved!
Adjusting learning rate of group 0 to 5.0000e-04.
--------------------------------------------------
Epoch 2/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3511, Precision: 0.4354, Recall : 0.4979, Accuracy: 0.4979, Loss: 0.0096.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3371, Precision: 0.2865, Recall : 0.5000, Accuracy: 0.5000, Loss: 0.0329.

Model Saved!
Adjusting learning rate of group 0 to 2.5000e-04.
--------------------------------------------------
Epoch 3/30


100%|██████████| 250/250 [00:34<00:00,  7.31batch/s]


Train:
F1: 0.3611, Precision: 0.4202, Recall : 0.5021, Accuracy: 0.5021, Loss: 0.0077.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3592, Precision: 0.2946, Recall : 0.4919, Accuracy: 0.4919, Loss: 0.0346.

Model Saved!
Adjusting learning rate of group 0 to 1.2500e-04.
--------------------------------------------------
Epoch 4/30


100%|██████████| 250/250 [00:34<00:00,  7.32batch/s]


Train:
F1: 0.3707, Precision: 0.4537, Recall : 0.5041, Accuracy: 0.5041, Loss: 0.0076.



100%|██████████| 62/62 [00:07<00:00,  8.65batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3605, Precision: 0.3018, Recall : 0.5015, Accuracy: 0.5015, Loss: 0.0339.

Model Saved!
Adjusting learning rate of group 0 to 6.2500e-05.
--------------------------------------------------
Epoch 5/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3766, Precision: 0.5124, Recall : 0.5112, Accuracy: 0.5112, Loss: 0.0077.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3593, Precision: 0.2999, Recall : 0.4990, Accuracy: 0.4990, Loss: 0.0307.

Adjusting learning rate of group 0 to 3.1250e-05.
--------------------------------------------------
Epoch 6/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3764, Precision: 0.3925, Recall : 0.5096, Accuracy: 0.5096, Loss: 0.0075.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3624, Precision: 0.3024, Recall : 0.5020, Accuracy: 0.5020, Loss: 0.0320.

Model Saved!
Adjusting learning rate of group 0 to 1.5625e-05.
--------------------------------------------------
Epoch 7/30


100%|██████████| 250/250 [00:34<00:00,  7.32batch/s]


Train:
F1: 0.3853, Precision: 0.4582, Recall : 0.5125, Accuracy: 0.5125, Loss: 0.0074.



100%|██████████| 62/62 [00:07<00:00,  8.61batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3554, Precision: 0.2941, Recall : 0.4965, Accuracy: 0.4965, Loss: 0.0314.

Adjusting learning rate of group 0 to 7.8125e-06.
--------------------------------------------------
Epoch 8/30


100%|██████████| 250/250 [00:34<00:00,  7.31batch/s]


Train:
F1: 0.3817, Precision: 0.4454, Recall : 0.5121, Accuracy: 0.5121, Loss: 0.0077.



100%|██████████| 62/62 [00:07<00:00,  8.60batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3550, Precision: 0.2932, Recall : 0.4955, Accuracy: 0.4955, Loss: 0.0311.

Adjusting learning rate of group 0 to 3.9063e-06.
--------------------------------------------------
Epoch 9/30


100%|██████████| 250/250 [00:34<00:00,  7.32batch/s]


Train:
F1: 0.3817, Precision: 0.4483, Recall : 0.5110, Accuracy: 0.5110, Loss: 0.0078.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3573, Precision: 0.2960, Recall : 0.4975, Accuracy: 0.4975, Loss: 0.0333.

Adjusting learning rate of group 0 to 1.9531e-06.
--------------------------------------------------
Epoch 10/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3796, Precision: 0.4390, Recall : 0.5110, Accuracy: 0.5110, Loss: 0.0078.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3572, Precision: 0.2953, Recall : 0.4965, Accuracy: 0.4965, Loss: 0.0329.

Adjusting learning rate of group 0 to 9.7656e-07.
--------------------------------------------------
Epoch 11/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3803, Precision: 0.4409, Recall : 0.5106, Accuracy: 0.5106, Loss: 0.0071.



100%|██████████| 62/62 [00:07<00:00,  8.61batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3581, Precision: 0.2969, Recall : 0.4980, Accuracy: 0.4980, Loss: 0.0317.

Adjusting learning rate of group 0 to 4.8828e-07.
--------------------------------------------------
Epoch 12/30


100%|██████████| 250/250 [00:34<00:00,  7.28batch/s]


Train:
F1: 0.3831, Precision: 0.4504, Recall : 0.5124, Accuracy: 0.5124, Loss: 0.0086.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3570, Precision: 0.2949, Recall : 0.4970, Accuracy: 0.4970, Loss: 0.0324.

Adjusting learning rate of group 0 to 2.4414e-07.
--------------------------------------------------
Epoch 13/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3858, Precision: 0.4783, Recall : 0.5146, Accuracy: 0.5146, Loss: 0.0067.



100%|██████████| 62/62 [00:07<00:00,  8.58batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3567, Precision: 0.2949, Recall : 0.4970, Accuracy: 0.4970, Loss: 0.0331.

Adjusting learning rate of group 0 to 1.2207e-07.
--------------------------------------------------
Epoch 14/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3843, Precision: 0.4473, Recall : 0.5138, Accuracy: 0.5138, Loss: 0.0070.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3570, Precision: 0.2963, Recall : 0.4990, Accuracy: 0.4990, Loss: 0.0281.

Adjusting learning rate of group 0 to 6.1035e-08.
--------------------------------------------------
Epoch 15/30


100%|██████████| 250/250 [00:34<00:00,  7.29batch/s]


Train:
F1: 0.3825, Precision: 0.4261, Recall : 0.5140, Accuracy: 0.5140, Loss: 0.0083.



100%|██████████| 62/62 [00:07<00:00,  8.58batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3595, Precision: 0.3000, Recall : 0.5000, Accuracy: 0.5000, Loss: 0.0353.

Adjusting learning rate of group 0 to 3.0518e-08.
--------------------------------------------------
Epoch 16/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3809, Precision: 0.4555, Recall : 0.5120, Accuracy: 0.5120, Loss: 0.0082.



100%|██████████| 62/62 [00:07<00:00,  8.58batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3607, Precision: 0.3007, Recall : 0.5010, Accuracy: 0.5010, Loss: 0.0348.

Adjusting learning rate of group 0 to 1.5259e-08.
--------------------------------------------------
Epoch 17/30


100%|██████████| 250/250 [00:34<00:00,  7.27batch/s]


Train:
F1: 0.3803, Precision: 0.4107, Recall : 0.5118, Accuracy: 0.5118, Loss: 0.0078.



100%|██████████| 62/62 [00:07<00:00,  8.58batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3552, Precision: 0.2939, Recall : 0.4955, Accuracy: 0.4955, Loss: 0.0307.

Adjusting learning rate of group 0 to 7.6294e-09.
--------------------------------------------------
Epoch 18/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3810, Precision: 0.4536, Recall : 0.5121, Accuracy: 0.5121, Loss: 0.0069.



100%|██████████| 62/62 [00:07<00:00,  8.60batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3564, Precision: 0.2957, Recall : 0.4980, Accuracy: 0.4980, Loss: 0.0299.

Adjusting learning rate of group 0 to 3.8147e-09.
--------------------------------------------------
Epoch 19/30


100%|██████████| 250/250 [00:34<00:00,  7.31batch/s]


Train:
F1: 0.3771, Precision: 0.4388, Recall : 0.5089, Accuracy: 0.5089, Loss: 0.0081.



100%|██████████| 62/62 [00:07<00:00,  8.60batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3562, Precision: 0.2959, Recall : 0.4970, Accuracy: 0.4970, Loss: 0.0324.

Adjusting learning rate of group 0 to 1.9073e-09.
--------------------------------------------------
Epoch 20/30


100%|██████████| 250/250 [00:34<00:00,  7.27batch/s]


Train:
F1: 0.3840, Precision: 0.4457, Recall : 0.5145, Accuracy: 0.5145, Loss: 0.0072.



100%|██████████| 62/62 [00:07<00:00,  8.58batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3575, Precision: 0.2972, Recall : 0.4975, Accuracy: 0.4975, Loss: 0.0282.

Adjusting learning rate of group 0 to 9.5367e-10.
--------------------------------------------------
Epoch 21/30


100%|██████████| 250/250 [00:34<00:00,  7.31batch/s]


Train:
F1: 0.3836, Precision: 0.4793, Recall : 0.5121, Accuracy: 0.5121, Loss: 0.0087.



100%|██████████| 62/62 [00:07<00:00,  8.60batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3623, Precision: 0.3013, Recall : 0.5015, Accuracy: 0.5015, Loss: 0.0272.

Adjusting learning rate of group 0 to 4.7684e-10.
--------------------------------------------------
Epoch 22/30


100%|██████████| 250/250 [00:34<00:00,  7.28batch/s]


Train:
F1: 0.3822, Precision: 0.4476, Recall : 0.5130, Accuracy: 0.5130, Loss: 0.0069.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3605, Precision: 0.2988, Recall : 0.5010, Accuracy: 0.5010, Loss: 0.0305.

Adjusting learning rate of group 0 to 2.3842e-10.
--------------------------------------------------
Epoch 23/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3782, Precision: 0.4401, Recall : 0.5090, Accuracy: 0.5090, Loss: 0.0078.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3549, Precision: 0.2934, Recall : 0.4950, Accuracy: 0.4950, Loss: 0.0305.

Adjusting learning rate of group 0 to 1.1921e-10.
--------------------------------------------------
Epoch 24/30


100%|██████████| 250/250 [00:34<00:00,  7.28batch/s]


Train:
F1: 0.3796, Precision: 0.4354, Recall : 0.5110, Accuracy: 0.5110, Loss: 0.0075.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3579, Precision: 0.2960, Recall : 0.4980, Accuracy: 0.4980, Loss: 0.0335.

Adjusting learning rate of group 0 to 5.9605e-11.
--------------------------------------------------
Epoch 25/30


100%|██████████| 250/250 [00:34<00:00,  7.29batch/s]


Train:
F1: 0.3850, Precision: 0.4729, Recall : 0.5144, Accuracy: 0.5144, Loss: 0.0080.



100%|██████████| 62/62 [00:07<00:00,  8.58batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3639, Precision: 0.3059, Recall : 0.5030, Accuracy: 0.5030, Loss: 0.0323.

Model Saved!
Adjusting learning rate of group 0 to 2.9802e-11.
--------------------------------------------------
Epoch 26/30


100%|██████████| 250/250 [00:34<00:00,  7.27batch/s]


Train:
F1: 0.3837, Precision: 0.4725, Recall : 0.5138, Accuracy: 0.5138, Loss: 0.0078.



100%|██████████| 62/62 [00:07<00:00,  8.55batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3590, Precision: 0.2995, Recall : 0.5000, Accuracy: 0.5000, Loss: 0.0353.

Adjusting learning rate of group 0 to 1.4901e-11.
--------------------------------------------------
Epoch 27/30


100%|██████████| 250/250 [00:34<00:00,  7.27batch/s]


Train:
F1: 0.3801, Precision: 0.4413, Recall : 0.5096, Accuracy: 0.5096, Loss: 0.0071.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3603, Precision: 0.2980, Recall : 0.5000, Accuracy: 0.5000, Loss: 0.0285.

Adjusting learning rate of group 0 to 7.4506e-12.
--------------------------------------------------
Epoch 28/30


100%|██████████| 250/250 [00:34<00:00,  7.30batch/s]


Train:
F1: 0.3838, Precision: 0.4615, Recall : 0.5139, Accuracy: 0.5139, Loss: 0.0076.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3559, Precision: 0.2945, Recall : 0.4960, Accuracy: 0.4960, Loss: 0.0307.

Adjusting learning rate of group 0 to 3.7253e-12.
--------------------------------------------------
Epoch 29/30


100%|██████████| 250/250 [00:34<00:00,  7.28batch/s]


Train:
F1: 0.3832, Precision: 0.4802, Recall : 0.5131, Accuracy: 0.5131, Loss: 0.0063.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3553, Precision: 0.2954, Recall : 0.4970, Accuracy: 0.4970, Loss: 0.0292.

Adjusting learning rate of group 0 to 1.8626e-12.
--------------------------------------------------
Epoch 30/30


100%|██████████| 250/250 [00:34<00:00,  7.31batch/s]


Train:
F1: 0.3807, Precision: 0.4333, Recall : 0.5115, Accuracy: 0.5115, Loss: 0.0071.



100%|██████████| 62/62 [00:07<00:00,  8.59batch/s]

Val:
F1: 0.3585, Precision: 0.2967, Recall : 0.4980, Accuracy: 0.4980, Loss: 0.0288.

Adjusting learning rate of group 0 to 9.3132e-13.



  _warn_prf(average, modifier, msg_start, len(result))


### Testing

In [75]:
model = SelfNet( vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class)

In [76]:
model.load_state_dict(torch.load(PATH))

model.to(device)


SelfNet(
  (embedding): Embedding(13511, 100, padding_idx=13510)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [77]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

100%|██████████| 14/14 [00:17<00:00,  1.27s/batch]


Inference:

[[145   0  58   0   0]
 [ 13   0  50   0   0]
 [ 31   0  86   0   0]
 [ 15   0   9   0   0]
 [ 15   0  26   0   0]]

F1: 0.5156, Precision: 0.5156, Recall : 0.5156, Accuracy: 0.6016, Loss: 0.2322.





In [78]:
model.load_state_dict(torch.load(PATH))

model.to(device)


SelfNet(
  (embedding): Embedding(13511, 100, padding_idx=13510)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [44]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

100%|██████████| 62/62 [00:07<00:00,  8.46batch/s]



Inference:

[[978  32   0]
 [301  24   0]
 [621  28   0]]

F1: 0.3614, Precision: 0.3088, Recall : 0.5050, Accuracy: 0.5050, Loss: 0.0276.


  _warn_prf(average, modifier, msg_start, len(result))


In [80]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

100%|██████████| 14/14 [00:17<00:00,  1.27s/batch]


Inference:

[[141   0  61   0   0]
 [ 12   0  51   0   0]
 [ 29   0  88   0   0]
 [  9   0  16   0   0]
 [ 14   0  27   0   0]]

F1: 0.5112, Precision: 0.5112, Recall : 0.5112, Accuracy: 0.5964, Loss: 0.2325.



