Malayalam English Dataset

### Imports

In [33]:
!pip install torchtext

You should consider upgrading via the '/home/ckm/python3.7/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [1]:
import os
import math
import numpy as np
import pandas as pd
from torchtext.data.utils import get_tokenizer
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
from time import sleep


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [3]:
!nvidia-smi

Mon Nov 15 11:03:04 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.00    Driver Version: 470.82.00    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:17:00.0 Off |                    0 |
| N/A   56C    P0    45W / 250W |    311MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Load embedding

In [37]:
datapath = "/home/ckm/ck_project/data/MalayalmEnglishSentiments/"
df_embedding = pd.read_csv(datapath+"mal_eng_emb", sep=" ", quoting=3, header=None, index_col=0,skiprows=1)

In [38]:
df_embedding.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
like,-0.071869,0.27537,-0.071923,0.032835,0.199517,0.16,-0.159244,0.026283,0.34317,0.352353,...,0.145661,0.04793,0.269013,-0.226423,-0.122173,-0.181215,0.034296,0.128978,-0.14102,-0.143799
oru,-0.07783,0.266342,-0.074621,0.033034,0.198433,0.159149,-0.159517,0.021624,0.346139,0.352183,...,0.139258,0.042647,0.274619,-0.225015,-0.124942,-0.18224,0.036177,0.13615,-0.133971,-0.136864
trailer,-0.074339,0.276567,-0.075053,0.036022,0.206773,0.164624,-0.159937,0.022718,0.350942,0.356257,...,0.146939,0.046356,0.279289,-0.230388,-0.132882,-0.184771,0.033757,0.134053,-0.13714,-0.141513
fans,-0.076445,0.272194,-0.076098,0.033571,0.199238,0.159381,-0.161973,0.020347,0.344558,0.354843,...,0.140745,0.042472,0.277846,-0.223677,-0.124965,-0.186585,0.031947,0.13169,-0.138251,-0.138841
ikka,-0.0736,0.267904,-0.076091,0.032226,0.19686,0.157712,-0.166063,0.01881,0.339874,0.351412,...,0.135749,0.04358,0.276364,-0.222802,-0.128295,-0.188228,0.034005,0.132371,-0.137775,-0.142165


### Create vocab dictionary

In [39]:
m = df_embedding.to_numpy()
m.shape

(1109, 100)

In [40]:
embedding_matrix = df_embedding.to_numpy()

vocab = []

for word in list(df_embedding.index):
  vocab.append(str(word))

vocab_size , vocab_dim = embedding_matrix.shape
vocab_size, vocab_dim

(1109, 100)

In [41]:
len(vocab)

1109

In [42]:
word2idx = {w: idx for (idx, w) in enumerate(vocab)}
idx2word = {idx: w for (idx, w) in enumerate(vocab)}

In [43]:
len(word2idx) , len(idx2word)

(1109, 1109)

In [44]:
import itertools
dict(itertools.islice(word2idx.items(), 10))

{'like': 0,
 'oru': 1,
 'trailer': 2,
 'fans': 3,
 'ikka': 4,
 'padam': 5,
 'mass': 6,
 'Trailer': 7,
 'movie': 8,
 'waiting': 9}

### Read data

In [45]:
df = pd.read_csv('/home/ckm/ck_project/data/MalayalmEnglishSentiments/Malayalam_English_Data.csv')

df= df.sample(frac = 1)

train_df = df[:4000]
val_df = df[4000:4400]
test_df = df[4400:]

train_df.head()

train_data =  train_df['text'].values
train_labels = train_df['category'].values

val_data =  val_df['text'].values
val_labels = val_df['category'].values

test_data =  test_df['text'].values
test_labels = test_df['category'].values

print(len(train_data), len(train_labels))
print(len(val_data), len(val_labels))
print(len(test_data), len(test_labels))


4000 4000
400 400
451 451


In [46]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,category
3637,3637,Oru njettalum illa romanjavum illa ayye,4
3169,3169,Adhu ennavo namma Tamizh rasigalukku MAMMOOTT...,1
2840,2840,Suraj ettan vere level... 1:01 Soubin ikka ...,0
1217,1217,പടം പൊളിക്കും HBD mammokka By Die hard ...,0
3507,3507,Urutaam enna dialogue parayenda tone sheriyal...,3


In [47]:

# post = pd.read_csv("positive_add1and2.csv",sep=',')
# neg = pd.read_csv("negative_add_1and2.csv",sep=',')

# data = []
# labels = []

# for index, row in post.iterrows():
#   if index >= 30000:
#     break
  
#   comment = str(row['tweet'])
#   data.append(comment)
#   labels.append(1)


# for index, row in neg.iterrows():
#   if index >= 30000:
#     break
  
#   comment = str(row['tweet'])
#   data.append(comment)
#   labels.append(0)
  

# len(data), len(labels)

# data, labels  = shuffle(data, labels, random_state = 40)

# data[0], labels[0]

### Tokenizer

In [48]:
tokenizer = get_tokenizer('basic_english')


def tokenized_tensor(data):

  output_tokenized = []

  for sentence in data:
    output = []
    tokenized = tokenizer(sentence)
    
    for word in tokenized:
      if word in word2idx:
        id = word2idx[word]
        output.append(id)
      else:
        word2idx[word] = len(word2idx)
        id = word2idx[word]
        output.append(id)

    output = torch.tensor(output)


    output_tokenized.append(output)

  return output_tokenized

In [49]:
# tokenized_sequences = tokenized_tensor(data)

train_tokenized_sequences = tokenized_tensor(train_data)

test_tokenized_seuqences = tokenized_tensor(test_data)

val_tokenized_seuquences = tokenized_tensor(val_data)


In [50]:
print(train_tokenized_sequences[1])

tensor([1112, 1113, 1114, 1115, 1116,  349,  114,  654, 1117, 1118])


In [51]:
word2idx['<PAD>'] = len(word2idx)
word2idx['<PAD>']

13510

In [52]:
len(word2idx)

13511

In [53]:
## Create embedding matrix

random_init = torch.nn.Parameter(torch.Tensor( (len(word2idx) - vocab_size), vocab_dim))
torch.nn.init.kaiming_uniform_(random_init, a=math.sqrt(5))


new_matrix = np.zeros( (len(word2idx), vocab_dim) )

new_matrix[:vocab_size, :] = embedding_matrix

embedding_matrix = new_matrix

embedding_matrix[vocab_size:, :] = random_init.detach().numpy()

In [54]:
embedding_matrix.shape

(13511, 100)

In [55]:
# padded_sequences = pad_sequence(tokenized_sequences, batch_first= True, padding_value=107512)

train_padded_sequences = pad_sequence(train_tokenized_sequences, batch_first= True, padding_value=word2idx['<PAD>'])

val_padded_sequences = pad_sequence(val_tokenized_seuquences, batch_first= True, padding_value=word2idx['<PAD>'])

test_padded_sequences = pad_sequence(test_tokenized_seuqences, batch_first= True, padding_value=word2idx['<PAD>'])

In [56]:
(train_padded_sequences[1])

tensor([ 1112,  1113,  1114,  1115,  1116,   349,   114,   654,  1117,  1118,
        13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
        13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
        13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
        13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
        13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
        13510])

### Dataset and Data loader 

In [57]:
class Dataset(torch.utils.data.Dataset):
    """
    This is our custom dataset class which will load the text and their corresponding labels into Pytorch tensors
    """
    def __init__(self, sequences, labels):
        self.labels = labels
        self.sequences = sequences

    def __getitem__(self, idx):
        sample = {}
        sequence = self.sequences[idx]
        label = torch.tensor(self.labels[idx])

        try:
            sample["label"] = label
            sample["token"] = sequence
        except Exception as e:
            print(e)
        
        return sample
    
    def __len__(self):
        return len(self.labels)
        

In [58]:
train_dataset = Dataset(train_padded_sequences, train_labels)

val_dataset = Dataset(val_padded_sequences, val_labels)

test_dataset = Dataset(test_padded_sequences, test_labels)

### Hyper parameters

In [59]:
## Hyper parameter

vocab_size = len(word2idx)
embed_dim = vocab_dim
seq_len = 50
hidden_size = 512
num_layer = 3
num_class = 7
batch_size = 32

LEARNING_RATE = 1e-3
EPOCHS = 30
CLIP = 0.3

In [60]:
# # Create datasets
# dataset = Dataset(padded_sequences, labels)

# split = 0.85
# train_size = int(split*len(dataset))
# val_size = len(dataset) - train_size

# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

In [61]:

## We call the dataloader class
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )

## For testing
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )


dataloaders = {'Train': train_loader, 'Val': val_loader}



In [62]:
len(train_loader), len(val_loader), len(test_loader)

(125, 12, 14)

In [63]:
max_len = -1
for comment in val_dataset:
  sentence = comment['token']
  max_len = max(max_len,sentence.shape[0])

print(max_len)

44


In [64]:
train_dataset[0]

{'label': tensor(4),
 'token': tensor([    1,  1109,    32,  1110,    32,  1111, 13510, 13510, 13510, 13510,
         13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
         13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
         13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
         13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
         13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510, 13510,
         13510])}

### Model

In [65]:
class SelfMatchingLayer(nn.Module):

    def __init__(self,  seq_length, embed_dim, **kwargs):

      super(SelfMatchingLayer, self).__init__()

      self.seq_length = seq_length
      self.embed_dim  = embed_dim

      self.P = torch.nn.Parameter(torch.Tensor(self.embed_dim, self.embed_dim))

      self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.kaiming_uniform_(self.P, a=math.sqrt(5))

      
    def forward(self, x):  
      
      # input shape: [batch, seq_len, embed_dim]


      #---------------------------------------------#
      # calculate weight vector a = {e_i . P.Q . e_j}
      #---------------------------------------------#

      out = torch.matmul(x,  self.P)   #out shape: [batch, seq_len, embed_dim]

      out = torch.matmul(out, torch.transpose(x, 1, 2))   #out shape: [batch, seq_len, seq_len]

      out = F.gelu(out)         # apply non linear activation

      #------------------------------------#
      # take row wise mean and apply softmax
      #------------------------------------#
      out = torch.mean(out, 2)  #out shape: [batch, seq_len, seq_len]

      out = torch.softmax(out, 0)     #out shape: [batch, seq_len, seq_len]

      out = out.unsqueeze(1)          #out shape: [batch, 1, seq_len]

      #-------------------------------------------#
      # calculate weighted embedding of every word
      #-------------------------------------------#
      out = torch.matmul(out, x)

      out = out.squeeze(1)

      return out      #out shape: [batch, seq_len]


In [66]:
word2idx['<PAD>']

13510

In [67]:
class SelfNet(nn.Module):

    def __init__(self, vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class):
        super(SelfNet, self).__init__()


        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx = word2idx['<PAD>'])
        self.embedding.load_state_dict({'weight': torch.from_numpy(embedding_matrix)})
        self.embedding.weight.requires_grad = True

        self.selfnet_layer = SelfMatchingLayer(seq_len, embed_dim)

        self.lstm = nn.LSTM(input_size = embed_dim, hidden_size = hidden_size, num_layers = num_layer, dropout = 0.3, bidirectional = True, batch_first = True )


        self.fc1 = nn.Linear(2* hidden_size + embed_dim , hidden_size//4)
        self.fc2 = nn.Linear(hidden_size//4, num_class)

        self.dropout = nn.Dropout(0.3)



    def forward(self, input):

        embedded = self.embedding(input)  #out shape = [batch, seq_len, embed_dim] 

        selfmatch_output = self.selfnet_layer(embedded)  #out shape = [batch, seq_len] 

        lstm_out, _ = self.lstm(embedded)     

        lstm_out = lstm_out[:, -1, :]      #out shape = [batch, 2 * hidden_size]      

        concat = torch.cat( (selfmatch_output, lstm_out), 1)     #out shape = [batch, 2 * hidden_size + seq_len ]      

        linear_out = self.dropout(F.relu(self.fc1(concat)))     #out shape = [batch, hidden_size]      

        final_out = self.fc2(linear_out)     #out shape = [batch, 2]      

        return final_out

In [68]:

### Test
model = SelfNet( vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class)
model = model.to(device)

# for batch in train_loader:
#   x = batch['token'].to(device)
#   out = model(x)
#   print(out.shape)
#   break



In [69]:
model

SelfNet(
  (embedding): Embedding(13511, 100, padding_idx=13510)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

### Optimizer and loss

In [70]:
#optimizer
optimizer = Adam(model.parameters(), lr = LEARNING_RATE, eps=1e-8)
#Loss function
criterion = nn.CrossEntropyLoss()

In [71]:
#to calculate accuracy

def get_accuracy(preds, labels):
  total_acc = 0.0
  
  for i in range(len(labels)):
    if labels[i] == preds[i]:
      total_acc+=1.0
  
  return total_acc / len(labels)

### Training

In [72]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
#scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.2, patience=5, threshold=0.0008, verbose = True)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5, verbose = True)


Adjusting learning rate of group 0 to 1.0000e-03.


In [73]:
PATH = '/home/ckm/ck_project/codemix/code/selfnet_mihir/models/selfnet_mean+gelu_2_mihir_yt_1.pt'

In [74]:
best_valid_f1 = 0.0000

for epoch in range(0, EPOCHS):
  

    print('-'*50)
    print('Epoch {}/{}'.format(epoch+1, EPOCHS))

    for phase in ['Train', 'Val']:

        loss = 0.0   #epoch loss
        accuracy = 0.0   #epoch accuracy

        y_true = []
        y_pred = []

        if phase == 'Train':
            model.train()
        else:
            model.eval()
        
        with tqdm(dataloaders[phase], unit="batch") as tepoch:

          for batch in tepoch:
            labels = batch["label"].to(device)
            text = batch["token"].to(device)
            

            output = model(text)

            loss = criterion(output, labels)

            if phase == 'Train':

                #zero gradients
                optimizer.zero_grad() 

                # Backward pass  (calculates the gradients)
                loss.backward()   

                # gradient clipping
                nn.utils.clip_grad_norm_(model.parameters(), CLIP)    

                optimizer.step()             # Updates the weights    

            sleep(0.1)
            _, preds = output.data.max(1)
            y_pred.extend(preds.tolist())
            y_true.extend(labels.tolist())
            
            batch_acc = get_accuracy(preds.tolist(), labels.tolist())
            
            
            loss += loss.item()
            accuracy+= batch_acc

              
          epoch_loss = loss / (len(dataloaders[phase]))
          epoch_acc = accuracy / (len(dataloaders[phase]))

          print(phase + ":")
          
          
          #print(confusion_matrix(y_true, y_pred))
          pre = precision_score(y_true, y_pred, average='weighted')
          recall = recall_score(y_true, y_pred, average='weighted')
          f1 = f1_score(y_true, y_pred, average='weighted')
          

          print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))
          # save best model
          print()
          
            
          if phase == 'Val':
                
                if f1 > best_valid_f1:
                    best_valid_f1 = f1
                    
                    torch.save(model.state_dict(), PATH)
                    print('Model Saved!')
                    
                scheduler.step()
                    


--------------------------------------------------
Epoch 1/30


100%|██████████| 125/125 [01:07<00:00,  1.85batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.2948, Precision: 0.2837, Recall : 0.3985, Accuracy: 0.3985, Loss: 0.0233.



100%|██████████| 12/12 [00:17<00:00,  1.42s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.2583, Precision: 0.1846, Recall : 0.4297, Accuracy: 0.4297, Loss: 0.2176.

Model Saved!
Adjusting learning rate of group 0 to 5.0000e-04.
--------------------------------------------------
Epoch 2/30


100%|██████████| 125/125 [01:07<00:00,  1.85batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.3344, Precision: 0.2949, Recall : 0.4410, Accuracy: 0.4410, Loss: 0.0202.



100%|██████████| 12/12 [00:18<00:00,  1.50s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.4296, Precision: 0.3710, Recall : 0.5104, Accuracy: 0.5104, Loss: 0.2076.

Model Saved!
Adjusting learning rate of group 0 to 2.5000e-04.
--------------------------------------------------
Epoch 3/30


100%|██████████| 125/125 [01:05<00:00,  1.92batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4014, Precision: 0.3399, Recall : 0.4905, Accuracy: 0.4905, Loss: 0.0221.



100%|██████████| 12/12 [00:18<00:00,  1.54s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.4636, Precision: 0.4193, Recall : 0.5391, Accuracy: 0.5391, Loss: 0.2202.

Model Saved!
Adjusting learning rate of group 0 to 1.2500e-04.
--------------------------------------------------
Epoch 4/30


100%|██████████| 125/125 [01:04<00:00,  1.93batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4144, Precision: 0.3567, Recall : 0.5012, Accuracy: 0.5012, Loss: 0.0175.



100%|██████████| 12/12 [00:16<00:00,  1.38s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.4480, Precision: 0.3873, Recall : 0.5312, Accuracy: 0.5312, Loss: 0.2177.

Adjusting learning rate of group 0 to 6.2500e-05.
--------------------------------------------------
Epoch 5/30


100%|██████████| 125/125 [01:01<00:00,  2.03batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4271, Precision: 0.3632, Recall : 0.5198, Accuracy: 0.5198, Loss: 0.0192.



100%|██████████| 12/12 [00:16<00:00,  1.41s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3641, Precision: 0.3220, Recall : 0.4635, Accuracy: 0.4635, Loss: 0.2084.

Adjusting learning rate of group 0 to 3.1250e-05.
--------------------------------------------------
Epoch 6/30


100%|██████████| 125/125 [01:01<00:00,  2.04batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4500, Precision: 0.3902, Recall : 0.5403, Accuracy: 0.5403, Loss: 0.0171.



100%|██████████| 12/12 [00:17<00:00,  1.50s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3484, Precision: 0.3114, Recall : 0.4505, Accuracy: 0.4505, Loss: 0.2103.

Adjusting learning rate of group 0 to 1.5625e-05.
--------------------------------------------------
Epoch 7/30


100%|██████████| 125/125 [01:00<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4629, Precision: 0.4043, Recall : 0.5535, Accuracy: 0.5535, Loss: 0.0182.



100%|██████████| 12/12 [00:18<00:00,  1.50s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3509, Precision: 0.3172, Recall : 0.4557, Accuracy: 0.4557, Loss: 0.1846.

Adjusting learning rate of group 0 to 7.8125e-06.
--------------------------------------------------
Epoch 8/30


100%|██████████| 125/125 [01:05<00:00,  1.91batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4686, Precision: 0.4105, Recall : 0.5593, Accuracy: 0.5593, Loss: 0.0185.



100%|██████████| 12/12 [00:17<00:00,  1.44s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3544, Precision: 0.3224, Recall : 0.4557, Accuracy: 0.4557, Loss: 0.1789.

Adjusting learning rate of group 0 to 3.9063e-06.
--------------------------------------------------
Epoch 9/30


100%|██████████| 125/125 [01:01<00:00,  2.05batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4734, Precision: 0.4144, Recall : 0.5653, Accuracy: 0.5653, Loss: 0.0217.



100%|██████████| 12/12 [00:16<00:00,  1.41s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3563, Precision: 0.3227, Recall : 0.4661, Accuracy: 0.4661, Loss: 0.2224.

Adjusting learning rate of group 0 to 1.9531e-06.
--------------------------------------------------
Epoch 10/30


100%|██████████| 125/125 [01:00<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4759, Precision: 0.4207, Recall : 0.5660, Accuracy: 0.5660, Loss: 0.0177.



100%|██████████| 12/12 [00:18<00:00,  1.57s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3365, Precision: 0.3046, Recall : 0.4453, Accuracy: 0.4453, Loss: 0.2257.

Adjusting learning rate of group 0 to 9.7656e-07.
--------------------------------------------------
Epoch 11/30


100%|██████████| 125/125 [01:00<00:00,  2.06batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4764, Precision: 0.4197, Recall : 0.5670, Accuracy: 0.5670, Loss: 0.0168.



100%|██████████| 12/12 [00:17<00:00,  1.43s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3364, Precision: 0.3052, Recall : 0.4427, Accuracy: 0.4427, Loss: 0.2885.

Adjusting learning rate of group 0 to 4.8828e-07.
--------------------------------------------------
Epoch 12/30


100%|██████████| 125/125 [01:00<00:00,  2.07batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4741, Precision: 0.4176, Recall : 0.5645, Accuracy: 0.5645, Loss: 0.0151.



100%|██████████| 12/12 [00:17<00:00,  1.50s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3463, Precision: 0.3102, Recall : 0.4531, Accuracy: 0.4531, Loss: 0.2444.

Adjusting learning rate of group 0 to 2.4414e-07.
--------------------------------------------------
Epoch 13/30


100%|██████████| 125/125 [00:59<00:00,  2.09batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4777, Precision: 0.4204, Recall : 0.5687, Accuracy: 0.5687, Loss: 0.0208.



100%|██████████| 12/12 [00:20<00:00,  1.67s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3387, Precision: 0.3004, Recall : 0.4427, Accuracy: 0.4427, Loss: 0.2167.

Adjusting learning rate of group 0 to 1.2207e-07.
--------------------------------------------------
Epoch 14/30


100%|██████████| 125/125 [01:00<00:00,  2.06batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4762, Precision: 0.4791, Recall : 0.5657, Accuracy: 0.5657, Loss: 0.0176.



100%|██████████| 12/12 [00:17<00:00,  1.45s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3331, Precision: 0.2995, Recall : 0.4375, Accuracy: 0.4375, Loss: 0.2905.

Adjusting learning rate of group 0 to 6.1035e-08.
--------------------------------------------------
Epoch 15/30


100%|██████████| 125/125 [01:00<00:00,  2.06batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4768, Precision: 0.4196, Recall : 0.5680, Accuracy: 0.5680, Loss: 0.0179.



100%|██████████| 12/12 [00:17<00:00,  1.47s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3412, Precision: 0.3073, Recall : 0.4479, Accuracy: 0.4479, Loss: 0.2729.

Adjusting learning rate of group 0 to 3.0518e-08.
--------------------------------------------------
Epoch 16/30


100%|██████████| 125/125 [01:00<00:00,  2.06batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4729, Precision: 0.4167, Recall : 0.5627, Accuracy: 0.5627, Loss: 0.0167.



100%|██████████| 12/12 [00:18<00:00,  1.51s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3470, Precision: 0.3063, Recall : 0.4505, Accuracy: 0.4505, Loss: 0.2356.

Adjusting learning rate of group 0 to 1.5259e-08.
--------------------------------------------------
Epoch 17/30


100%|██████████| 125/125 [01:00<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4786, Precision: 0.4218, Recall : 0.5697, Accuracy: 0.5697, Loss: 0.0191.



100%|██████████| 12/12 [00:17<00:00,  1.50s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3388, Precision: 0.3022, Recall : 0.4453, Accuracy: 0.4453, Loss: 0.2131.

Adjusting learning rate of group 0 to 7.6294e-09.
--------------------------------------------------
Epoch 18/30


100%|██████████| 125/125 [01:02<00:00,  1.99batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4752, Precision: 0.4200, Recall : 0.5650, Accuracy: 0.5650, Loss: 0.0141.



100%|██████████| 12/12 [00:16<00:00,  1.40s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3446, Precision: 0.3074, Recall : 0.4505, Accuracy: 0.4505, Loss: 0.2759.

Adjusting learning rate of group 0 to 3.8147e-09.
--------------------------------------------------
Epoch 19/30


100%|██████████| 125/125 [01:01<00:00,  2.04batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4734, Precision: 0.4179, Recall : 0.5630, Accuracy: 0.5630, Loss: 0.0169.



100%|██████████| 12/12 [00:18<00:00,  1.57s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3396, Precision: 0.3080, Recall : 0.4453, Accuracy: 0.4453, Loss: 0.2149.

Adjusting learning rate of group 0 to 1.9073e-09.
--------------------------------------------------
Epoch 20/30


100%|██████████| 125/125 [01:00<00:00,  2.06batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4753, Precision: 0.4186, Recall : 0.5660, Accuracy: 0.5660, Loss: 0.0164.



100%|██████████| 12/12 [00:17<00:00,  1.42s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3380, Precision: 0.3074, Recall : 0.4427, Accuracy: 0.4427, Loss: 0.2396.

Adjusting learning rate of group 0 to 9.5367e-10.
--------------------------------------------------
Epoch 21/30


100%|██████████| 125/125 [01:00<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4769, Precision: 0.4203, Recall : 0.5677, Accuracy: 0.5677, Loss: 0.0166.



100%|██████████| 12/12 [00:17<00:00,  1.44s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3410, Precision: 0.3077, Recall : 0.4453, Accuracy: 0.4453, Loss: 0.1937.

Adjusting learning rate of group 0 to 4.7684e-10.
--------------------------------------------------
Epoch 22/30


100%|██████████| 125/125 [00:59<00:00,  2.09batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4791, Precision: 0.4224, Recall : 0.5703, Accuracy: 0.5703, Loss: 0.0194.



100%|██████████| 12/12 [00:17<00:00,  1.49s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3403, Precision: 0.3068, Recall : 0.4453, Accuracy: 0.4453, Loss: 0.2316.

Adjusting learning rate of group 0 to 2.3842e-10.
--------------------------------------------------
Epoch 23/30


100%|██████████| 125/125 [01:00<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4763, Precision: 0.4203, Recall : 0.5665, Accuracy: 0.5665, Loss: 0.0147.



100%|██████████| 12/12 [00:18<00:00,  1.52s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3369, Precision: 0.3000, Recall : 0.4427, Accuracy: 0.4427, Loss: 0.2417.

Adjusting learning rate of group 0 to 1.1921e-10.
--------------------------------------------------
Epoch 24/30


100%|██████████| 125/125 [01:00<00:00,  2.06batch/s]


Train:


  _warn_prf(average, modifier, msg_start, len(result))


F1: 0.4791, Precision: 0.4225, Recall : 0.5700, Accuracy: 0.5700, Loss: 0.0221.



100%|██████████| 12/12 [00:19<00:00,  1.64s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3495, Precision: 0.3171, Recall : 0.4557, Accuracy: 0.4557, Loss: 0.2350.

Adjusting learning rate of group 0 to 5.9605e-11.
--------------------------------------------------
Epoch 25/30


100%|██████████| 125/125 [00:59<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4775, Precision: 0.4207, Recall : 0.5683, Accuracy: 0.5683, Loss: 0.0176.



100%|██████████| 12/12 [00:18<00:00,  1.53s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3532, Precision: 0.3188, Recall : 0.4583, Accuracy: 0.4583, Loss: 0.2111.

Adjusting learning rate of group 0 to 2.9802e-11.
--------------------------------------------------
Epoch 26/30


100%|██████████| 125/125 [01:00<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4768, Precision: 0.4209, Recall : 0.5670, Accuracy: 0.5670, Loss: 0.0206.



100%|██████████| 12/12 [00:17<00:00,  1.43s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3486, Precision: 0.3119, Recall : 0.4505, Accuracy: 0.4505, Loss: 0.2449.

Adjusting learning rate of group 0 to 1.4901e-11.
--------------------------------------------------
Epoch 27/30


100%|██████████| 125/125 [00:59<00:00,  2.09batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4738, Precision: 0.4169, Recall : 0.5643, Accuracy: 0.5643, Loss: 0.0161.



100%|██████████| 12/12 [00:17<00:00,  1.45s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3518, Precision: 0.3191, Recall : 0.4531, Accuracy: 0.4531, Loss: 0.2454.

Adjusting learning rate of group 0 to 7.4506e-12.
--------------------------------------------------
Epoch 28/30


100%|██████████| 125/125 [00:59<00:00,  2.08batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4787, Precision: 0.4226, Recall : 0.5693, Accuracy: 0.5693, Loss: 0.0154.



100%|██████████| 12/12 [00:17<00:00,  1.50s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3321, Precision: 0.2967, Recall : 0.4375, Accuracy: 0.4375, Loss: 0.2166.

Adjusting learning rate of group 0 to 3.7253e-12.
--------------------------------------------------
Epoch 29/30


100%|██████████| 125/125 [01:00<00:00,  2.07batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4760, Precision: 0.4187, Recall : 0.5670, Accuracy: 0.5670, Loss: 0.0147.



100%|██████████| 12/12 [00:20<00:00,  1.73s/batch]
  _warn_prf(average, modifier, msg_start, len(result))


Val:
F1: 0.3504, Precision: 0.3148, Recall : 0.4583, Accuracy: 0.4583, Loss: 0.2065.

Adjusting learning rate of group 0 to 1.8626e-12.
--------------------------------------------------
Epoch 30/30


100%|██████████| 125/125 [00:59<00:00,  2.09batch/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train:
F1: 0.4770, Precision: 0.4201, Recall : 0.5680, Accuracy: 0.5680, Loss: 0.0157.



100%|██████████| 12/12 [00:17<00:00,  1.44s/batch]

Val:
F1: 0.3323, Precision: 0.2950, Recall : 0.4349, Accuracy: 0.4349, Loss: 0.2340.

Adjusting learning rate of group 0 to 9.3132e-13.



  _warn_prf(average, modifier, msg_start, len(result))


### Testing

In [75]:
model = SelfNet( vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class)

In [76]:
model.load_state_dict(torch.load(PATH))

model.to(device)


SelfNet(
  (embedding): Embedding(13511, 100, padding_idx=13510)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [77]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

100%|██████████| 14/14 [00:17<00:00,  1.27s/batch]


Inference:

[[145   0  58   0   0]
 [ 13   0  50   0   0]
 [ 31   0  86   0   0]
 [ 15   0   9   0   0]
 [ 15   0  26   0   0]]

F1: 0.5156, Precision: 0.5156, Recall : 0.5156, Accuracy: 0.6016, Loss: 0.2322.





In [78]:
model.load_state_dict(torch.load(PATH))

model.to(device)


SelfNet(
  (embedding): Embedding(13511, 100, padding_idx=13510)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [79]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

100%|██████████| 14/14 [00:19<00:00,  1.37s/batch]


Inference:

[[143   0  59   0   0]
 [ 15   0  48   0   0]
 [ 23   0  94   0   0]
 [ 10   0  15   0   0]
 [ 17   0  24   0   0]]

F1: 0.4521, Precision: 0.4123, Recall : 0.5290, Accuracy: 0.6172, Loss: 0.2004.



  _warn_prf(average, modifier, msg_start, len(result))


In [80]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

100%|██████████| 14/14 [00:17<00:00,  1.27s/batch]


Inference:

[[141   0  61   0   0]
 [ 12   0  51   0   0]
 [ 29   0  88   0   0]
 [  9   0  16   0   0]
 [ 14   0  27   0   0]]

F1: 0.5112, Precision: 0.5112, Recall : 0.5112, Accuracy: 0.5964, Loss: 0.2325.



