### Imports

In [1]:
import os
import math
import numpy as np
import pandas as pd
from torchtext.data.utils import get_tokenizer
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
from time import sleep


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

In [2]:
device = torch.device('cuda:5' if torch.cuda.is_available() else 'cpu')

### Load embedding

In [3]:

df_embedding = pd.read_csv('hing_emb (1)', sep=" ", quoting=3, header=None, index_col=0,skiprows=1)

### Create vocab dictionary

In [4]:
embedding_matrix = df_embedding.to_numpy()

vocab = []

for word in list(df_embedding.index):
  vocab.append(str(word))

vocab_size , vocab_dim = embedding_matrix.shape
vocab_size, vocab_dim

(44050, 100)

In [5]:
word2idx = {w: idx for (idx, w) in enumerate(vocab)}
idx2word = {idx: w for (idx, w) in enumerate(vocab)}

### Read data

In [6]:
train_df = pd.read_csv('train.csv')

val_df = pd.read_csv("valid.csv")

test_df = pd.read_csv("test.csv")

train_df.head()


train_data =  train_df['tweets'].values
train_labels = train_df['labels'].values

val_data =  val_df['tweets'].values
val_labels = val_df['labels'].values

test_data =  test_df['tweets'].values
test_labels = test_df['labels'].values

len(train_data), len(train_labels)


(115000, 115000)

In [7]:

# post = pd.read_csv("positive_add1and2.csv",sep=',')
# neg = pd.read_csv("negative_add_1and2.csv",sep=',')

# data = []
# labels = []

# for index, row in post.iterrows():
#   if index >= 30000:
#     break
  
#   comment = str(row['tweet'])
#   data.append(comment)
#   labels.append(1)


# for index, row in neg.iterrows():
#   if index >= 30000:
#     break
  
#   comment = str(row['tweet'])
#   data.append(comment)
#   labels.append(0)
  

# len(data), len(labels)

# data, labels  = shuffle(data, labels, random_state = 40)

# data[0], labels[0]

### Tokenizer

In [8]:
tokenizer = get_tokenizer('basic_english')


def tokenized_tensor(data):

  output_tokenized = []

  for sentence in data:
    output = []
    tokenized = tokenizer(sentence)
    
    for word in tokenized:
      if word in word2idx:
        id = word2idx[word]
        output.append(id)
      else:
        word2idx[word] = len(word2idx)
        id = word2idx[word]
        output.append(id)

    output = torch.tensor(output)


    output_tokenized.append(output)

  return output_tokenized

In [9]:
# tokenized_sequences = tokenized_tensor(data)

train_tokenized_sequences = tokenized_tensor(train_data)

test_tokenized_seuqences = tokenized_tensor(test_data)

val_tokenized_seuquences = tokenized_tensor(val_data)


In [10]:
word2idx['<PAD>'] = len(word2idx)
word2idx['<PAD>']

172527

In [11]:
len(word2idx)

172528

In [12]:
## Create embedding matrix

random_init = torch.nn.Parameter(torch.Tensor( (len(word2idx) - vocab_size), vocab_dim))
torch.nn.init.kaiming_uniform_(random_init, a=math.sqrt(5))


new_matrix = np.zeros( (len(word2idx), vocab_dim) )

new_matrix[:vocab_size, :] = embedding_matrix

embedding_matrix = new_matrix

embedding_matrix[vocab_size:, :] = random_init.detach().numpy()

In [13]:
# padded_sequences = pad_sequence(tokenized_sequences, batch_first= True, padding_value=107512)

train_padded_sequences = pad_sequence(train_tokenized_sequences, batch_first= True, padding_value=172527)

val_padded_sequences = pad_sequence(val_tokenized_seuquences, batch_first= True, padding_value=172527)

test_padded_sequences = pad_sequence(test_tokenized_seuqences, batch_first= True, padding_value=172527)

In [14]:
len(train_padded_sequences[0])

65

### Dataset and Data loader 

In [15]:
class Dataset(torch.utils.data.Dataset):
    """
    This is our custom dataset class which will load the text and their corresponding labels into Pytorch tensors
    """
    def __init__(self, sequences, labels):
        self.labels = labels
        self.sequences = sequences

    def __getitem__(self, idx):
        sample = {}
        sequence = self.sequences[idx]
        label = torch.tensor(self.labels[idx])

        try:
            sample["label"] = label
            sample["token"] = sequence
        except Exception as e:
            print(e)
        
        return sample
    
    def __len__(self):
        return len(self.labels)
        

In [16]:
train_dataset = Dataset(train_padded_sequences, train_labels)

val_dataset = Dataset(val_padded_sequences, val_labels)

test_dataset = Dataset(test_padded_sequences, test_labels)

### Hyper parameters

In [39]:
## Hyper parameter

vocab_size = len(word2idx)
embed_dim = vocab_dim
seq_len = 65
hidden_size = 512
num_layer = 3
num_class = 2
batch_size = 256

LEARNING_RATE = 1e-3
EPOCHS = 30
CLIP = 0.3

In [40]:
# # Create datasets
# dataset = Dataset(padded_sequences, labels)

# split = 0.85
# train_size = int(split*len(dataset))
# val_size = len(dataset) - train_size

# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

In [41]:

## We call the dataloader class
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )

## For testing
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    pin_memory=True,
    num_workers=2,
    shuffle=True,
    drop_last=True
 )


dataloaders = {'Train': train_loader, 'Val': val_loader}



In [42]:
len(train_loader), len(val_loader), len(test_loader)

(449, 83, 117)

In [43]:
train_dataset[0]

{'label': tensor(0),
 'token': tensor([ 23066,  23067,  22357,  29756,  21000,  21010,  23068,  21018,  23069,
          21000,  21000,  20996,  21182, 172527, 172527, 172527, 172527, 172527,
         172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527,
         172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527,
         172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527,
         172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527,
         172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527, 172527,
         172527, 172527])}

### Model

In [44]:
class SelfMatchingLayer(nn.Module):

    def __init__(self,  seq_length, embed_dim, **kwargs):

      super(SelfMatchingLayer, self).__init__()

      self.seq_length = seq_length
      self.embed_dim  = embed_dim

      self.P = torch.nn.Parameter(torch.Tensor(self.embed_dim, self.embed_dim))

      self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.kaiming_uniform_(self.P, a=math.sqrt(5))

      
    def forward(self, x):  
      
      # input shape: [batch, seq_len, embed_dim]


      #---------------------------------------------#
      # calculate weight vector a = {e_i . P.Q . e_j}
      #---------------------------------------------#

      out = torch.matmul(x,  self.P)   #out shape: [batch, seq_len, embed_dim]

      out = torch.matmul(out, torch.transpose(x, 1, 2))   #out shape: [batch, seq_len, seq_len]

      out = F.gelu(out)         # apply non linear activation

      #------------------------------------#
      # take row wise mean and apply softmax
      #------------------------------------#
      out = torch.mean(out, 2)  #out shape: [batch, seq_len, seq_len]

      out = torch.softmax(out, 0)     #out shape: [batch, seq_len, seq_len]

      out = out.unsqueeze(1)          #out shape: [batch, 1, seq_len]

      #-------------------------------------------#
      # calculate weighted embedding of every word
      #-------------------------------------------#
      out = torch.matmul(out, x)

      out = out.squeeze(1)

      return out      #out shape: [batch, seq_len]


In [54]:
class SelfNet(nn.Module):

    def __init__(self, vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class):
        super(SelfNet, self).__init__()


        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx = word2idx['<PAD>'])
        self.embedding.load_state_dict({'weight': torch.from_numpy(embedding_matrix)})
        self.embedding.weight.requires_grad = True

        self.selfnet_layer = SelfMatchingLayer(seq_len, embed_dim)

        self.lstm = nn.LSTM(input_size = embed_dim, hidden_size = hidden_size, num_layers = num_layer, dropout = 0.3, bidirectional = True, batch_first = True )


        self.fc1 = nn.Linear(2* hidden_size + embed_dim , hidden_size//4)
        self.fc2 = nn.Linear(hidden_size//4, num_class)

        self.dropout = nn.Dropout(0.3)



    def forward(self, input):

        embedded = self.embedding(input)  #out shape = [batch, seq_len, embed_dim] 

        selfmatch_output = self.selfnet_layer(embedded)  #out shape = [batch, seq_len] 

        lstm_out, _ = self.lstm(embedded)     

        lstm_out = lstm_out[:, -1, :]      #out shape = [batch, 2 * hidden_size]      

        concat = torch.cat( (selfmatch_output, lstm_out), 1)     #out shape = [batch, 2 * hidden_size + seq_len ]      

        linear_out = self.dropout(F.relu(self.fc1(concat)))     #out shape = [batch, hidden_size]      

        final_out = self.fc2(linear_out)     #out shape = [batch, 2]      

        return final_out

In [55]:

### Test
model = SelfNet( vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class)
model = model.to(device)

# for batch in train_loader:
#   x = batch['token'].to(device)
#   out = model(x)
#   print(out.shape)
#   break



In [56]:
model

SelfNet(
  (embedding): Embedding(172528, 100, padding_idx=172527)
  (selfnet_layer): SelfMatchingLayer()
  (lstm): LSTM(100, 512, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (fc1): Linear(in_features=1124, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

### Optimizer and loss

In [57]:
#optimizer
optimizer = Adam(model.parameters(), lr = LEARNING_RATE, eps=1e-8)
#Loss function
criterion = nn.CrossEntropyLoss()

In [58]:
#to calculate accuracy

def get_accuracy(preds, labels):
  total_acc = 0.0
  
  for i in range(len(labels)):
    if labels[i] == preds[i]:
      total_acc+=1.0
  
  return total_acc / len(labels)

### Training

In [1]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
#scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.2, patience=5, threshold=0.0008, verbose = True)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5, verbose = True)


NameError: name 'torch' is not defined

In [60]:
PATH = 'models/best_selfnet_mean+gelu_2.pt'

In [61]:
best_valid_f1 = 0.0000

for epoch in range(0, EPOCHS):
  

    print('-'*50)
    print('Epoch {}/{}'.format(epoch+1, EPOCHS))

    for phase in ['Train', 'Val']:

        loss = 0.0   #epoch loss
        accuracy = 0.0   #epoch accuracy

        y_true = []
        y_pred = []

        if phase == 'Train':
            model.train()
        else:
            model.eval()
        
        with tqdm(dataloaders[phase], unit="batch") as tepoch:

          for batch in tepoch:
            labels = batch["label"].to(device)
            text = batch["token"].to(device)

            output = model(text)

            loss = criterion(output, labels)

            if phase == 'Train':

                #zero gradients
                optimizer.zero_grad() 

                # Backward pass  (calculates the gradients)
                loss.backward()   

                # gradient clipping
                nn.utils.clip_grad_norm_(model.parameters(), CLIP)    

                optimizer.step()             # Updates the weights    

            sleep(0.1)
            _, preds = output.data.max(1)
            y_pred.extend(preds.tolist())
            y_true.extend(labels.tolist())
            
            batch_acc = get_accuracy(preds.tolist(), labels.tolist())
            
            
            loss += loss.item()
            accuracy+= batch_acc

              
          epoch_loss = loss / (len(dataloaders[phase]))
          epoch_acc = accuracy / (len(dataloaders[phase]))

          print(phase + ":")
          
          
          #print(confusion_matrix(y_true, y_pred))
          pre = precision_score(y_true, y_pred, average='weighted')
          recall = recall_score(y_true, y_pred, average='weighted')
          f1 = f1_score(y_true, y_pred, average='weighted')
          

          print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))
          # save best model
          print()
          
            
          if phase == 'Val':
                
                if f1 > best_valid_f1:
                    best_valid_f1 = f1
                    
                    torch.save(model.state_dict(), PATH)
                    print('Model Saved!')
                    
                scheduler.step(f1)
                    


--------------------------------------------------
Epoch 1/30


100%|██████████| 449/449 [02:02<00:00,  3.68batch/s]


Train:
F1: 0.7914, Precision: 0.7937, Recall : 0.7918, Accuracy: 0.7918, Loss: 0.0017.



100%|██████████| 83/83 [00:10<00:00,  8.08batch/s]


Val:
F1: 0.8589, Precision: 0.8598, Recall : 0.8590, Accuracy: 0.8590, Loss: 0.0091.

Model Saved!
--------------------------------------------------
Epoch 2/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.8692, Precision: 0.8703, Recall : 0.8693, Accuracy: 0.8693, Loss: 0.0016.



100%|██████████| 83/83 [00:10<00:00,  8.14batch/s]


Val:
F1: 0.8708, Precision: 0.8724, Recall : 0.8709, Accuracy: 0.8709, Loss: 0.0071.

Model Saved!
--------------------------------------------------
Epoch 3/30


100%|██████████| 449/449 [02:02<00:00,  3.67batch/s]


Train:
F1: 0.8869, Precision: 0.8874, Recall : 0.8869, Accuracy: 0.8869, Loss: 0.0015.



100%|██████████| 83/83 [00:10<00:00,  8.18batch/s]


Val:
F1: 0.8731, Precision: 0.8737, Recall : 0.8732, Accuracy: 0.8732, Loss: 0.0075.

Model Saved!
--------------------------------------------------
Epoch 4/30


100%|██████████| 449/449 [02:01<00:00,  3.68batch/s]


Train:
F1: 0.9011, Precision: 0.9015, Recall : 0.9011, Accuracy: 0.9011, Loss: 0.0010.



100%|██████████| 83/83 [00:10<00:00,  8.19batch/s]


Val:
F1: 0.8729, Precision: 0.8746, Recall : 0.8730, Accuracy: 0.8730, Loss: 0.0082.

--------------------------------------------------
Epoch 5/30


100%|██████████| 449/449 [02:02<00:00,  3.65batch/s]


Train:
F1: 0.9148, Precision: 0.9150, Recall : 0.9148, Accuracy: 0.9148, Loss: 0.0008.



100%|██████████| 83/83 [00:10<00:00,  8.10batch/s]


Val:
F1: 0.8664, Precision: 0.8664, Recall : 0.8664, Accuracy: 0.8664, Loss: 0.0096.

--------------------------------------------------
Epoch 6/30


100%|██████████| 449/449 [02:02<00:00,  3.67batch/s]


Train:
F1: 0.9278, Precision: 0.9281, Recall : 0.9278, Accuracy: 0.9278, Loss: 0.0008.



100%|██████████| 83/83 [00:10<00:00,  8.07batch/s]


Val:
F1: 0.8669, Precision: 0.8687, Recall : 0.8671, Accuracy: 0.8671, Loss: 0.0087.

--------------------------------------------------
Epoch 7/30


100%|██████████| 449/449 [02:03<00:00,  3.65batch/s]


Train:
F1: 0.9388, Precision: 0.9390, Recall : 0.9388, Accuracy: 0.9388, Loss: 0.0009.



100%|██████████| 83/83 [00:10<00:00,  8.07batch/s]


Val:
F1: 0.8650, Precision: 0.8674, Recall : 0.8652, Accuracy: 0.8652, Loss: 0.0074.

--------------------------------------------------
Epoch 8/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9454, Precision: 0.9455, Recall : 0.9454, Accuracy: 0.9454, Loss: 0.0007.



100%|██████████| 83/83 [00:10<00:00,  8.18batch/s]


Val:
F1: 0.8578, Precision: 0.8579, Recall : 0.8578, Accuracy: 0.8578, Loss: 0.0108.

--------------------------------------------------
Epoch 9/30


100%|██████████| 449/449 [02:02<00:00,  3.67batch/s]


Train:
F1: 0.9513, Precision: 0.9514, Recall : 0.9513, Accuracy: 0.9513, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.12batch/s]


Val:
F1: 0.8558, Precision: 0.8572, Recall : 0.8559, Accuracy: 0.8559, Loss: 0.0100.

Epoch     9: reducing learning rate of group 0 to 2.0000e-04.
--------------------------------------------------
Epoch 10/30


100%|██████████| 449/449 [02:03<00:00,  3.64batch/s]


Train:
F1: 0.9608, Precision: 0.9608, Recall : 0.9608, Accuracy: 0.9608, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.20batch/s]


Val:
F1: 0.8561, Precision: 0.8569, Recall : 0.8562, Accuracy: 0.8562, Loss: 0.0129.

--------------------------------------------------
Epoch 11/30


100%|██████████| 449/449 [02:03<00:00,  3.64batch/s]


Train:
F1: 0.9622, Precision: 0.9623, Recall : 0.9622, Accuracy: 0.9622, Loss: 0.0006.



100%|██████████| 83/83 [00:10<00:00,  8.05batch/s]


Val:
F1: 0.8519, Precision: 0.8528, Recall : 0.8519, Accuracy: 0.8519, Loss: 0.0141.

--------------------------------------------------
Epoch 12/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9639, Precision: 0.9639, Recall : 0.9639, Accuracy: 0.9639, Loss: 0.0002.



100%|██████████| 83/83 [00:10<00:00,  8.14batch/s]


Val:
F1: 0.8505, Precision: 0.8515, Recall : 0.8506, Accuracy: 0.8506, Loss: 0.0166.

--------------------------------------------------
Epoch 13/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9654, Precision: 0.9654, Recall : 0.9654, Accuracy: 0.9654, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.17batch/s]


Val:
F1: 0.8481, Precision: 0.8483, Recall : 0.8481, Accuracy: 0.8481, Loss: 0.0094.

--------------------------------------------------
Epoch 14/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9669, Precision: 0.9669, Recall : 0.9669, Accuracy: 0.9669, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.14batch/s]


Val:
F1: 0.8490, Precision: 0.8506, Recall : 0.8491, Accuracy: 0.8491, Loss: 0.0160.

--------------------------------------------------
Epoch 15/30


100%|██████████| 449/449 [02:03<00:00,  3.65batch/s]


Train:
F1: 0.9679, Precision: 0.9679, Recall : 0.9679, Accuracy: 0.9679, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.04batch/s]


Val:
F1: 0.8476, Precision: 0.8484, Recall : 0.8477, Accuracy: 0.8477, Loss: 0.0156.

Epoch    15: reducing learning rate of group 0 to 4.0000e-05.
--------------------------------------------------
Epoch 16/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9698, Precision: 0.9699, Recall : 0.9698, Accuracy: 0.9698, Loss: 0.0004.



100%|██████████| 83/83 [00:10<00:00,  8.04batch/s]


Val:
F1: 0.8460, Precision: 0.8468, Recall : 0.8461, Accuracy: 0.8461, Loss: 0.0159.

--------------------------------------------------
Epoch 17/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9696, Precision: 0.9696, Recall : 0.9696, Accuracy: 0.9696, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.11batch/s]


Val:
F1: 0.8450, Precision: 0.8457, Recall : 0.8451, Accuracy: 0.8451, Loss: 0.0136.

--------------------------------------------------
Epoch 18/30


100%|██████████| 449/449 [02:02<00:00,  3.66batch/s]


Train:
F1: 0.9697, Precision: 0.9697, Recall : 0.9697, Accuracy: 0.9697, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.08batch/s]


Val:
F1: 0.8462, Precision: 0.8469, Recall : 0.8463, Accuracy: 0.8463, Loss: 0.0268.

--------------------------------------------------
Epoch 19/30


100%|██████████| 449/449 [02:03<00:00,  3.65batch/s]


Train:
F1: 0.9706, Precision: 0.9706, Recall : 0.9706, Accuracy: 0.9706, Loss: 0.0004.



100%|██████████| 83/83 [00:10<00:00,  8.16batch/s]


Val:
F1: 0.8453, Precision: 0.8459, Recall : 0.8454, Accuracy: 0.8454, Loss: 0.0117.

--------------------------------------------------
Epoch 20/30


100%|██████████| 449/449 [02:03<00:00,  3.65batch/s]


Train:
F1: 0.9704, Precision: 0.9704, Recall : 0.9704, Accuracy: 0.9704, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.20batch/s]


Val:
F1: 0.8438, Precision: 0.8441, Recall : 0.8438, Accuracy: 0.8438, Loss: 0.0174.

--------------------------------------------------
Epoch 21/30


100%|██████████| 449/449 [02:02<00:00,  3.65batch/s]


Train:
F1: 0.9703, Precision: 0.9703, Recall : 0.9703, Accuracy: 0.9703, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.17batch/s]


Val:
F1: 0.8463, Precision: 0.8472, Recall : 0.8463, Accuracy: 0.8463, Loss: 0.0179.

Epoch    21: reducing learning rate of group 0 to 8.0000e-06.
--------------------------------------------------
Epoch 22/30


100%|██████████| 449/449 [02:02<00:00,  3.67batch/s]


Train:
F1: 0.9710, Precision: 0.9711, Recall : 0.9710, Accuracy: 0.9710, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.14batch/s]


Val:
F1: 0.8453, Precision: 0.8460, Recall : 0.8454, Accuracy: 0.8454, Loss: 0.0162.

--------------------------------------------------
Epoch 23/30


100%|██████████| 449/449 [02:03<00:00,  3.65batch/s]


Train:
F1: 0.9709, Precision: 0.9709, Recall : 0.9709, Accuracy: 0.9709, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.16batch/s]


Val:
F1: 0.8456, Precision: 0.8464, Recall : 0.8457, Accuracy: 0.8457, Loss: 0.0194.

--------------------------------------------------
Epoch 24/30


100%|██████████| 449/449 [02:02<00:00,  3.65batch/s]


Train:
F1: 0.9710, Precision: 0.9711, Recall : 0.9710, Accuracy: 0.9710, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.14batch/s]


Val:
F1: 0.8457, Precision: 0.8464, Recall : 0.8458, Accuracy: 0.8458, Loss: 0.0176.

--------------------------------------------------
Epoch 25/30


100%|██████████| 449/449 [02:03<00:00,  3.63batch/s]


Train:
F1: 0.9712, Precision: 0.9712, Recall : 0.9712, Accuracy: 0.9712, Loss: 0.0004.



100%|██████████| 83/83 [00:10<00:00,  8.16batch/s]


Val:
F1: 0.8445, Precision: 0.8452, Recall : 0.8446, Accuracy: 0.8446, Loss: 0.0150.

--------------------------------------------------
Epoch 26/30


100%|██████████| 449/449 [02:03<00:00,  3.63batch/s]


Train:
F1: 0.9712, Precision: 0.9712, Recall : 0.9712, Accuracy: 0.9712, Loss: 0.0003.



100%|██████████| 83/83 [00:10<00:00,  8.07batch/s]


Val:
F1: 0.8458, Precision: 0.8466, Recall : 0.8459, Accuracy: 0.8459, Loss: 0.0212.

--------------------------------------------------
Epoch 27/30


100%|██████████| 449/449 [02:03<00:00,  3.62batch/s]


Train:
F1: 0.9707, Precision: 0.9707, Recall : 0.9707, Accuracy: 0.9707, Loss: 0.0004.



100%|██████████| 83/83 [00:10<00:00,  8.05batch/s]


Val:
F1: 0.8440, Precision: 0.8446, Recall : 0.8440, Accuracy: 0.8440, Loss: 0.0161.

Epoch    27: reducing learning rate of group 0 to 1.6000e-06.
--------------------------------------------------
Epoch 28/30


100%|██████████| 449/449 [02:03<00:00,  3.64batch/s]


Train:
F1: 0.9712, Precision: 0.9712, Recall : 0.9712, Accuracy: 0.9712, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.06batch/s]


Val:
F1: 0.8442, Precision: 0.8450, Recall : 0.8443, Accuracy: 0.8443, Loss: 0.0151.

--------------------------------------------------
Epoch 29/30


100%|██████████| 449/449 [02:03<00:00,  3.64batch/s]


Train:
F1: 0.9711, Precision: 0.9711, Recall : 0.9711, Accuracy: 0.9711, Loss: 0.0004.



100%|██████████| 83/83 [00:10<00:00,  8.03batch/s]


Val:
F1: 0.8447, Precision: 0.8455, Recall : 0.8448, Accuracy: 0.8448, Loss: 0.0134.

--------------------------------------------------
Epoch 30/30


100%|██████████| 449/449 [02:03<00:00,  3.64batch/s]


Train:
F1: 0.9710, Precision: 0.9710, Recall : 0.9710, Accuracy: 0.9710, Loss: 0.0005.



100%|██████████| 83/83 [00:10<00:00,  8.21batch/s]


Val:
F1: 0.8442, Precision: 0.8449, Recall : 0.8443, Accuracy: 0.8443, Loss: 0.0108.



### Testing

In [None]:
model = SelfNet( vocab_size, embed_dim, hidden_size, num_layer, seq_len, num_class)

In [None]:
model.load_state_dict(torch.load(PATH))

model.to(device)


In [None]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

In [43]:
model.load_state_dict(torch.load('models/best_selfnet_mean+gelu.pt'))

model.to(device)


RuntimeError: Error(s) in loading state_dict for SelfNet:
	Missing key(s) in state_dict: "lstm.weight_ih_l2", "lstm.weight_hh_l2", "lstm.bias_ih_l2", "lstm.bias_hh_l2", "lstm.weight_ih_l2_reverse", "lstm.weight_hh_l2_reverse", "lstm.bias_ih_l2_reverse", "lstm.bias_hh_l2_reverse". 
	size mismatch for lstm.weight_ih_l0: copying a param with shape torch.Size([1024, 100]) from checkpoint, the shape in current model is torch.Size([2048, 100]).
	size mismatch for lstm.weight_hh_l0: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([2048, 512]).
	size mismatch for lstm.bias_ih_l0: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.bias_hh_l0: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.weight_ih_l0_reverse: copying a param with shape torch.Size([1024, 100]) from checkpoint, the shape in current model is torch.Size([2048, 100]).
	size mismatch for lstm.weight_hh_l0_reverse: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([2048, 512]).
	size mismatch for lstm.bias_ih_l0_reverse: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.bias_hh_l0_reverse: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.weight_ih_l1: copying a param with shape torch.Size([1024, 512]) from checkpoint, the shape in current model is torch.Size([2048, 1024]).
	size mismatch for lstm.weight_hh_l1: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([2048, 512]).
	size mismatch for lstm.bias_ih_l1: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.bias_hh_l1: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.weight_ih_l1_reverse: copying a param with shape torch.Size([1024, 512]) from checkpoint, the shape in current model is torch.Size([2048, 1024]).
	size mismatch for lstm.weight_hh_l1_reverse: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([2048, 512]).
	size mismatch for lstm.bias_ih_l1_reverse: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for lstm.bias_hh_l1_reverse: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for fc1.weight: copying a param with shape torch.Size([256, 612]) from checkpoint, the shape in current model is torch.Size([512, 1124]).
	size mismatch for fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for fc2.weight: copying a param with shape torch.Size([2, 256]) from checkpoint, the shape in current model is torch.Size([2, 512]).

In [None]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))

In [None]:
loss = 0.0   #epoch loss
accuracy = 0.0   #epoch accuracy

y_true = []
y_pred = []

# set the model to evaluation mode            
model.eval()
        
with tqdm(test_loader, unit="batch") as tepoch:
  for batch in tepoch:
    labels = batch["label"].to(device)
    text = batch["token"].to(device)
    
    with torch.no_grad():
        output = model(text)
    
    
    _, preds = output.data.max(1)
    y_pred.extend(preds.tolist())
    y_true.extend(labels.tolist())
            
    batch_acc = get_accuracy(preds.tolist(), labels.tolist())

    loss = criterion(output, labels)
            
            
    loss += loss.item()
    accuracy+= batch_acc

    sleep(0.1)

              
epoch_loss = loss / (len(val_loader))
epoch_acc = accuracy / (len(val_loader))
print('')
print("Inference:")
print("")
print(confusion_matrix(y_true, y_pred))
pre = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')
print("")

print("F1: {:.4f}, Precision: {:.4f}, Recall : {:.4f}, Accuracy: {:.4f}, Loss: {:.4f}.".format(f1, pre, recall, epoch_acc, epoch_loss))