In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import transformers
from transformers import AutoModel, BertTokenizerFast
from sklearn.utils.class_weight import compute_class_weight
from utilities.preprocess import Preproccesor

# specify GPU
device = torch.device("cuda")

df = pd.read_csv("../ethos_data/Ethos_Dataset_Binary.csv", delimiter=';')
df.head()

df["isHate"] = np.where((df.isHate>=0.5), 1, 0)

X,y = df['comment'].values, df['isHate'].values

In [2]:
X, y = Preproccesor.load_data(True)

In [3]:
df = pd.DataFrame({'comment': X, 'isHate': y})


In [4]:
X,y = df['comment'].values, df['isHate'].values

In [5]:
y[:5]

array([0, 1, 0, 0, 0])

train_text, temp_text, train_labels, temp_labels = train_test_split(df['comment'], df['isHate'], 
                                                                    random_state=2018, 
                                                                    test_size=0.3, 
                                                                    stratify=df['isHate'])


val_text, test_text, val_labels, test_labels = train_test_split(temp_text, temp_labels, 
                                                                random_state=2018, 
                                                                test_size=0.5, 
                                                                stratify=temp_labels)

In [6]:
from sklearn.model_selection import StratifiedKFold, KFold
n_fold = 10
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=7)

In [53]:
import matplotlib.pyplot as plt
%matplotlib inline

In [54]:
unique, counts = np.unique(val_labels, return_counts=True)
print(unique)
plt.bar(unique, counts)

NameError: name 'val_labels' is not defined

In [7]:

# import BERT-base pretrained model
bert = AutoModel.from_pretrained('bert-base-uncased')

# Load the BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [9]:
# sample data
text = ["this is a bert model tutorial", "we will fine-tune a bert model"]

# encode text
sent_id = tokenizer.batch_encode_plus(text, padding=True)

# output
print(sent_id)

{'input_ids': [[101, 2023, 2003, 1037, 14324, 2944, 14924, 4818, 102, 0], [101, 2057, 2097, 2986, 1011, 8694, 1037, 14324, 2944, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}


In [57]:
seq_len = [len(i.split()) for i in train_text]

pd.Series(seq_len).hist(bins = 30)

NameError: name 'train_text' is not defined

# tokenize and encode sequences in the training set
tokens_train = tokenizer.batch_encode_plus(
    train_text.tolist(),
    max_length = 100,
    pad_to_max_length=True,
    truncation=True
)

# tokenize and encode sequences in the validation set
tokens_val = tokenizer.batch_encode_plus(
    val_text.tolist(),
    max_length = 100,
    pad_to_max_length=True,
    truncation=True
)

# tokenize and encode sequences in the test set
tokens_test = tokenizer.batch_encode_plus(
    test_text.tolist(),
    max_length = 100,
    pad_to_max_length=True,
    truncation=True
)

train_seq = torch.tensor(tokens_train['input_ids'])
train_mask = torch.tensor(tokens_train['attention_mask'])
train_y = torch.tensor(train_labels.tolist())

val_seq = torch.tensor(tokens_val['input_ids'])
val_mask = torch.tensor(tokens_val['attention_mask'])
val_y = torch.tensor(val_labels.tolist())

test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])
test_y = torch.tensor(test_labels.tolist())

In [10]:
# freeze all the parameters
for param in bert.parameters():
    param.requires_grad = False

In [11]:
class BERT_Arch(nn.Module):

    def __init__(self, bert):
      
      super(BERT_Arch, self).__init__()

      self.bert = bert 
      
      # dropout layer
      self.dropout = nn.Dropout(0.1)
      
      # relu activation function
      self.relu =  nn.ReLU()

      # dense layer 1
      self.fc1 = nn.Linear(768,512)
    
      self.fc2 = nn.Linear(512,256)
      
      # dense layer 2 (Output layer)
      self.fc3 = nn.Linear(256,2)

      #softmax activation function
      self.softmax = nn.LogSoftmax(dim=1)

    #define the forward pass
    def forward(self, sent_id, mask):

      #pass the inputs to the model  
      _, cls_hs = self.bert(sent_id, attention_mask=mask)
      
      x = self.fc1(cls_hs)

      x = self.relu(x)

      x = self.dropout(x)

      # output layer
      x = self.fc2(x)
      x = self.fc3(x)
      
      # apply softmax activation
      x = self.softmax(x)

      return x

In [12]:
# pass the pre-trained BERT to our define architecture
model = BERT_Arch(bert)

# push the model to GPU
model = model.to(device)

In [13]:
# optimizer from hugging face transformers
from transformers import AdamW

# define the optimizer
optimizer = AdamW(model.parameters(),
                  lr = 1e-5)          

from sklearn.utils.class_weight import compute_class_weight

#compute the class weights
class_weights = compute_class_weight('balanced', np.unique(train_labels), train_labels)

print("Class Weights:",class_weights)

In [14]:


# number of training epochs
epochs = 10

In [15]:
# function to train the model
def train(train_dataloader):
  
  model.train()

  total_loss, total_accuracy = 0, 0
  
  # empty list to save model predictions
  total_preds=[]
  
  # iterate over batches
  for step,batch in enumerate(train_dataloader):
    
    # progress update after every 50 batches.
    if step % 50 == 0 and not step == 0:
      print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))

    # push the batch to gpu
    batch = [r.to(device) for r in batch]
 
    sent_id, mask, labels = batch

    # clear previously calculated gradients 
    model.zero_grad()        

    # get model predictions for the current batch
    preds = model(sent_id, mask)

    # compute the loss between actual and predicted values
    loss = cross_entropy(preds, labels)

    # add on to the total loss
    total_loss = total_loss + loss.item()

    # backward pass to calculate the gradients
    loss.backward()

    # clip the the gradients to 1.0. It helps in preventing the exploding gradient problem
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

    # update parameters
    optimizer.step()

    # model predictions are stored on GPU. So, push it to CPU
    preds=preds.detach().cpu().numpy()

    # append the model predictions
    total_preds.append(preds)

  # compute the training loss of the epoch
  avg_loss = total_loss / len(train_dataloader)
  
  # predictions are in the form of (no. of batches, size of batch, no. of classes).
  # reshape the predictions in form of (number of samples, no. of classes)
  total_preds  = np.concatenate(total_preds, axis=0)

  #returns the loss and predictions
  return avg_loss, total_preds

In [16]:
# function for evaluating the model
def evaluate(val_dataloader):
  
  print("\nEvaluating...")
  
  # deactivate dropout layers
  model.eval()

  total_loss, total_accuracy = 0, 0
  
  # empty list to save the model predictions
  total_preds = []

  # iterate over batches
  for step,batch in enumerate(val_dataloader):
    
    # Progress update every 50 batches.
    if step % 50 == 0 and not step == 0:
      
      # Calculate elapsed time in minutes.
      elapsed = format_time(time.time() - t0)
            
      # Report progress.
      print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(val_dataloader)))

    # push the batch to gpu
    batch = [t.to(device) for t in batch]

    sent_id, mask, labels = batch

    # deactivate autograd
    with torch.no_grad():
      
      # model predictions
      preds = model(sent_id, mask)

      # compute the validation loss between actual and predicted values
      loss = cross_entropy(preds,labels)

      total_loss = total_loss + loss.item()

      preds = preds.detach().cpu().numpy()

      total_preds.append(preds)

  # compute the validation loss of the epoch
  avg_loss = total_loss / len(val_dataloader) 

  # reshape the predictions in form of (number of samples, no. of classes)
  total_preds  = np.concatenate(total_preds, axis=0)

  return avg_loss, total_preds

In [17]:
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import time

In [18]:

f_results = []
n_fold = 10
counter = 0
scores = {}
scores.setdefault('fit_time', [])
scores.setdefault('score_time', [])
scores.setdefault('test_F1', [])
scores.setdefault('test_Precision', [])
scores.setdefault('test_Recall', [])
scores.setdefault('test_Accuracy', [])
for fold_n, (train_index, valid_index) in enumerate(folds.split(X, y)):
    print('Fold', fold_n, 'started at', time.ctime())
    # set initial loss to infinite
    best_valid_loss = float('inf')
    

    #define a batch size
    batch_size = 32
    
    # tokenize and encode sequences in the training set
    tokens_train = tokenizer.batch_encode_plus(
        X[train_index].tolist(),
        max_length = 100,
        pad_to_max_length=True,
        truncation=True
    )

    # tokenize and encode sequences in the validation set
    tokens_val = tokenizer.batch_encode_plus(
        X[valid_index].tolist(),
        max_length = 100,
        pad_to_max_length=True,
        truncation=True
    )

    # tokenize and encode sequences in the test set
    #tokens_test = tokenizer.batch_encode_plus(
     #   test_text.tolist(),
      #  max_length = 100,
       # pad_to_max_length=True,
        #truncation=True
    #)
    
    train_seq = torch.tensor(tokens_train['input_ids'])
    train_mask = torch.tensor(tokens_train['attention_mask'])
    train_y = torch.tensor(y[train_index].tolist())

    val_seq = torch.tensor(tokens_val['input_ids'])
    val_mask = torch.tensor(tokens_val['attention_mask'])
    val_y = torch.tensor(y[valid_index].tolist())

    #test_seq = torch.tensor(tokens_test['input_ids'])
    #test_mask = torch.tensor(tokens_test['attention_mask'])
    #test_y = torch.tensor(test_labels.tolist())

    # wrap tensors
    train_data = TensorDataset(train_seq, train_mask, train_y)

    # sampler for sampling the data during training
    train_sampler = RandomSampler(train_data)

    # dataLoader for train set
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    # wrap tensors
    val_data = TensorDataset(val_seq, val_mask, val_y)

    # sampler for sampling the data during training
    val_sampler = SequentialSampler(val_data)

    # dataLoader for validation set
    val_dataloader = DataLoader(val_data, sampler = val_sampler, batch_size=batch_size)
    
    
    #compute the class weights
    class_weights = compute_class_weight('balanced', np.unique(y[train_index]), y[train_index])

    print("Class Weights:",class_weights)
    
    # converting list of class weights to a tensor
    weights= torch.tensor(class_weights,dtype=torch.float)

    # push to GPU
    weights = weights.to(device)

    # define the loss function
    cross_entropy  = nn.NLLLoss(weight=weights) 
    
    

    # empty lists to store training and validation loss of each epoch
    train_losses=[]
    valid_losses=[]

    #for each epoch
    for epoch in range(epochs):

        print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

        #train model
        train_loss, _ = train(train_dataloader)

        #evaluate model
        valid_loss, _ = evaluate(val_dataloader)

        #save the best model
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'saved_weights.pt_'+str(fold_n))

        # append training and validation loss
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        print(f'\nTraining Loss: {train_loss:.3f}')
        print(f'Validation Loss: {valid_loss:.3f}')
    
    path = 'saved_weights.pt_'+str(fold_n)
    model.load_state_dict(torch.load(path))
    
    with torch.no_grad():
      preds = model(val_seq.to(device), val_mask.to(device))
      preds = preds.detach().cpu().numpy()
    y_valid = y[valid_index]
    y_preds = np.argmax(preds, axis = 1)
    print(accuracy_score(y_valid, y_preds))
    scores['test_F1'].append(f1_score(y_valid, y_preds, average='macro'))
    scores['test_Precision'].append(precision_score(y_valid, y_preds, average='macro'))
    scores['test_Recall'].append(recall_score(y_valid, y_preds, average='macro'))
    scores['test_Accuracy'].append(accuracy_score(y_valid, y_preds))
    #scores['test_Specificity'].append(specificity(y_valid, y_preds))
    #scores['test_Sensitivity'].append(sensitivity(y_valid, y_preds))

print("{:<10} | {:<7} {:<7} {:<7} {:<7}".format("Bert_davidson",
                                                           str('%.4f' % (sum(scores['test_F1']) / 10)),
                                                           str('%.4f' % (sum(scores['test_Precision']) / 10)),
                                                           str('%.4f' % (sum(scores['test_Recall']) / 10)),
                                                           str('%.4f' % (sum(scores['test_Accuracy']) / 10))))

Fold 0 started at Wed Oct 14 14:28:04 2020


 1 0 0 1 0 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 1 1 0 0 0
 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1 1 1
 0 1 1 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 0
 1 1 0 1 1 0 1 0 0 1 1 1 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0
 1 0 1 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 1 1 0 1 1 0 1 1 0 1 0 0 1 0 1 1 0 0 0
 0 0 1 0 1 0 0 1 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 1
 0 0 1 1 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 1 1 0 1 1 0 1 0 1 1 0 0 0 0 0 0 0 0
 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 0 1 0 0 1 0 0
 1 0 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0
 0 1 0 1 1 0 1 0 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 0
 1 0 0 1 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 0 0 0 1 1 0 0
 1 0 0 1 1 1 1 1 0 0 1 0 0 0 1 0 0 1 1 0 1 1 1 0 1 1 1 1 0 0 0 0 1 0 1 1 1
 0 0 0 0 0 1 1 0 0 0 0 0 

Class Weights: [0.88385827 1.15128205]

 Epoch 1 / 10

Evaluating...

Training Loss: 0.694
Validation Loss: 0.686

 Epoch 2 / 10

Evaluating...

Training Loss: 0.692
Validation Loss: 0.682

 Epoch 3 / 10

Evaluating...

Training Loss: 0.691
Validation Loss: 0.678

 Epoch 4 / 10

Evaluating...

Training Loss: 0.688
Validation Loss: 0.675

 Epoch 5 / 10

Evaluating...

Training Loss: 0.691
Validation Loss: 0.673

 Epoch 6 / 10

Evaluating...

Training Loss: 0.690
Validation Loss: 0.671

 Epoch 7 / 10

Evaluating...

Training Loss: 0.689
Validation Loss: 0.668

 Epoch 8 / 10

Evaluating...

Training Loss: 0.684
Validation Loss: 0.667

 Epoch 9 / 10

Evaluating...

Training Loss: 0.683
Validation Loss: 0.665

 Epoch 10 / 10

Evaluating...

Training Loss: 0.680
Validation Loss: 0.663
0.74
Fold 1 started at Wed Oct 14 14:28:44 2020
Class Weights: [0.88385827 1.15128205]

 Epoch 1 / 10


 0 1 0 0 0 1 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 0 1 0 0 0 1 1 0 0
 0 0 0 0 0 1 1 0 0 0 1 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0
 1 1 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 1 1 1 0
 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 0 0 0
 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 1 1 0 0 1 0 0 0 1 0 1 1 0 1 1 0
 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 0 1 0 1 0 1 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 1 1 0 0 0 0 1 0 1 0 1 0 0 1
 1 0 1 0 0 0 1 0 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 1 0 1 1 1 1 0 1 0 1 0 1 1 0
 0 0 0 0 1 0 0 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 0 1 1 1 0 0
 1 1 0 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0
 0 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 1 0 0 0 1 0 0 1 1 1 0 0 0 0 1 0 1 1 1 1 1
 0 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0
 1 0 1 1 1 1 0 0 1 0 0 1 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1 1
 0 0 0 0 0 1 1 0 0 0 0 1 


Evaluating...

Training Loss: 0.679
Validation Loss: 0.680

 Epoch 2 / 10

Evaluating...

Training Loss: 0.678
Validation Loss: 0.679

 Epoch 3 / 10

Evaluating...

Training Loss: 0.677
Validation Loss: 0.678

 Epoch 4 / 10

Evaluating...

Training Loss: 0.677
Validation Loss: 0.677

 Epoch 5 / 10

Evaluating...

Training Loss: 0.675
Validation Loss: 0.675

 Epoch 6 / 10

Evaluating...

Training Loss: 0.678
Validation Loss: 0.674

 Epoch 7 / 10

Evaluating...

Training Loss: 0.677
Validation Loss: 0.672

 Epoch 8 / 10

Evaluating...

Training Loss: 0.674
Validation Loss: 0.671

 Epoch 9 / 10

Evaluating...

Training Loss: 0.674
Validation Loss: 0.669

 Epoch 10 / 10

Evaluating...

Training Loss: 0.673
Validation Loss: 0.668
0.54
Fold 2 started at Wed Oct 14 14:29:24 2020
Class Weights: [0.88385827 1.15128205]

 Epoch 1 / 10


 0 0 0 0 0 1 1 1 1 0 1 1 1 0 1 0 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 1 1 0 0 0 0
 0 0 1 0 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 1 1 1 0 1
 0 1 1 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 0 0 1 1
 0 1 0 1 1 1 0 1 0 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 1 0
 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 0 0 0
 0 0 0 0 1 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0
 1 0 0 0 0 0 0 1 0 1 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 1 0 1 0 0 0
 1 0 1 1 0 1 0 1 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 1 1 0 1 1 0 1 0 1 1 0 0 0 0
 0 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 1 1 1 0 0 0 1 1 0 0
 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0
 0 1 1 0 0 1 0 1 0 1 1 1 0 1 1 0 0 0 1 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0
 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 0 0 0 1 0 0
 0 1 1 0 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0
 0 1 1 1 1 0 0 0 0 0 1 0 


Evaluating...

Training Loss: 0.675
Validation Loss: 0.665

 Epoch 2 / 10

Evaluating...

Training Loss: 0.677
Validation Loss: 0.663

 Epoch 3 / 10

Evaluating...

Training Loss: 0.672
Validation Loss: 0.667

 Epoch 4 / 10

Evaluating...

Training Loss: 0.666
Validation Loss: 0.666

 Epoch 5 / 10

Evaluating...

Training Loss: 0.664
Validation Loss: 0.664

 Epoch 6 / 10

Evaluating...

Training Loss: 0.665
Validation Loss: 0.663

 Epoch 7 / 10

Evaluating...

Training Loss: 0.664
Validation Loss: 0.659

 Epoch 8 / 10

Evaluating...

Training Loss: 0.664
Validation Loss: 0.657

 Epoch 9 / 10

Evaluating...

Training Loss: 0.659
Validation Loss: 0.657

 Epoch 10 / 10

Evaluating...

Training Loss: 0.656
Validation Loss: 0.659
0.69
Fold 3 started at Wed Oct 14 14:29:58 2020
Class Weights: [0.88385827 1.15128205]

 Epoch 1 / 10


 1 0 0 1 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0 0
 0 0 0 1 0 0 0 1 1 1 1 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 1 0 1
 0 1 1 0 1 1 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1
 0 0 1 1 0 1 0 1 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1 0
 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0
 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0
 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0 1 1
 1 0 1 0 0 0 0 1 0 1 1 0 1 0 1 1 1 0 0 1 1 0 1 0 0 1 0 0 1 1 1 1 0 1 1 0 0
 1 1 0 0 0 0 1 0 0 0 1 1 1 1 0 1 1 0 1 1 1 0 0 1 0 0 1 0 1 0 0 0 1 1 1 0 0
 1 1 0 1 1 0 1 0 0 1 1 0 1 0 0 0 1 0 0 1 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0
 0 0 1 0 1 0 0 1 1 0 1 0 1 0 0 0 1 0 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1
 0 1 0 0 1 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0
 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1
 0 0 0 0 1 1 0 0 0 0 0 1 


Evaluating...

Training Loss: 0.663
Validation Loss: 0.649

 Epoch 2 / 10

Evaluating...

Training Loss: 0.660
Validation Loss: 0.649

 Epoch 3 / 10

Evaluating...

Training Loss: 0.664
Validation Loss: 0.652

 Epoch 4 / 10

Evaluating...

Training Loss: 0.651
Validation Loss: 0.653

 Epoch 5 / 10

Evaluating...

Training Loss: 0.654
Validation Loss: 0.651

 Epoch 6 / 10

Evaluating...

Training Loss: 0.655
Validation Loss: 0.647

 Epoch 7 / 10

Evaluating...

Training Loss: 0.639
Validation Loss: 0.657

 Epoch 8 / 10

Evaluating...

Training Loss: 0.655
Validation Loss: 0.643

 Epoch 9 / 10

Evaluating...

Training Loss: 0.650
Validation Loss: 0.646

 Epoch 10 / 10

Evaluating...

Training Loss: 0.644
Validation Loss: 0.639
0.64
Fold 4 started at Wed Oct 14 14:30:29 2020
Class Weights: [0.88385827 1.15128205]

 Epoch 1 / 10


 0 1 0 0 0 1 1 1 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 1 1 0
 0 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 1 1 1 1 1
 1 0 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0
 1 0 1 0 1 1 0 1 1 0 1 0 0 1 0 1 1 0 0 0 0 1 0 1 0 0 0 1 1 0 0 0 1 0 0 1 0
 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 1 1 0 0 0 0
 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 1 1 0 1
 0 0 0 0 1 0 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 1 1 1 0 1 1 0 1 0 1 0 0 0
 1 0 0 1 1 1 1 0 1 1 0 1 1 1 0 0 1 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 1 1 0 0 1
 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 1 0 0
 1 0 1 0 1 1 0 1 0 1 1 0 0 0 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
 0 1 0 1 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0
 1 0 0 1 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 1 1 1 0 1 0 1 1 1 0 0 0 0 1 0 1 1 0
 0 0 0 0 1 1 0 0 0 0 0 0 


Evaluating...

Training Loss: 0.652
Validation Loss: 0.646

 Epoch 2 / 10

Evaluating...

Training Loss: 0.650
Validation Loss: 0.644

 Epoch 3 / 10

Evaluating...

Training Loss: 0.642
Validation Loss: 0.646

 Epoch 4 / 10

Evaluating...

Training Loss: 0.642
Validation Loss: 0.644

 Epoch 5 / 10

Evaluating...

Training Loss: 0.640
Validation Loss: 0.644

 Epoch 6 / 10

Evaluating...

Training Loss: 0.643
Validation Loss: 0.643

 Epoch 7 / 10

Evaluating...

Training Loss: 0.657
Validation Loss: 0.645

 Epoch 8 / 10

Evaluating...

Training Loss: 0.653
Validation Loss: 0.642

 Epoch 9 / 10

Evaluating...

Training Loss: 0.633
Validation Loss: 0.640

 Epoch 10 / 10

Evaluating...

Training Loss: 0.640
Validation Loss: 0.639
0.65
Fold 5 started at Wed Oct 14 14:31:04 2020
Class Weights: [0.88212181 1.15424165]

 Epoch 1 / 10


 1 0 0 0 1 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 0
 0 0 0 1 0 1 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 1 0 0 0 1 1 1 0 1
 1 0 1 1 0 1 1 1 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 1 0 1 0 1 0 1 1 0
 1 0 1 1 0 1 0 1 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0
 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0 0 1 0 1 1
 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 1 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1 0 1 1
 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 1 1 1 0 1 0 0 0 0 0 1 1 1 1 0 1 1 0 1 1 1 0
 0 0 0 0 1 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 0 1 1 0 0 0 1 0 0 0 0 1 1 1 1 0
 0 0 1 0 0 1 1 0 1 0 1 1 0 0 0 1 0 0 1 1 0 1 1 0 0 0 0 1 0 0 0 1 0 0 1 0 0
 0 1 1 0 0 1 1 0 1 1 0 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1 1
 1 1 1 1 0 0 0 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 0 1 0 0 0
 1 1 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 1 1 1 0 0 0 0 1
 0 1 1 1 1 0 0 0 0 0 1 1 


Evaluating...

Training Loss: 0.641
Validation Loss: 0.644

 Epoch 2 / 10

Evaluating...

Training Loss: 0.645
Validation Loss: 0.649

 Epoch 3 / 10

Evaluating...

Training Loss: 0.626
Validation Loss: 0.641

 Epoch 4 / 10

Evaluating...

Training Loss: 0.633
Validation Loss: 0.652

 Epoch 5 / 10

Evaluating...

Training Loss: 0.640
Validation Loss: 0.638

 Epoch 6 / 10

Evaluating...

Training Loss: 0.636
Validation Loss: 0.648

 Epoch 7 / 10

Evaluating...

Training Loss: 0.656
Validation Loss: 0.643

 Epoch 8 / 10

Evaluating...

Training Loss: 0.636
Validation Loss: 0.654

 Epoch 9 / 10

Evaluating...

Training Loss: 0.629
Validation Loss: 0.641

 Epoch 10 / 10

Evaluating...

Training Loss: 0.623
Validation Loss: 0.648
0.71
Fold 6 started at Wed Oct 14 14:31:33 2020
Class Weights: [0.88212181 1.15424165]

 Epoch 1 / 10


 1 1 1 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 0 0
 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 1 1
 1 1 0 1 1 0 1 1 1 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1 1 0 0 1
 0 1 0 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 1 1 1 0 0
 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0
 1 0 1 0 1 1 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0
 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 1 0 0 0 1 0 1
 0 0 1 1 0 0 1 1 1 0 1 0 0 0 0 1 0 1 1 1 0 1 1 1 0 0 1 1 1 1 0 0 1 0 0 0 1
 1 1 1 1 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 1 0 1 0 1 1 1 0 0 1 1 0 0 0 1
 0 1 0 0 0 1 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 0 0 1 1 0 1 1 0
 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 0 1 0 1 1 1 1 0 0 0
 0 0 0 0 0 1 0 1 1 1 0 1 0 0 1 1 1 0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0
 0 1 0 0 0 1 1 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 1 0 0 1 1 0 1 1 0 1 1 0 1 1 0
 0 0 0 1 0 1 1 1 1 0 0 0 


Evaluating...

Training Loss: 0.638
Validation Loss: 0.594

 Epoch 2 / 10

Evaluating...

Training Loss: 0.630
Validation Loss: 0.593

 Epoch 3 / 10

Evaluating...

Training Loss: 0.633
Validation Loss: 0.590

 Epoch 4 / 10

Evaluating...

Training Loss: 0.633
Validation Loss: 0.585

 Epoch 5 / 10

Evaluating...

Training Loss: 0.643
Validation Loss: 0.586

 Epoch 6 / 10

Evaluating...

Training Loss: 0.625
Validation Loss: 0.580

 Epoch 7 / 10

Evaluating...

Training Loss: 0.619
Validation Loss: 0.593

 Epoch 8 / 10

Evaluating...

Training Loss: 0.629
Validation Loss: 0.582

 Epoch 9 / 10

Evaluating...

Training Loss: 0.628
Validation Loss: 0.580

 Epoch 10 / 10

Evaluating...

Training Loss: 0.621
Validation Loss: 0.587
0.74
Fold 7 started at Wed Oct 14 14:32:07 2020
Class Weights: [0.88212181 1.15424165]

 Epoch 1 / 10


 1 0 0 0 1 1 1 0 1 1 1 0 0 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0
 0 0 1 0 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1 1 1 1 0
 1 1 0 1 1 0 1 1 1 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 1 1
 0 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0
 1 0 1 0 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0 1 1 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 1
 0 1 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0
 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 1 1 0 0
 0 1 1 1 0 1 0 1 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 1 0 0 0 0
 1 0 0 0 1 1 1 1 1 0 1 0 1 1 1 0 0 1 1 0 0 1 0 0 1 1 1 0 0 0 1 1 0 0 1 0 0
 1 0 0 1 1 0 1 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0
 1 0 1 0 1 1 0 1 0 1 1 0 0 0 1 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1
 1 1 1 0 0 1 1 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0
 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 1 1 0 1 1 0 1 1 1 0 0 0 1 0 1 1 1 1
 0 0 0 1 1 0 0 0 0 0 1 1 


Evaluating...

Training Loss: 0.618
Validation Loss: 0.611

 Epoch 2 / 10

Evaluating...

Training Loss: 0.624
Validation Loss: 0.611

 Epoch 3 / 10

Evaluating...

Training Loss: 0.626
Validation Loss: 0.614

 Epoch 4 / 10

Evaluating...

Training Loss: 0.624
Validation Loss: 0.609

 Epoch 5 / 10

Evaluating...

Training Loss: 0.624
Validation Loss: 0.607

 Epoch 6 / 10

Evaluating...

Training Loss: 0.618
Validation Loss: 0.608

 Epoch 7 / 10

Evaluating...

Training Loss: 0.615
Validation Loss: 0.607

 Epoch 8 / 10

Evaluating...

Training Loss: 0.618
Validation Loss: 0.607

 Epoch 9 / 10

Evaluating...

Training Loss: 0.623
Validation Loss: 0.610

 Epoch 10 / 10

Evaluating...

Training Loss: 0.610
Validation Loss: 0.611
0.67
Fold 8 started at Wed Oct 14 14:32:41 2020
Class Weights: [0.88310413 1.1525641 ]

 Epoch 1 / 10


 0 0 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 0
 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 1 1 1
 0 1 1 0 1 0 1 1 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0
 1 0 0 1 1 0 1 1 1 0 1 1 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 1
 0 0 0 1 0 0 0 0 1 0 1 0 1 0 1 0 0 0 1 1 0 0 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0
 1 0 1 1 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0
 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1
 1 0 0 1 1 1 0 1 0 0 0 0 1 0 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 1 0 0 0 1 1 1 1
 0 1 1 1 0 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 0 0 1 0 1 0 0
 0 1 1 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0
 0 0 1 0 0 1 0 0 1 0 0 0 1 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 1 1 1 1 0 1 0
 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1
 1 1 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 1 0 0 1 1 0 1 1 1
 1 1 0 1 1 0 0 0 0 1 1 1 


Evaluating...

Training Loss: 0.624
Validation Loss: 0.580

 Epoch 2 / 10

Evaluating...

Training Loss: 0.627
Validation Loss: 0.574

 Epoch 3 / 10

Evaluating...

Training Loss: 0.623
Validation Loss: 0.570

 Epoch 4 / 10

Evaluating...

Training Loss: 0.603
Validation Loss: 0.570

 Epoch 5 / 10

Evaluating...

Training Loss: 0.608
Validation Loss: 0.569

 Epoch 6 / 10

Evaluating...

Training Loss: 0.627
Validation Loss: 0.570

 Epoch 7 / 10

Evaluating...

Training Loss: 0.624
Validation Loss: 0.567

 Epoch 8 / 10

Evaluating...

Training Loss: 0.625
Validation Loss: 0.568

 Epoch 9 / 10

Evaluating...

Training Loss: 0.632
Validation Loss: 0.565

 Epoch 10 / 10

Evaluating...

Training Loss: 0.617
Validation Loss: 0.570
0.7676767676767676
Fold 9 started at Wed Oct 14 14:33:16 2020
Class Weights: [0.88310413 1.1525641 ]

 Epoch 1 / 10


 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 0 1 1 1 0 0 0 0 0 0 1 0
 1 0 0 0 1 1 1 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 1 0 1 1 0 1 1 0 1 1 0
 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1
 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1
 0 1 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 1 1 0 1 0 0 0 1
 0 1 1 0 1 1 1 1 0 0 1 1 1 0 0 0 1 0 0 0 1 1 1 0 1 1 0 1 0 1 1 0 0 0 0 1 0
 0 0 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1 0 1 1 0 0 1 0
 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 0
 1 0 1 1 0 1 0 1 1 0 0 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 0 1
 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 1 1 1 0
 0 1 0 1 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 1 0 1 1 1 1 0 0 0 0 1 1 0 0 0 0
 0 1 1 1 0 0 0 1 0 0 0 0 


Evaluating...

Training Loss: 0.614
Validation Loss: 0.628

 Epoch 2 / 10

Evaluating...

Training Loss: 0.618
Validation Loss: 0.640

 Epoch 3 / 10

Evaluating...

Training Loss: 0.595
Validation Loss: 0.625

 Epoch 4 / 10

Evaluating...

Training Loss: 0.606
Validation Loss: 0.634

 Epoch 5 / 10

Evaluating...

Training Loss: 0.615
Validation Loss: 0.647

 Epoch 6 / 10

Evaluating...

Training Loss: 0.613
Validation Loss: 0.627

 Epoch 7 / 10

Evaluating...

Training Loss: 0.603
Validation Loss: 0.634

 Epoch 8 / 10

Evaluating...

Training Loss: 0.597
Validation Loss: 0.627

 Epoch 9 / 10

Evaluating...

Training Loss: 0.606
Validation Loss: 0.633

 Epoch 10 / 10

Evaluating...

Training Loss: 0.609
Validation Loss: 0.621
0.6868686868686869
Bert_davidson | 0.6796  0.6864  0.6850  0.6835 


In [54]:
# set initial loss to infinite
best_valid_loss = float('inf')

# empty lists to store training and validation loss of each epoch
train_losses=[]
valid_losses=[]

#for each epoch
for epoch in range(epochs):
     
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
    
    #train model
    train_loss, _ = train()
    
    #evaluate model
    valid_loss, _ = evaluate()
    
    #save the best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')
    
    # append training and validation loss
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    
    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')


 Epoch 1 / 20

Evaluating...

Training Loss: 0.672
Validation Loss: 0.667

 Epoch 2 / 20

Evaluating...

Training Loss: 0.668
Validation Loss: 0.666

 Epoch 3 / 20

Evaluating...

Training Loss: 0.670
Validation Loss: 0.665

 Epoch 4 / 20

Evaluating...

Training Loss: 0.672
Validation Loss: 0.664

 Epoch 5 / 20

Evaluating...

Training Loss: 0.668
Validation Loss: 0.663

 Epoch 6 / 20

Evaluating...

Training Loss: 0.666
Validation Loss: 0.663

 Epoch 7 / 20

Evaluating...

Training Loss: 0.668
Validation Loss: 0.662

 Epoch 8 / 20

Evaluating...

Training Loss: 0.666
Validation Loss: 0.661

 Epoch 9 / 20

Evaluating...

Training Loss: 0.666
Validation Loss: 0.661

 Epoch 10 / 20

Evaluating...

Training Loss: 0.664
Validation Loss: 0.660

 Epoch 11 / 20

Evaluating...

Training Loss: 0.668
Validation Loss: 0.660

 Epoch 12 / 20

Evaluating...

Training Loss: 0.664
Validation Loss: 0.659

 Epoch 13 / 20

Evaluating...

Training Loss: 0.664
Validation Loss: 0.657

 Epoch 14 / 20

Eval

In [51]:
#load weights of best model
path = 'saved_weights.pt'
model.load_state_dict(torch.load(path))

<All keys matched successfully>

In [52]:
# get predictions for test data
with torch.no_grad():
  preds = model(test_seq.to(device), test_mask.to(device))
  preds = preds.detach().cpu().numpy()

In [53]:
preds = np.argmax(preds, axis = 1)
print(classification_report(test_y, preds))

              precision    recall  f1-score   support

           0       0.67      0.68      0.67        85
           1       0.57      0.55      0.56        65

    accuracy                           0.63       150
   macro avg       0.62      0.62      0.62       150
weighted avg       0.63      0.63      0.63       150

