In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
! pip install transformers



In [8]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import logging
import gzip
import gensim 
import re
import spacy
import math
from collections import Counter
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import string
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence
from transformers import BertTokenizer, BertModel, DistilBertConfig, TFDistilBertModel, DistilBertModel, DistilBertTokenizer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [9]:
print(torch.cuda.is_available())
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

True


'cuda:0'

# Preprocessing & Loading data

## Preprocess 

### Clothing Review Dataset

In [11]:
# Load dataset for clothing reviews
reviews = pd.read_csv("/content/drive/MyDrive/McGill/Comp 550/Womens Clothing E-Commerce Reviews.csv")
reviews = reviews.dropna()
print(reviews.shape)
reviews.head()

(19662, 11)


Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses
5,5,1080,49,Not for the very petite,"I love tracy reese dresses, but this one is no...",2,0,4,General,Dresses,Dresses
6,6,858,39,Cagrcoal shimmer fun,I aded this in my basket at hte last mintue to...,5,1,1,General Petite,Tops,Knits


In [12]:
reviews['Title'] = reviews['Title'].fillna('')
reviews['Review Text'] = reviews['Review Text'].fillna('')
reviews['review'] = reviews['Title'] + ' ' + reviews['Review Text']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
reviews = reviews[['review', 'Rating']]
reviews.columns = ['review', 'rating']
reviews.head()

Unnamed: 0,review,rating
2,Some major design flaws I had such high hopes ...,3
3,"My favorite buy! I love, love, love this jumps...",5
4,Flattering shirt This shirt is very flattering...,5
5,Not for the very petite I love tracy reese dre...,2
6,Cagrcoal shimmer fun I aded this in my basket ...,5


#### Tokenize each sentence

In [14]:
#take advantage of nltk to tokenize all sentences
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
en_stop_words = set(stopwords.words('english'))
tokenizer = nltk.RegexpTokenizer(r'\w+|\$[\d\.]+')

def tokenize_sent(sent):
    
    tokenized = tokenizer.tokenize(sent)
    filtered = [w.lower() for w in tokenized if w.lower() not in en_stop_words]
    return filtered

reviews['tokenized'] = reviews['review'].apply(lambda x: tokenize_sent(x))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [15]:
# Sanity cehck for tokenizers
print(reviews.head(3))

                                              review  ...                                          tokenized
2  Some major design flaws I had such high hopes ...  ...  [major, design, flaws, high, hopes, dress, rea...
3  My favorite buy! I love, love, love this jumps...  ...  [favorite, buy, love, love, love, jumpsuit, fu...
4  Flattering shirt This shirt is very flattering...  ...  [flattering, shirt, shirt, flattering, due, ad...

[3 rows x 3 columns]


#### Encode and truncate sentence

In [16]:
# get unique words in the corpus
all_words = []
for x in reviews['tokenized']:
    all_words.extend(x)

word_set = list(set(all_words))
word_count = Counter(all_words)

# filter out words with low frequencylm
for word_list in reviews.tokenized:
    new_list = []
    for word in word_list:
        if word_count[word] > 2:
            new_list.append(word)
    word_list = new_list

# update set of words after removing the ones with low frequency
new_word_list = []
for x in reviews['tokenized']:
    new_word_list.extend(x)
word_set = list(set(new_word_list))

# map each unique words & unknown token in reviews.encoded to an index
word2index = {}
word2index['<UNK>'] = 0
word2index['<PAD>'] = 1

for i, word in enumerate(word_set, 2):
    word2index[word] = i

# encode the original sequence
def encode(sent_list):
    result = []
    for x in sent_list:
        index = word2index[x]
        result.append(index)
    return result

reviews['encoded'] = reviews.tokenized.apply(lambda x: encode(x))

print(reviews.head())

# get sequence average length
total_len = 0
for x in reviews.encoded:
    total_len += len(x)

ave_len = math.floor(total_len/reviews.shape[0])

# filter out long sequences --> encode all sequence to length = ave_len
# pad short sequence

for i, row in reviews.iterrows():
    size = min(len(row.encoded), ave_len)
    new_encoded = row.encoded[:size]
    if size < ave_len:
        for j in range(0, ave_len - len(row.encoded)):
            new_encoded.append(1)
    reviews.at[i, 'encoded'] = new_encoded

reviews['review_length'] = reviews.encoded.apply(lambda x: len(x))

                                              review  ...                                            encoded
2  Some major design flaws I had such high hopes ...  ...  [5489, 3396, 11243, 8384, 1856, 12902, 12804, ...
3  My favorite buy! I love, love, love this jumps...  ...  [1510, 11679, 4446, 4446, 4446, 11572, 14026, ...
4  Flattering shirt This shirt is very flattering...  ...  [8197, 5160, 5160, 8197, 5313, 14088, 31, 7605...
5  Not for the very petite I love tracy reese dre...  ...  [11698, 4446, 3302, 6935, 6438, 10538, 11698, ...
6  Cagrcoal shimmer fun I aded this in my basket ...  ...  [489, 8457, 14026, 6599, 10855, 1001, 12844, 4...

[5 rows x 4 columns]


In [17]:
# sanity check for encoding:
print(len(all_words))
print(ave_len)
lengths = [len(x) for x in reviews.encoded]
print(max(lengths))
print(min(lengths))
print(set(reviews.rating))
review_length = reviews.review_length
reviews.head()
print(len(word_set))

639434
32
32
32
{1, 2, 3, 4, 5}
14088


In [18]:
# Train test split from skearln
data_size = len(reviews['encoded'])
assert data_size == len(reviews['rating']) 
X, y = list(zip(list(reviews['encoded']),(list(reviews['review_length'])))), list(reviews['rating'])
# X(data, length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)

In [19]:
# Sanity check for length match
print(X_train[0])

([8756, 10050, 6548, 3265, 1717, 10050, 8181, 7722, 11679, 8668, 8756, 3265, 3349, 4117, 5883, 4446, 9862, 13475, 2988, 11715, 13727, 8668, 7806, 364, 3265, 3349, 10667, 331, 4893, 7806, 2611, 3349], 32)


## Dataset and Dataloader

### Clothing Review Dataset

In [20]:
class Dataset_word2vec(Dataset):
    def __init__(self, X, y): 
        self.X = X
        y_new = [i-1 for i in y]
        self.y = y_new

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return  self.X[idx], self.y[idx]

In [21]:
trainSet_w2v = Dataset_word2vec(X_train, y_train)
testSet_w2v = Dataset_word2vec(X_test, y_test)

In [22]:
print(trainSet_w2v[0])

(([8756, 10050, 6548, 3265, 1717, 10050, 8181, 7722, 11679, 8668, 8756, 3265, 3349, 4117, 5883, 4446, 9862, 13475, 2988, 11715, 13727, 8668, 7806, 364, 3265, 3349, 10667, 331, 4893, 7806, 2611, 3349], 32), 4)


In [23]:
def myCollate(batch):
    data = [item[0] for item in batch]
    label = [item[1] for item in batch]
    # sort sequence according to it's length in a batch
    data = data
    # data.sort(key=lambda x: x[1], reverse = True)

    # data (review: list, length: int)
    review = torch.tensor([x[0] for x in data], dtype = torch.long)
    label = torch.tensor(label, dtype = torch.long)
    seq_len = [x[1] for x in data]
    return review, label, seq_len

In [24]:
trainLoader_w2v = DataLoader(dataset = trainSet_w2v, batch_size = 16, collate_fn = myCollate, shuffle = True, drop_last = True)
testLoader_w2v = DataLoader(dataset = testSet_w2v, batch_size = 16, collate_fn = myCollate, shuffle = True, drop_last = True)

In [25]:
it = iter(trainLoader_w2v)
x, y, seq_len = it.next()

In [26]:
x.data

tensor([[ 4879,  8197, 11397,  2261,  2524, 12258, 13751,  9236,  4893,  1891,
           901,  5805,  3071, 13013,  6872, 10931, 13751, 10725,  8052, 10538,
         13559,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1],
        [ 7307, 11316, 11397,  7324,   141,  6820, 13359,  8560,  3487,  5160,
          3429, 12670,  1993,  4145, 10628, 12058, 10108, 14057,  2140,  3071,
          2013,  9481, 10801,  3265,  3549,     1,     1,     1,     1,     1,
             1,     1],
        [ 8378,   669, 10952,  8485, 12902,  2485,   408,  2270, 12288,  9547,
          6052, 10397, 10687,  5453,  6518,  8378, 12036, 12639,   475, 12902,
          8010, 10015,  6503, 10967,  7360, 12477,  7054, 12902, 10888,  6706,
          7603,  9855],
        [ 4893,  4462,  8384, 13078, 13372, 12104,  2808,  3158,  5121, 13183,
         11824, 11876,  3158,  2724, 13372,  7706,  5420, 13170,  8384, 13078,
           141,  9195,  6365,  9596,  6826, 10272,  4730,  

In [27]:
y

tensor([4, 2, 4, 3, 2, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 3])

In [28]:
seq_len

[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]

## Embedding

In [33]:
word2vec = gensim.models.KeyedVectors.load_word2vec_format(
    '/content/drive/MyDrive/McGill/Comp 550/GoogleNews-vectors-negative300.bin', binary = True)
print(type(word2vec))
weight = torch.FloatTensor(word2vec.vectors)
word2vec_embedding = nn.Embedding.from_pretrained(weight)

# sanity check for word2vec embedding
temp = word2vec['hello', 'world']
sanity_w2v = torch.Tensor(temp)
print(sanity_w2v.shape)

<class 'gensim.models.keyedvectors.Word2VecKeyedVectors'>
torch.Size([2, 300])


In [34]:
# create embedding matrix lstm
def emb_matrix(model, all_words, word2index):
    matrix_size = len(word_set) + 2
    emb_matrix = np.zeros((matrix_size, 300))
    emb_matrix[0] = np.random.uniform(-0.25, 0.25, 300) # vector for UNK
    emb_matrix[1] = np.zeros(300) # vector for padding, has no weight

    # vector for every other word in the dictionary
    for i in range(2, len(all_words)+2):
        word = all_words[i-2]
        index = word2index[word]
        try:
            vector = model[word]
        except Exception as e:
            # word does not exist in the pretrained embedding
            vector = emb_matrix[0]
        emb_matrix[index] = vector
    return emb_matrix

In [35]:
embeddings_w2v = emb_matrix(word2vec, word_set, word2index)

In [39]:
print(embeddings_w2v.shape)

(14090, 300)


In [40]:
def validation_metrics(model, valid_dl):
    print("current in vlaidaiton")
    model.eval()
    correct = 0
    total = 0
    sum_loss = 0.0
    sum_rmse = 0.0
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    y_total = []
    y_pred_total = []
    for x, y, seq_len in valid_dl:
        x = x.long().to(device)
        y = y.long().to(device)
        y_hat = model(x, seq_len)
        loss = criterion(y_hat, y)
        
        # print(f"shape of y: {y.shape}")
        # print(f"shape of y_pred: {y_hat.shape}")
        # print(y_hat)
        pred = torch.argmax(y_hat, 1)
        y_total.extend(y.tolist())
        y_pred_total.extend(pred.tolist())
        
        # print(pred)
        correct += sum((pred == y))
        total += y.shape[0]
        sum_loss += loss.item()*y.shape[0]
        # print(len(y_total))
        # print(len(y_pred_total))

      # calculate precision, recall, and f1
    f1 = (f1_score(y_total, y_pred_total, average='weighted'))
    precision = (precision_score(y_total, y_pred_total, average='weighted'))
    recall =(recall_score(y_total, y_pred_total, average='weighted'))
    return sum_loss/total, correct/total, precision, recall, f1

In [41]:
def train_model(model, fname, epochs = 50, lr = 0.0001):
    parameters = filter(lambda p:p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr = lr)
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    PATH = '/content/drive/MyDrive/McGill/Comp 550/' + fname
    epoch = 0
    best_val_acc = 0.0
    for i in range(epochs):
        epoch += 1
        print(f"At epoch {i}")
        batch_count = 0
        model.train()
        sum_loss = 0.0
        total = 0.0
        train_correct = 0.0
        for x, y, seq_len in tqdm(trainLoader_w2v):
            x = x.long().to(device)
            y = y.long().to(device)
            batch_count += 1
            optimizer.zero_grad()
            y_pred = model(x, seq_len)
            pred = torch.argmax(y_pred, 1)
            train_correct += sum((pred == y))
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()*y.shape[0]
            total += y.shape[0]
        val_loss, val_acc, precision, recall, f1 = validation_metrics(model, testLoader_w2v)
        print("train loss %.3f, train accuracy %.3f, val loss %.3f, val accuracy %.3f, precision %.3f, recall %.3f, F1 %.3f" 
              % (sum_loss/total, train_correct/total, val_loss, val_acc, precision, recall, f1))
        if val_acc > best_val_acc and i>=1:
            best_val_acc = val_acc
            NEW_PATH = PATH+f'_{i}_{val_acc}.pth'
            #torch.save(model.state_dict(), NEW_PATH)
            print(f"\t=> Best model saved at {i}th epoch with valication accuracy of {val_acc}")

In [47]:
weight = torch.FloatTensor(word2vec.vectors)
word2vec_embedding = nn.Embedding.from_pretrained(weight)

class LSTM_word2vec(torch.nn.Module):
    def __init__(self, emb_matrix, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.embeddings = nn.Embedding(emb_matrix.shape[0], embedding_dim, padding_idx = 0)
        self.embeddings.weight.data.copy_(torch.from_numpy(emb_matrix))
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.linear = nn.Linear(hidden_dim, 5)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x, seq_len):
        x = self.embeddings(x)
        # x = nn.utils.rnn.pack_padded_sequence(x, self.embedding_dim, batch_first = True)
        x = self.dropout(x)
        # x_padded = pack_padded_sequence(x, seq_len, batch_first = True)
        output, (h_n, c_n) = self.lstm(x)
        # print(h_n[-1].shape)
        # output (N, ave_len, num_class)
        # reshape to out (N, num_class)

        return self.linear(h_n[-1])

In [42]:
distil_bert = 'distilbert-base-uncased'
        #config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
        #config.output_hidden_states = False
transformer_model = DistilBertModel.from_pretrained('distilbert-base-uncased')
        
embedding =  list(transformer_model.children())[0]
bert_word_embeddings = list(embedding.children())[0]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [43]:
class LSTM_fixed_length_Bert(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim) :
        super().__init__()
        self.embeddings = bert_word_embeddings
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 5)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x, l):
        x = self.embeddings(x)
        x = self.dropout(x)
        lstm_out, (ht, ct) = self.lstm(x)
        return self.linear(ht[-1])

In [45]:
model_bert = LSTM_fixed_length_Bert(len(word_set), 768, 100)
train_model(model_bert, fname = 'bert_lstm_clothing', epochs = 15)

At epoch 0


100%|██████████| 983/983 [00:07<00:00, 127.58it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 1.045, train accuracy 0.600, val loss 0.891, val accuracy 0.624, precision 0.554, recall 0.624, F1 0.581
At epoch 1


100%|██████████| 983/983 [00:07<00:00, 133.80it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.721, train accuracy 0.701, val loss 0.911, val accuracy 0.618, precision 0.597, recall 0.618, F1 0.607
	=> Best model saved at 1th epoch with valication accuracy of 0.6183673143386841
At epoch 2


100%|██████████| 983/983 [00:07<00:00, 134.13it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.641, train accuracy 0.740, val loss 0.956, val accuracy 0.583, precision 0.619, recall 0.583, F1 0.590
At epoch 3


100%|██████████| 983/983 [00:07<00:00, 133.59it/s]


current in vlaidaiton
train loss 0.569, train accuracy 0.770, val loss 1.004, val accuracy 0.633, precision 0.614, recall 0.633, F1 0.610
	=> Best model saved at 3th epoch with valication accuracy of 0.6331632137298584
At epoch 4


100%|██████████| 983/983 [00:07<00:00, 133.95it/s]


current in vlaidaiton
train loss 0.521, train accuracy 0.798, val loss 1.047, val accuracy 0.634, precision 0.606, recall 0.634, F1 0.616
	=> Best model saved at 4th epoch with valication accuracy of 0.6344387531280518
At epoch 5


100%|██████████| 983/983 [00:07<00:00, 133.69it/s]


current in vlaidaiton
train loss 0.481, train accuracy 0.819, val loss 1.049, val accuracy 0.611, precision 0.609, recall 0.611, F1 0.608
At epoch 6


100%|██████████| 983/983 [00:07<00:00, 133.69it/s]


current in vlaidaiton
train loss 0.447, train accuracy 0.835, val loss 1.105, val accuracy 0.609, precision 0.609, recall 0.609, F1 0.608
At epoch 7


100%|██████████| 983/983 [00:07<00:00, 133.55it/s]


current in vlaidaiton
train loss 0.420, train accuracy 0.847, val loss 1.156, val accuracy 0.625, precision 0.591, recall 0.625, F1 0.601
At epoch 8


100%|██████████| 983/983 [00:07<00:00, 133.85it/s]


current in vlaidaiton
train loss 0.392, train accuracy 0.857, val loss 1.231, val accuracy 0.603, precision 0.603, recall 0.603, F1 0.599
At epoch 9


100%|██████████| 983/983 [00:07<00:00, 133.74it/s]


current in vlaidaiton
train loss 0.378, train accuracy 0.862, val loss 1.216, val accuracy 0.624, precision 0.596, recall 0.624, F1 0.605
At epoch 10


100%|██████████| 983/983 [00:07<00:00, 134.02it/s]


current in vlaidaiton
train loss 0.352, train accuracy 0.879, val loss 1.202, val accuracy 0.601, precision 0.605, recall 0.601, F1 0.602
At epoch 11


100%|██████████| 983/983 [00:07<00:00, 134.13it/s]


current in vlaidaiton
train loss 0.334, train accuracy 0.882, val loss 1.300, val accuracy 0.577, precision 0.602, recall 0.577, F1 0.587
At epoch 12


100%|██████████| 983/983 [00:07<00:00, 134.12it/s]


current in vlaidaiton
train loss 0.316, train accuracy 0.892, val loss 1.328, val accuracy 0.610, precision 0.591, recall 0.610, F1 0.598
At epoch 13


100%|██████████| 983/983 [00:07<00:00, 133.49it/s]


current in vlaidaiton
train loss 0.300, train accuracy 0.895, val loss 1.334, val accuracy 0.621, precision 0.591, recall 0.621, F1 0.600
At epoch 14


100%|██████████| 983/983 [00:07<00:00, 133.90it/s]


current in vlaidaiton
train loss 0.288, train accuracy 0.899, val loss 1.330, val accuracy 0.601, precision 0.595, recall 0.601, F1 0.596


In [48]:
model = LSTM_word2vec(embeddings_w2v, 300, 100)
train_model(model, fname='word2vec_lstm', epochs=20)

At epoch 0


100%|██████████| 983/983 [00:03<00:00, 253.07it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 1.137, train accuracy 0.550, val loss 0.970, val accuracy 0.584, precision 0.476, recall 0.584, F1 0.477
At epoch 1


100%|██████████| 983/983 [00:03<00:00, 254.73it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.928, train accuracy 0.615, val loss 0.898, val accuracy 0.630, precision 0.555, recall 0.630, F1 0.572
	=> Best model saved at 1th epoch with valication accuracy of 0.6301020383834839
At epoch 2


100%|██████████| 983/983 [00:03<00:00, 253.36it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.855, train accuracy 0.643, val loss 0.866, val accuracy 0.630, precision 0.582, recall 0.630, F1 0.594
At epoch 3


100%|██████████| 983/983 [00:03<00:00, 255.16it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.813, train accuracy 0.652, val loss 0.847, val accuracy 0.640, precision 0.591, recall 0.640, F1 0.607
	=> Best model saved at 3th epoch with valication accuracy of 0.640051007270813
At epoch 4


100%|██████████| 983/983 [00:03<00:00, 254.33it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.781, train accuracy 0.667, val loss 0.847, val accuracy 0.628, precision 0.608, recall 0.628, F1 0.612
At epoch 5


100%|██████████| 983/983 [00:03<00:00, 254.57it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.752, train accuracy 0.680, val loss 0.858, val accuracy 0.638, precision 0.603, recall 0.638, F1 0.617
At epoch 6


100%|██████████| 983/983 [00:03<00:00, 253.41it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.730, train accuracy 0.693, val loss 0.843, val accuracy 0.646, precision 0.611, recall 0.646, F1 0.627
	=> Best model saved at 6th epoch with valication accuracy of 0.6456632614135742
At epoch 7


100%|██████████| 983/983 [00:03<00:00, 255.96it/s]


current in vlaidaiton


  _warn_prf(average, modifier, msg_start, len(result))


train loss 0.704, train accuracy 0.699, val loss 0.856, val accuracy 0.645, precision 0.608, recall 0.645, F1 0.623
At epoch 8


100%|██████████| 983/983 [00:03<00:00, 255.72it/s]


current in vlaidaiton
train loss 0.681, train accuracy 0.714, val loss 0.874, val accuracy 0.636, precision 0.629, recall 0.636, F1 0.627
At epoch 9


100%|██████████| 983/983 [00:03<00:00, 256.37it/s]


current in vlaidaiton
train loss 0.665, train accuracy 0.722, val loss 0.944, val accuracy 0.646, precision 0.611, recall 0.646, F1 0.614
	=> Best model saved at 9th epoch with valication accuracy of 0.6461734175682068
At epoch 10


100%|██████████| 983/983 [00:03<00:00, 254.43it/s]


current in vlaidaiton
train loss 0.643, train accuracy 0.734, val loss 0.900, val accuracy 0.634, precision 0.622, recall 0.634, F1 0.627
At epoch 11


100%|██████████| 983/983 [00:03<00:00, 253.97it/s]


current in vlaidaiton
train loss 0.626, train accuracy 0.744, val loss 0.900, val accuracy 0.641, precision 0.624, recall 0.641, F1 0.627
At epoch 12


100%|██████████| 983/983 [00:03<00:00, 254.72it/s]


current in vlaidaiton
train loss 0.603, train accuracy 0.754, val loss 0.945, val accuracy 0.634, precision 0.624, recall 0.634, F1 0.627
At epoch 13


100%|██████████| 983/983 [00:03<00:00, 256.17it/s]


current in vlaidaiton
train loss 0.592, train accuracy 0.765, val loss 0.966, val accuracy 0.644, precision 0.616, recall 0.644, F1 0.625
At epoch 14


100%|██████████| 983/983 [00:03<00:00, 255.40it/s]


current in vlaidaiton
train loss 0.573, train accuracy 0.771, val loss 0.963, val accuracy 0.636, precision 0.617, recall 0.636, F1 0.625
At epoch 15


100%|██████████| 983/983 [00:03<00:00, 255.22it/s]


current in vlaidaiton
train loss 0.560, train accuracy 0.778, val loss 0.970, val accuracy 0.619, precision 0.620, recall 0.619, F1 0.618
At epoch 16


100%|██████████| 983/983 [00:03<00:00, 255.89it/s]


current in vlaidaiton
train loss 0.539, train accuracy 0.790, val loss 0.989, val accuracy 0.629, precision 0.624, recall 0.629, F1 0.626
At epoch 17


100%|██████████| 983/983 [00:03<00:00, 255.07it/s]


current in vlaidaiton
train loss 0.523, train accuracy 0.795, val loss 1.021, val accuracy 0.593, precision 0.628, recall 0.593, F1 0.603
At epoch 18


100%|██████████| 983/983 [00:03<00:00, 255.05it/s]


current in vlaidaiton
train loss 0.508, train accuracy 0.802, val loss 1.017, val accuracy 0.639, precision 0.615, recall 0.639, F1 0.621
At epoch 19


100%|██████████| 983/983 [00:03<00:00, 255.19it/s]


current in vlaidaiton
train loss 0.493, train accuracy 0.813, val loss 1.034, val accuracy 0.624, precision 0.622, recall 0.624, F1 0.621
