In [2]:
# config.py
# we define all the configuration here
import pickle

# SECURITY NOTE: pickle.load can run arbitrary code while deserializing,
# so this must only ever point at a trusted, locally produced file.
# NOTE(review): the file name says 746 while NB_FAMILIES below is 736 -
# confirm which class count is the intended one.
with open('multi_class_dict_746.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

# size of the final linear layer of the models
NB_FAMILIES = 736
# maxlen handed to pad_sequences in run()
MAX_LEN = 100
TRAIN_BATCH_SIZE = 256 #16
VALID_BATCH_SIZE = 128 #8
EPOCHS = 10
# lower-case letter -> integer id; the alphabet has no 'j', so the
# 25 remaining letters receive the ids 0..24 in alphabetical order
# NOTE(review): 'a' gets id 0, which looks like the same value the
# sequences are zero-padded with in run() - confirm this is intended.
CHAR_INDEX_DICT = {
    letter: position
    for position, letter in enumerate('abcdefghiklmnopqrstuvwxyz')
}
MULTI_CLASS_DICT = loaded_dict

In [3]:
# dataset.py
import torch
class PFAMDataset:
    """Indexable dataset that pairs encoded sequences with their labels."""

    def __init__(self, reviews, targets):
        """
        :param reviews: 2-D numpy array, one encoded sequence per row
        :param targets: a vector, numpy array, one label per row
        """
        self.reviews = reviews
        self.target = targets

    def __len__(self):
        # one sample per row of the sequence array
        return len(self.reviews)

    def __getitem__(self, item):
        # item is the integer position of the wanted sample;
        # the row and its label are both handed back as tensors
        sample = {
            "review": torch.tensor(self.reviews[item, :], dtype=torch.long),
            "target": torch.tensor(self.target[item], dtype=torch.float),
        }
        return sample

In [4]:
# engine.py

import torch
import torch.nn as nn

def train(data_loader, model, optimizer, device):
    """
    This is the main training function that trains model
    for one epoch.

    NOTE: a later cell of this notebook defines `train` again; whichever
    definition runs last is the one that is actually called.

    :param data_loader: this is the torch dataloader; every batch is a
        dict with a "review" tensor and a "target" tensor
    :param model: model returning one row of class scores per sample
    :param optimizer: torch optimizer, e.g. adam, sgd, etc.
    :param device: this can be "cuda" or "cpu"
    """
    # CrossEntropyLoss compares (N, C) scores with (N,) integer class ids;
    # it is stateless, so a single instance serves every batch
    criterion = nn.CrossEntropyLoss()

    # set model to training mode
    model.train()
    # go through batches of data in data loader
    for data in data_loader:
        # fetch the inputs and move them to the device we want to use
        reviews = data["review"].to(device, dtype=torch.long)
        # class ids have to be int64 and one-dimensional: a float column
        # vector of shape (N, 1) is rejected by the loss
        targets = data["target"].to(device, dtype=torch.long).reshape(-1)
        # clear the gradients
        optimizer.zero_grad()
        # make predictions from the model
        predictions = model(reviews)
        # calculate the loss
        loss = criterion(predictions, targets)
        # compute gradient of loss w.r.t.
        # all parameters of the model that are trainable
        loss.backward()
        # single optimization step
        optimizer.step()

def evaluate(data_loader, model, device):
    """
    Collect the model outputs and the targets of every batch.

    :param data_loader: this is the torch dataloader; every batch is a
        dict with a "review" tensor and a "target" tensor
    :param model: model to run; it is switched to eval mode
    :param device: this can be "cuda" or "cpu"
    :return: (final_predictions, final_targets), two python lists with
        one entry per sample, in loader order
    """
    # lists that accumulate predictions and targets
    final_predictions = []
    final_targets = []
    # put the model in eval mode
    model.eval()
    # disable gradient calculation
    with torch.no_grad():
        for data in data_loader:
            reviews = data["review"].to(device, dtype=torch.long)
            # make predictions
            predictions = model(reviews)
            # predictions have to come back to the cpu before they can
            # be turned into a list
            final_predictions.extend(predictions.cpu().numpy().tolist())
            # the targets are only collected, never fed to the model, so
            # they are read straight from the batch without a device copy
            final_targets.extend(data["target"].cpu().numpy().tolist())
    # return final predictions and targets
    return final_predictions, final_targets

In [5]:
# lstm.py
import torch
import torch.nn as nn

class LSTM(nn.Module):
    """Bidirectional LSTM classifier on top of a frozen embedding table."""

    def __init__(self, embedding_matrix, num_classes=None, hidden_size=128):
        """
        :param embedding_matrix: numpy array with vectors for all words,
            one row per token id
        :param num_classes: number of output scores; falls back to the
            notebook-level NB_FAMILIES when left as None
        :param hidden_size: hidden size of each LSTM direction
        """
        super(LSTM, self).__init__()
        if num_classes is None:
            num_classes = NB_FAMILIES
        # number of words = number of rows in embedding matrix
        num_words = embedding_matrix.shape[0]
        # dimension of embedding is num of columns in the matrix
        embed_dim = embedding_matrix.shape[1]
        # we define an input embedding layer
        self.embedding = nn.Embedding(
            num_embeddings=num_words,
            embedding_dim=embed_dim
        )
        # embedding matrix is used as weights of
        # the embedding layer
        self.embedding.weight = nn.Parameter(
            torch.tensor(
                embedding_matrix,
                dtype=torch.float32
            )
        )
        # we dont want to train the pretrained embeddings
        self.embedding.weight.requires_grad = False
        # a simple bidirectional LSTM
        self.lstm = nn.LSTM(
            embed_dim,
            hidden_size,
            bidirectional=True,
            batch_first=True,
        )
        # output layer which is a linear layer producing one score per class
        # each direction contributes hidden_size features, and both the
        # mean pooling and the max pooling keep all of them:
        # 2 directions * 2 poolings * hidden_size (512 for the default 128)
        self.out = nn.Linear(4 * hidden_size, num_classes)

    def forward(self, x):
        """
        :param x: integer token ids, one sequence per row
        :return: unnormalized class scores, one row per sequence
        """
        # pass data through embedding layer
        # the input is just the tokens
        x = self.embedding(x)
        # move embedding output to lstm
        x, _ = self.lstm(x)
        # apply mean and max pooling over dimension 1 of the lstm output
        avg_pool = torch.mean(x, 1)
        max_pool, _ = torch.max(x, 1)

        # concatenate mean and max pooling
        out = torch.cat((avg_pool, max_pool), 1)
        # pass through the output layer and return the linear output
        out = self.out(out)
        return out

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicConvResBlock(nn.Module):
    """Conv1d -> BatchNorm1d -> ReLU -> Conv1d -> BatchNorm1d -> ReLU,
    with an additive skip path that is only active when `shortcut` is set."""

    def __init__(self, input_dim=128, n_filters=256, kernel_size=3, padding=1, stride=1, shortcut=False, downsample=None):
        super(BasicConvResBlock, self).__init__()

        self.downsample = downsample
        self.shortcut = shortcut

        # both convolutions share the same geometry
        conv_kwargs = dict(kernel_size=kernel_size, padding=padding, stride=stride)
        self.conv1 = nn.Conv1d(input_dim, n_filters, **conv_kwargs)
        self.bn1 = nn.BatchNorm1d(n_filters)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(n_filters, n_filters, **conv_kwargs)
        self.bn2 = nn.BatchNorm1d(n_filters)

    def forward(self, x):
        # first conv stage, activated
        out = self.relu(self.bn1(self.conv1(x)))
        # second conv stage, activation is applied after the optional skip
        out = self.bn2(self.conv2(out))

        if self.shortcut:
            # the skip path is the raw input unless a downsample module
            # was supplied to reshape it first
            skip = x if self.downsample is None else self.downsample(x)
            out += skip

        return self.relu(out)


class VDCNN(nn.Module):
    """Stack of 1-D convolutional blocks followed by three linear layers.

    Only the shape of `embedding_matrix` is read: the embedding layer is
    created with that shape and keeps the initial weights nn.Embedding
    gives it; the values of the matrix are not copied in.
    """

    def __init__(self, embedding_matrix, num_classes=None):
        """
        :param embedding_matrix: numpy array whose shape is
            (number of token ids, embedding dimension)
        :param num_classes: number of output scores; falls back to the
            notebook-level NB_FAMILIES when left as None
        """
        super(VDCNN, self).__init__()

        if num_classes is None:
            num_classes = NB_FAMILIES

        layers = []
        fc_layers = []
        # number of words = number of rows in embedding matrix
        num_embedding = embedding_matrix.shape[0]
        # dimension of embedding is num of columns in the matrix
        embedding_dim = embedding_matrix.shape[1]

        # the embedding width has to be embedding_dim, because that is the
        # channel count the first convolution below is built for; using
        # the vocabulary size here only works for a square matrix
        # NOTE(review): padding_idx=0 reserves token id 0 for padding while
        # CHAR_INDEX_DICT also gives id 0 to 'a' - confirm this is intended.
        self.embed = nn.Embedding(num_embedding, embedding_dim, padding_idx=0, max_norm=None, norm_type=2, scale_grad_by_freq=False, sparse=False)
        layers.append(nn.Conv1d(embedding_dim, 64, kernel_size=3, padding=1))

        n_conv_block_64, n_conv_block_128, n_conv_block_256, n_conv_block_512 = 2, 2, 2, 2
        n_fc_neurons = 2048

        # NOTE(review): none of the blocks below is built with
        # shortcut=True, so BasicConvResBlock.forward never reaches the
        # `ds` downsample modules handed to it - confirm whether the
        # residual connections were meant to be switched on.

        # 64-channel stage
        layers.append(BasicConvResBlock(input_dim=64, n_filters=64, kernel_size=3, padding=1))
        for _ in range(n_conv_block_64-1):
            layers.append(BasicConvResBlock(input_dim=64, n_filters=64, kernel_size=3, padding=1))
        layers.append(nn.MaxPool1d(kernel_size=3, stride=2, padding=1)) # l = initial length / 2

        # 128-channel stage
        ds = nn.Sequential(nn.Conv1d(64, 128, kernel_size=1, stride=1, bias=False), nn.BatchNorm1d(128))
        layers.append(BasicConvResBlock(input_dim=64, n_filters=128, kernel_size=3, padding=1, downsample=ds))
        for _ in range(n_conv_block_128-1):
            layers.append(BasicConvResBlock(input_dim=128, n_filters=128, kernel_size=3, padding=1))
        layers.append(nn.MaxPool1d(kernel_size=3, stride=2, padding=1)) # l = initial length / 4

        # 256-channel stage
        ds = nn.Sequential(nn.Conv1d(128, 256, kernel_size=1, stride=1, bias=False), nn.BatchNorm1d(256))
        layers.append(BasicConvResBlock(input_dim=128, n_filters=256, kernel_size=3, padding=1, downsample=ds))
        for _ in range(n_conv_block_256 - 1):
            layers.append(BasicConvResBlock(input_dim=256, n_filters=256, kernel_size=3, padding=1))
        layers.append(nn.MaxPool1d(kernel_size=3, stride=2, padding=1))

        # 512-channel stage
        ds = nn.Sequential(nn.Conv1d(256, 512, kernel_size=1, stride=1, bias=False), nn.BatchNorm1d(512))
        layers.append(BasicConvResBlock(input_dim=256, n_filters=512, kernel_size=3, padding=1, downsample=ds))
        for _ in range(n_conv_block_512 - 1):
            layers.append(BasicConvResBlock(input_dim=512, n_filters=512, kernel_size=3, padding=1))

        # fixed-size summary: 8 positions for each of the 512 channels,
        # which is what the first linear layer expects once flattened
        layers.append(nn.AdaptiveMaxPool1d(8))
        fc_layers.extend([nn.Linear(8*512, n_fc_neurons), nn.ReLU()])

        fc_layers.extend([nn.Linear(n_fc_neurons, n_fc_neurons), nn.ReLU()])
        fc_layers.extend([nn.Linear(n_fc_neurons, num_classes)])

        self.layers = nn.Sequential(*layers)
        self.fc_layers = nn.Sequential(*fc_layers)

        self.__init_weights()

    def __init_weights(self):
        # every Conv1d in the module tree gets kaiming-normal weights
        # and, when it has one, a zero bias
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """
        :param x: integer token ids, one sequence per row
        :return: unnormalized class scores, one row per sequence
        """
        out = self.embed(x)
        # Conv1d wants channels on dimension 1, so the embedding
        # dimension and the sequence dimension swap places
        out = out.transpose(1, 2)

        out = self.layers(out)

        # flatten everything but the batch dimension
        out = out.view(out.size(0), -1)

        out = self.fc_layers(out)

        return out

In [9]:
# train.py
import io
import torch
import numpy as np
import pandas as pd
# yes, we use tensorflow
# but not for training the model!
import tensorflow as tf
from sklearn import metrics

def train(data_loader, model, optimizer, device):
    """
    Run a single training epoch over the loader.

    :param data_loader: this is the torch dataloader; every batch is a
        dict with a "review" tensor and a "target" tensor
    :param model: model returning one row of class scores per sample
    :param optimizer: torch optimizer, e.g. adam, sgd, etc.
    :param device: this can be "cuda" or "cpu"
    """
    # training mode first, then one optimizer step per batch
    model.train()
    loss_fn = nn.CrossEntropyLoss()
    for batch in data_loader:
        # inputs and class ids both travel to the device as int64
        inputs = batch["review"].to(device, dtype=torch.long)
        labels = batch["target"].to(device, dtype=torch.long)
        # gradients of the previous step must not accumulate
        optimizer.zero_grad()
        scores = model(inputs)
        # backpropagate the loss through all trainable parameters
        loss_fn(scores, labels).backward()
        optimizer.step()

def evaluate(data_loader, model, device):
    """
    Run the model over the whole loader without gradient tracking.

    :param data_loader: this is the torch dataloader; every batch is a
        dict with a "review" tensor and a "target" tensor
    :param model: model to run; it is switched to eval mode
    :param device: this can be "cuda" or "cpu"
    :return: (final_predictions, final_targets), two python lists with
        one entry per sample, in loader order
    """
    final_predictions = []
    final_targets = []
    # put the model in eval mode
    model.eval()
    # disable gradient calculation
    with torch.no_grad():
        for data in data_loader:
            # only the inputs are needed on the device
            reviews = data["review"].to(device, dtype=torch.long)
            predictions = model(reviews)
            # bring the predictions back to the cpu and store them as lists
            final_predictions.extend(predictions.cpu().numpy().tolist())
            # targets go from the batch into the list directly; copying
            # them to the device first would be wasted work because the
            # model never sees them
            final_targets.extend(data["target"].cpu().numpy().tolist())
    # return final predictions and targets
    return final_predictions, final_targets

def create_embedding_matrix(word_index, embedding_dict):
    """
    This function creates the embedding matrix.

    Row `i` of the result holds the vector of the word whose index is `i`,
    so the matrix can be indexed with token ids directly. Words without a
    pre-trained vector keep an all-zero row.

    :param word_index: a dictionary with word:index_value
    :param embedding_dict: a dictionary with word:embedding_vector
    :return: a numpy array with one row per index from 0 to the largest
        index in word_index; an empty array when no word has a vector
    """
    # vectors of the words we know, keyed by the row they belong to
    known_vectors = {
        index: np.asarray(embedding_dict[word])
        for word, index in word_index.items()
        if word in embedding_dict
    }
    if not known_vectors:
        # without a single vector the embedding width is unknown
        return np.array([])

    sample_vector = next(iter(known_vectors.values()))
    # initialize matrix with zeros so that unknown words stay zero and
    # the remaining rows do not shift out of line with their index
    embedding_matrix = np.zeros(
        (max(word_index.values()) + 1, sample_vector.shape[0]),
        dtype=sample_vector.dtype,
    )
    for index, vector in known_vectors.items():
        embedding_matrix[index] = vector
    return embedding_matrix


def tokenizing(df, char_index_dict):
    """
    Encode every string of the 'sequence' column character by character.

    The result is stored in a new 'clean_seq' column of the same frame:
    one list per row, holding the ids as strings.

    :param df: pandas dataframe with a 'sequence' column
    :param char_index_dict: a dictionary with character:index_value
    :return: the same dataframe, with the 'clean_seq' column added
    """
    df['clean_seq'] = [
        [str(char_index_dict[symbol]) for symbol in sentence]
        for sentence in df['sequence']
    ]
    return df

def one_hot():
    """
    Build a one-hot vector for every key of CHAR_INDEX_DICT.

    The position of the 1 is the position of the key in the dictionary's
    iteration order, not the value stored under the key.

    :return: a dictionary with character:one_hot_vector
    """
    identity = np.eye(len(CHAR_INDEX_DICT))
    return {
        symbol: identity[position]
        for position, symbol in enumerate(CHAR_INDEX_DICT)
    }

def run(df, fold):
    """
    Run training and validation for a given fold
    and dataset

    :param df: pandas dataframe with kfold, sequence and family_id columns
    :param fold: current fold, int
    :return: (accuracy, precision, recall) of the last epoch that was run,
        which is not necessarily the best epoch
    """
    # fetch training dataframe
    train_df = df[df.kfold != fold].reset_index(drop=True)
    # fetch validation dataframe
    valid_df = df[df.kfold == fold].reset_index(drop=True)
    print("Fitting tokenizer")
    # convert training sequences to lists of character ids
    # using the fixed CHAR_INDEX_DICT lookup
    xtrain = list(tokenizing(train_df, CHAR_INDEX_DICT)['clean_seq'])
    # similarly convert validation data to
    # sequences
    xtest = list(tokenizing(valid_df, CHAR_INDEX_DICT)['clean_seq'])
    # zero pad the training sequences given the maximum length
    # this padding is done on left hand side
    # if sequence is > MAX_LEN, it is truncated on left hand side too
    xtrain = tf.keras.preprocessing.sequence.pad_sequences(
        xtrain, maxlen=MAX_LEN
    )
    # zero pad the validation sequences
    xtest = tf.keras.preprocessing.sequence.pad_sequences(
        xtest, maxlen=MAX_LEN
    )
    # initialize dataset class for training
    train_dataset = PFAMDataset(
        reviews=xtrain,
        targets=train_df.family_id.values
    )
    # create torch dataloader for training
    # torch dataloader loads the data using dataset
    # class in batches specified by batch size
    # NOTE(review): shuffle is left at its default, so every epoch walks
    # the training rows in dataframe order - confirm this is intended.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TRAIN_BATCH_SIZE,
        num_workers=2
    )
    # initialize dataset class for validation
    valid_dataset = PFAMDataset(
        reviews=xtest,
        targets=valid_df.family_id.values
    )

    # create torch dataloader for validation
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=VALID_BATCH_SIZE,
        num_workers=1
    )
    print("Loading embeddings")
    # one-hot vectors for every character, stacked into a matrix
    embedding_dict = one_hot()
    embedding_matrix = create_embedding_matrix(
        CHAR_INDEX_DICT, embedding_dict
    )
    # use the gpu when there is one and fall back to the cpu otherwise,
    # instead of failing on machines without cuda
    cuda_available = torch.cuda.is_available()
    print(cuda_available)
    device = torch.device("cuda" if cuda_available else "cpu")
    # fetch our model
    # model = LSTM(embedding_matrix)
    model = VDCNN(embedding_matrix)
    # send model to device
    model.to(device)

    # initialize Adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    print("Training Model")
    # set best accuracy to zero
    best_accuracy = 0
    # set early stopping counter to zero
    early_stopping_counter = 0
    # train and validate for all epochs
    for epoch in range(EPOCHS):
        # train one epoch
        train(train_data_loader, model, optimizer, device)
        # validate
        outputs, targets = evaluate(
            valid_data_loader, model, device
        )
        # the model returns one score per class; the predicted class is
        # the one with the largest score, computed once for all metrics
        predicted = np.argmax(np.array(outputs), axis=1)
        # calculate the metrics
        accuracy = metrics.accuracy_score(targets, predicted)
        # zero_division=0 scores classes without any predicted (or true)
        # sample as 0, the value the default setting also uses, without
        # emitting a warning on every epoch
        precision = metrics.precision_score(targets, predicted, average='macro', zero_division=0)
        recall = metrics.recall_score(targets, predicted, average='macro', zero_division=0)

        print(
            f"FOLD:{fold}, Epoch: {epoch}, Accuracy Score = {accuracy}, Precision Score = {precision}, Recall Score = {recall}"
        )
        # simple early stopping: the counter grows on every epoch that
        # fails to beat the best accuracy and is never reset, so training
        # stops at the third such epoch overall
        if accuracy > best_accuracy:
            best_accuracy = accuracy
        else:
            early_stopping_counter += 1
        if early_stopping_counter > 2:
            break
    return accuracy, precision, recall

if __name__ == "__main__":
    # read the dataframe that carries the kfold column
    df = pd.read_csv("/content/pfam_736_folds.csv")
    # one training/validation run per fold, folds 0 to 4
    accuracy_0, precision_0, recall_0 = run(df, fold=0)
    accuracy_1, precision_1, recall_1 = run(df, fold=1)
    accuracy_2, precision_2, recall_2 = run(df, fold=2)
    accuracy_3, precision_3, recall_3 = run(df, fold=3)
    accuracy_4, precision_4, recall_4 = run(df, fold=4)
    # gather each metric over the folds before averaging
    fold_accuracies = np.array([accuracy_0, accuracy_1, accuracy_2, accuracy_3, accuracy_4])
    fold_precisions = np.array([precision_0, precision_1, precision_2, precision_3, precision_4])
    fold_recalls = np.array([recall_0, recall_1, recall_2, recall_3, recall_4])
    print('Average accuracy : ' + str(np.mean(fold_accuracies)))
    print('Average precision : ' + str(np.mean(fold_precisions)))
    print('Average recall : ' + str(np.mean(fold_recalls)))


Fitting tokenizer
Loading embeddings
True
Training Model
FOLD:0, Epoch: 0, Accuracy Score = 0.516857720836143, Precision Score = 0.3806800471974179, Recall Score = 0.32444074598341166


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 1, Accuracy Score = 0.699637559002023, Precision Score = 0.633940114596063, Recall Score = 0.5529808799662123


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 2, Accuracy Score = 0.7962744436952124, Precision Score = 0.7575558275811801, Recall Score = 0.6941085262732394


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 3, Accuracy Score = 0.8284305461901551, Precision Score = 0.7927169216242822, Recall Score = 0.7402421266269653


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 4, Accuracy Score = 0.8174308833445718, Precision Score = 0.7792241592701943, Recall Score = 0.7338993674947555


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 5, Accuracy Score = 0.8482383681726231, Precision Score = 0.8074467376415737, Recall Score = 0.7744476756894502


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 6, Accuracy Score = 0.8388823331085637, Precision Score = 0.799614384528504, Recall Score = 0.7560973634458482


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 7, Accuracy Score = 0.8570886716115981, Precision Score = 0.8166196255918383, Recall Score = 0.7806435070546497


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


FOLD:0, Epoch: 8, Accuracy Score = 0.8562036412677007, Precision Score = 0.8237741576851157, Recall Score = 0.7833507355611838
Fitting tokenizer
Loading embeddings
True
Training Model
FOLD:1, Epoch: 0, Accuracy Score = 0.48086648685097777, Precision Score = 0.32410775818146503, Recall Score = 0.2826788210288884


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 1, Accuracy Score = 0.7418661496965611, Precision Score = 0.6865000487773386, Recall Score = 0.607276337788235


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 2, Accuracy Score = 0.7891942009440324, Precision Score = 0.7458438102952379, Recall Score = 0.6808345613425635


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 3, Accuracy Score = 0.8184844908968307, Precision Score = 0.7724877056971368, Recall Score = 0.7213141872141616


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 4, Accuracy Score = 0.8510198921105866, Precision Score = 0.8168664122497546, Recall Score = 0.774024835691776


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 5, Accuracy Score = 0.8606287929871881, Precision Score = 0.8296888034131242, Recall Score = 0.7840224639956694


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 6, Accuracy Score = 0.86083951449764, Precision Score = 0.8353477903586289, Recall Score = 0.7828767471678468


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:1, Epoch: 7, Accuracy Score = 0.8714177343223196, Precision Score = 0.8439990965506282, Recall Score = 0.802899131007543
FOLD:1, Epoch: 8, Accuracy Score = 0.8916048550236008, Precision Score = 0.858254159863566, Recall Score = 0.8311478415378781
FOLD:1, Epoch: 9, Accuracy Score = 0.8927427511800404, Precision Score = 0.8595152544478603, Recall Score = 0.8331426125843615
Fitting tokenizer
Loading embeddings
True
Training Model
FOLD:2, Epoch: 0, Accuracy Score = 0.5627950101146325, Precision Score = 0.4108494958557723, Recall Score = 0.360979037677768


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:2, Epoch: 1, Accuracy Score = 0.7799224544841538, Precision Score = 0.7220646973398166, Recall Score = 0.6671100968245437


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:2, Epoch: 2, Accuracy Score = 0.8264497639919083, Precision Score = 0.7859866976945566, Recall Score = 0.7441480305806286


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:2, Epoch: 3, Accuracy Score = 0.8112356709372893, Precision Score = 0.7846705472778652, Recall Score = 0.7224147066214869


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:2, Epoch: 4, Accuracy Score = 0.8644217801753203, Precision Score = 0.8297676142101126, Recall Score = 0.7966155232736033


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:2, Epoch: 5, Accuracy Score = 0.8649275118004046, Precision Score = 0.8304745169986332, Recall Score = 0.799218612459519


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:2, Epoch: 6, Accuracy Score = 0.8747471341874579, Precision Score = 0.8470331748032732, Recall Score = 0.809144943877194
FOLD:2, Epoch: 7, Accuracy Score = 0.8839345920431557, Precision Score = 0.8579806584868214, Recall Score = 0.821112852379825
FOLD:2, Epoch: 8, Accuracy Score = 0.8924477410654079, Precision Score = 0.8594469802899457, Recall Score = 0.8375419613665805
FOLD:2, Epoch: 9, Accuracy Score = 0.8870111260957518, Precision Score = 0.8581243716204222, Recall Score = 0.8292108878232847
Fitting tokenizer
Loading embeddings
True
Training Model
FOLD:3, Epoch: 0, Accuracy Score = 0.5341790289952798, Precision Score = 0.3956970309823497, Recall Score = 0.34266127536756935


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 1, Accuracy Score = 0.759313890761969, Precision Score = 0.6831661627817218, Recall Score = 0.6378883398051319


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 2, Accuracy Score = 0.8250168577208361, Precision Score = 0.7750833142335213, Recall Score = 0.7326489830220558


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 3, Accuracy Score = 0.8431810519217802, Precision Score = 0.7946797588004786, Recall Score = 0.7628025111451059


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 4, Accuracy Score = 0.8599966284558328, Precision Score = 0.826886724517677, Recall Score = 0.7891578834399822
FOLD:3, Epoch: 5, Accuracy Score = 0.8646325016857721, Precision Score = 0.8275176004025688, Recall Score = 0.7995763619776513


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 6, Accuracy Score = 0.8702376938637896, Precision Score = 0.8378406559676593, Recall Score = 0.8034940031121913
FOLD:3, Epoch: 7, Accuracy Score = 0.8858310856372218, Precision Score = 0.8493854289221688, Recall Score = 0.8265309827341949


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 8, Accuracy Score = 0.8851989211058665, Precision Score = 0.8513899253242248, Recall Score = 0.8265555687210909


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:3, Epoch: 9, Accuracy Score = 0.8937542144302091, Precision Score = 0.8661301436565654, Recall Score = 0.834095357623222
Fitting tokenizer
Loading embeddings
True
Training Model
FOLD:4, Epoch: 0, Accuracy Score = 0.49591200269723534, Precision Score = 0.35217967632489866, Recall Score = 0.32852362493469656


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 1, Accuracy Score = 0.725472016183412, Precision Score = 0.6802517345296605, Recall Score = 0.6084176983339175


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 2, Accuracy Score = 0.7979180714767363, Precision Score = 0.7521993399879423, Recall Score = 0.7014878968471556


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 3, Accuracy Score = 0.8256911665542819, Precision Score = 0.7919235645843372, Recall Score = 0.739892871807074


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 4, Accuracy Score = 0.8406945380984491, Precision Score = 0.8066494181789506, Recall Score = 0.7565959231185685


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 5, Accuracy Score = 0.8516099123398516, Precision Score = 0.825632460862489, Recall Score = 0.7747660944294829


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 6, Accuracy Score = 0.8751264329062711, Precision Score = 0.8469977381393384, Recall Score = 0.8050646605022543
FOLD:4, Epoch: 7, Accuracy Score = 0.8335300067430883, Precision Score = 0.8200003853826744, Recall Score = 0.7470035824888012


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 8, Accuracy Score = 0.8815323668240054, Precision Score = 0.8575186839063172, Recall Score = 0.8173681085013211


  _warn_prf(average, modifier, msg_start, len(result))


FOLD:4, Epoch: 9, Accuracy Score = 0.8787508428860418, Precision Score = 0.8556219307614149, Recall Score = 0.8100845748559438
Average accuracy : 0.8816925151719488
Average precision : 0.8526331716342757
Average recall : 0.8179768336895993
