<a href="https://colab.research.google.com/github/alexandrosXe/context_toxicity/blob/master/BILSTMS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Pytorch Implemantation for Context Toxicity**

# Install Progress bar

In [2]:
!pip install pkbar

Collecting pkbar
  Downloading https://files.pythonhosted.org/packages/e3/43/367098af2862f7b4e6aa871494ed3d66c57af849a5962a92baa2dd99b652/pkbar-0.4-py3-none-any.whl
Installing collected packages: pkbar
Successfully installed pkbar-0.4


# Classifiers 

# Imports

In [0]:
import torch.nn.functional as F
import torch
from torch import nn
from torch import optim
import torchtext
from torchtext.data import Field, BucketIterator
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.metrics import *
import pkbar
import numpy as np
import pandas as pd
from torch.utils.data import  DataLoader
from torch.utils.data import TensorDataset
from tensorflow.keras.preprocessing import sequence
import torch.autograd
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
# import warnings
# warnings.filterwarnings('ignore')
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import WeightedRandomSampler


# **Load Embeddings**

In [0]:
def load_embeddings_index():
  embeddings_index = dict()
  with open('embeddings/glove.6B.100d.txt', 'r') as glove_in:
    for line in glove_in.readlines():
      values = line.split()
      word = values[0]
      coefs = np.asarray(values[1:], dtype='float32')
      embeddings_index[word] = coefs
  return embeddings_index

def Compute_Vocab_Size(train,augmented_vocabulary=False):
  texts = train.text if not augmented_vocabulary else train.text + train.parent
  tokenizer = Tokenizer()
  tokenizer.fit_on_texts(texts)
  vocab_size = len(tokenizer.word_index) + 1
  print('Vocabulary Size: %d' % vocab_size)
  return vocab_size


# **Early Stoping**

In [0]:
class EarlyStopping:
  def __init__(self, patience=7, verbose=False, delta=0):
    self.patience = patience
    self.verbose = verbose
    self.counter = 0
    self.best_score = None
    self.early_stop = False
    self.val_loss_min = np.Inf
    self.delta = delta

  def __call__(self, val_loss, model):

    score = val_loss

    if self.best_score is None:
      self.best_score = score
      self.save_checkpoint(val_loss, model)
    elif score <= self.best_score + self.delta:
      self.counter += 1
      print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
      if self.counter >= self.patience:
        self.early_stop = True
    else:
      self.best_score = score
      self.save_checkpoint(val_loss, model)
      self.counter = 0

  def save_checkpoint(self, val_loss, model):
    if self.verbose:
      print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
      torch.save(model.state_dict(), 'checkpoint.pt')
      self.val_loss_min = val_loss

# LSTM Classifier

In [0]:

class LSTM_CLF(nn.Module):
  def __init__(self,vocab_size ,stacks=0, verbose=1, batch_size=128, n_epochs=100, max_length=512,
                 loss=nn.BCELoss(),monitor_loss="val_loss",patience=3, 
                 prefix="vanilla",
                 hidden_size=128,
                 word_embedding_size=200,
                 seed=42,
                 augmented_vocabulary = False,
                 no_sigmoid=False):
    super(LSTM_CLF, self).__init__()
    np.random.seed(seed)
    self.verbose = verbose
    self.augmented_vocabulary = augmented_vocabulary
    self.patience = patience
    self.batch_size = batch_size
    self.n_epochs = n_epochs
    self.no_sigmoid = no_sigmoid
    self.stacks=stacks
    self.max_length = max_length
    self.tokenizer = Tokenizer()
    self.loss = loss
    self.word_embedding_size = word_embedding_size
    self.hidden_size=hidden_size
    self.prefix = prefix
    self.monitor_loss = monitor_loss
    self.name = f'{prefix}-b{batch_size}.e{n_epochs}.len{max_length}.rnn'
    self.vocab_size=vocab_size

    #Layers
    self.word_embeds = nn.Embedding(self.vocab_size+2,self.word_embedding_size,padding_idx=0)
    #self.embeds_bn=nn.BatchNorm1d(self.max_length)
    if(stacks!=0):
      self.stacked_lstm = nn.LSTM(self.word_embedding_size,self.hidden_size,num_layers=self.stacks,bidirectional=True)
      self.dense=nn.Linear(self.hidden_size*2*self.stacks,128)
    else:
      self.stacked_lstm = nn.LSTM(self.word_embedding_size,self.hidden_size,bidirectional=True)
      self.lstm_bn = nn.BatchNorm1d(2*self.hidden_size)
      self.lstm_ln = nn.LayerNorm(2*self.hidden_size,elementwise_affine=False)
      self.dense=nn.Linear(2*self.hidden_size,128)
      self.dense1_bn = nn.BatchNorm1d(128)
      #self.dense1_ln = nn.LayerNorm(self.hidden_size,elementwise_affine=False)
    self.out=nn.Linear(128,1)
    self.tanh = nn.Tanh()
    self.sigmoid=nn.Sigmoid()

  def init_weights(self,bias):
        initrange = 0.5
        self.word_embeds.weight.data.uniform_(-initrange, initrange)
        #self.stacked_lstm.weight.data.uniform_(-initrange, initrange)
        self.dense.weight.data.uniform_(-initrange, initrange)
        self.dense.bias.data.zero_()
        self.out.weight.data.uniform_(-initrange, initrange)
        self.out.bias.data=bias
        

  def init_hidden(self,batch_size):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers * num_directions, minibatch_size, hidden_dim)
        return (Variable(torch.zeros(2, batch_size, self.hidden_size)).cuda(),   
                Variable(torch.zeros(2, batch_size, self.hidden_size).cuda()))    # <- change here: first dim of hidden needs to be doubled

  def forward(self, Text):
    hidden=self.init_hidden(len(Text))
    embeds=self.word_embeds(Text.to(torch.long))
    #embeds=self.embeds_bn(embeds)
    stacks, hidden =self.stacked_lstm(embeds.view(self.max_length,len(Text),self.word_embedding_size),hidden)
    concatenated=stacks.squeeze()[-1,:]
    #concatenated=self.lstm_ln(concatenated)
    output=self.dense(concatenated)
    #output=self.dense1_bn(output)
    output=self.tanh(output)
    output=self.out(output)
    output=self.sigmoid(output)
    #print(output)
    return output.to(torch.float)

  def model_show(self):
    print(self)

  def load_embeddings(self, pretrained_dict):
    self.embedding_matrix = np.zeros((self.vocab_size + 2, 100))
    for word, index in self.tokenizer.word_index.items():
      embedding_vector = pretrained_dict.get(word)
      if embedding_vector is not None:
        self.embedding_matrix[index + 1] = embedding_vector
    return torch.from_numpy(self.embedding_matrix)

  
  def text_process(self, texts):
    x1 = self.tokenizer.texts_to_sequences(texts.to_numpy())
    x1 = sequence.pad_sequences(x1, maxlen=self.max_length)  # padding
    return x1
    #return torch.tensor(x1, dtype=torch.long)
  

  def trainin(self,X_train,Y_train, optimizer,device):
    #UnderSampling
    majority_weight = 1/(6000-59)
    minority_weight = 1/59
    sample_weights = np.array([majority_weight, minority_weight])
    weights = sample_weights[Y_train]
    sampler = WeightedRandomSampler(weights=weights, num_samples=len(weights), replacement=True)
    train_ds = TensorDataset(X_train,Y_train)
    train_dl = DataLoader(train_ds, batch_size=self.batch_size, sampler=sampler)
    # train_dl = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True)
    epoch_loss=0 
    epoch_Auc=0
    epoch_accuracy=0
    epoch_recall=0
    epoch_precision=0
    epoch_F1=0
    Y=torch.empty(0) #create empty torch to append predictions
    self.train()
    for xb, yb in train_dl:
      xb, yb = xb.to(device), yb.to(device)  #move batches to GPU or CPU
      # self.zero_grad() #DES TO
      predictions =self(xb)  #run model for mini batch
      predictions=predictions.squeeze()
      weight = torch.tensor([0.1, 0.9]).to(device)
      weight_ = weight[yb.data.view(-1).long()].view_as(yb)
      criterion = nn.BCELoss(reduction='none')   #(reduce=False)
      loss = criterion(predictions, yb.to(torch.float))
      loss_class_weighted = loss * weight_
      loss_class_weighted = loss_class_weighted.mean()
      #loss=self.loss(predictions,yb.to(torch.float))#compute loss of mini batch
      #epoch_loss+=loss.item()
      epoch_loss+=loss_class_weighted.item()
      Y=torch.cat((Y,predictions.cpu()),0)
      loss_class_weighted.backward() #compute gradients
      # loss.backward() #compute gradients
      optimizer.step() #update parameters
      optimizer.zero_grad() 
    with torch.no_grad():
      predictions=Y
      rounded=torch.round(predictions).squeeze().cpu().detach().numpy()
      predictions=predictions.cpu().detach().numpy()
      epoch_Auc=roc_auc_score(Y_train.cpu().numpy(),predictions)
      epoch_accuracy=accuracy_score(Y_train.cpu().numpy(),rounded)
      tn, fp, fn, tp =confusion_matrix(Y_train.cpu().numpy(),rounded,labels=[0,1]).ravel()
      epoch_precision+=tp/(tp+fp)
      epoch_recall+=tp/(tp+fn)
      epoch_F1+=2*((epoch_precision*epoch_recall)/(epoch_precision+epoch_recall))
    return epoch_loss,epoch_Auc,epoch_accuracy,epoch_recall,epoch_precision,epoch_F1
    #return epoch_loss,epoch_Auc,epoch_accuracy
  

  def evaluate(self,X_dev,Y_dev,device):
    dev_ds = TensorDataset(X_dev,Y_dev)
    dev_dl = DataLoader(dev_ds, batch_size=self.batch_size)
    epoch_loss=0
    epoch_Auc=0
    epoch_accuracy=0
    epoch_recall=0
    epoch_precision=0
    epoch_F1=0
    self.eval()
    with torch.no_grad():   # compute validation loss
      for xb, yb in dev_dl:
        xb,yb=xb.to(device), yb.to(device) #move to Gpu
        predictions =self(xb)
        weight = torch.tensor([0.1, 0.9]).to(device)
        weight_ = weight[yb.data.view(-1).long()].view_as(yb)
        criterion = nn.BCELoss(reduction='none')  #(reduce=False)
        loss = criterion(predictions, yb.to(torch.float))
        loss_class_weighted = loss * weight_
        loss_class_weighted = loss_class_weighted.mean()
        #val_loss=self.loss(predictions.squeeze(), yb.to(torch.float))
        val_loss=loss_class_weighted
        epoch_loss+=val_loss
      predictions=self(X_dev.to(device))
      rounded=torch.round(predictions).squeeze().cpu().detach().numpy()
      predictions=predictions.cpu().detach().numpy()
      epoch_Auc=roc_auc_score(Y_dev.cpu().numpy(),predictions)
      epoch_accuracy=accuracy_score(Y_dev.cpu().numpy(),rounded)
      tn, fp, fn, tp =confusion_matrix(Y_dev.cpu().numpy(),rounded,labels=[0,1]).ravel()
      epoch_precision+=tp/(tp+fp)
      epoch_recall+=tp/(tp+fn)
      epoch_F1+=2*((epoch_precision*epoch_recall)/(epoch_precision+epoch_recall))
      return epoch_loss,epoch_Auc,epoch_accuracy,epoch_recall,epoch_precision,epoch_F1
      #print(epoch_loss,epoch_Auc,epoch_accuracy)
      #return epoch_loss,epoch_Auc,epoch_accuracy

  
  def fit(self, train, dev,optimizer, pretrained_embeddings, class_weights={0: 1, 1: 1}):
    np.seterr(over='raise')
    texts = train.text if not self.augmented_vocabulary else train.text + train.parent
    self.tokenizer.fit_on_texts(texts)
    self.vocab_size = len(self.tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % self.vocab_size)
    pos = sum(train.label)
    neg = len(train.label)-pos
    bias = np.log(pos/neg)
    print(bias,"BIASSSS")
    bias=torch.tensor(bias)
    #self.init_weights(bias.to(torch.float))
    with torch.no_grad():
      self.out.bias=torch.nn.Parameter(bias.to(torch.float)) # set bias of last dense layer log(T/N)
    self.out.bias.requires_grad_(False)
    X_train=self.text_process(train.text)
    X_train=torch.from_numpy(X_train)
    Y_train=torch.from_numpy(train.label.to_numpy())
    X_dev=self.text_process(dev.text)
    X_dev=torch.from_numpy(X_dev)
    Y_dev=torch.from_numpy(dev.label.to_numpy())
    early_stopping = EarlyStopping(patience=self.patience, verbose=True)     #EARLYY STOPING
    i=1 #for progress bar
    if torch.cuda.is_available():
      device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
      print("Running on the GPU")
    else:
      device = torch.device("cpu")
      print("Running on the CPU")
    self.to(device)
    for epoch in range(self.n_epochs):
      print('\nEpoch: %d/%d' % (epoch + 1, self.n_epochs))
      kbar = pkbar.Kbar(target=self.n_epochs, width=10)
      kbar_val=pkbar.Kbar(target=self.n_epochs, width=10)
      epoch_loss,epoch_Auc,epoch_accuracy,epoch_recall,epoch_precision,epoch_F1=self.trainin(X_train,Y_train,optimizer,device)
      kbar.update(i, values=[("loss",epoch_loss), ("accuracy",epoch_accuracy),("AUC_score",epoch_Auc),("precision",epoch_precision),("recall",epoch_recall),("F1",epoch_F1)])
      val_loss,val_AUC_score,val_accuracy,val_recall,val_precision,val_F1=self.evaluate(X_dev,Y_dev,device)
      print("Val auc score in epoch ",epoch+1, ":",val_AUC_score)
      early_stopping(val_AUC_score,self)   
      if early_stopping.early_stop: #check for early stopping
        print("Early stopping")
        print("_________________________________________________-")
        break
      kbar_val.update(i,values=[("val_loss",val_loss), ("val_accuracy",val_accuracy),("val_AUC_score",val_AUC_score),("val_precision",val_precision),("val_recall",val_recall),("val_F1",val_F1)])
      i+=1

  def predict(self, test):
    test=self.text_process(test.text)
    predictions = self(torch.from_numpy(test).to(torch.device("cuda:0")))
    return predictions

  def save(self,PATH):  #save model weights
    torch.save(self.state_dict(), PATH)

  def load(self,PATH): #load model weights
    model.load_state_dict(torch.load(PATH))
    model.eval()  
        
  
  

# **CA BILSTM**

In [0]:
class LSTM_IC1_CLF(LSTM_CLF):
    # RNN classification of the target text, with context representation concatenated.
    # The resulting representation of the target text is concatenated with the representation
    # of the parent text. The parent text representation comes from a single-level Bidirectional RNN.
    # The target text representation comes from a stacked LSTM.

  def __init__(self, prefix="IC1", **kwargs):
    super(LSTM_IC1_CLF, self).__init__(**kwargs)
    self.prefix = prefix

    #parent BILSTM Layer
    self.parent_emb=nn.Embedding(self.vocab_size+2,100,padding_idx=0)
    self.parent_encoder=nn.LSTM(100,self.hidden_size,bidirectional=True)
    #new dense layer
    self.dense=nn.Linear(self.hidden_size*4,self.hidden_size)

  # def init_Parent_hidden(self,batch_size):
  #       # Before we've done anything, we dont have any hidden state.
  #       # Refer to the Pytorch documentation to see exactly
  #       # why they have this dimensionality.
  #       # The axes semantics are (num_layers * num_directions, minibatch_size, hidden_dim)
  #       return (Variable(torch.zeros(2, batch_size, 64)).cuda(),   
  #               Variable(torch.zeros(2, batch_size, 64).cuda()))    # <- change here: first dim of hidden needs to be doubled


  #Forward pass

  def forward(self,target,parent):
    #take parent represantation
    batch_size=len(target)
    hidden=self.init_hidden(batch_size) #initialize hidden state of parent BILSTM
    parent_embds=self.parent_emb(parent.to(torch.long))
    parent_encoding,_=self.parent_encoder(parent_embds.view(self.max_length,batch_size,100),hidden)

    #take output of last hidden state of parent encoder
    parent=parent_encoding.squeeze()[-1,:]
    
    #take child represantation
    hidden=self.init_hidden(batch_size) #initialize initial hidden state of Target BILTM
    embeds=self.word_embeds(target.to(torch.long))
    embeds=self.embeds_bn(embeds)
    stacks, _ =self.stacked_lstm(embeds.view(self.max_length,batch_size,self.word_embedding_size),hidden)
    concatenated=stacks.squeeze()[-1,:]
    target=self.lstm_bn(concatenated)

    #concatenate parent and child represantation 
    concatenated=torch.cat((parent,target),dim=1)  
    output=self.dense(concatenated)
    output=self.tanh(output)
    output=self.dense1_bn(output)
    output=self.out(output)
    output=self.sigmoid(output)
    #print(output)
    return output.to(torch.float)

  def text_process(self, texts, parents):
        target_x = self.tokenizer.texts_to_sequences(texts.to_numpy())
        target_x = sequence.pad_sequences(target_x, maxlen=self.max_length)  # padding
        parent_x = self.tokenizer.texts_to_sequences(parents.to_numpy())
        parent_x = sequence.pad_sequences(parent_x, maxlen=self.max_length)  # padding
        #return [target_x, parent_x]
        return torch.from_numpy(target_x),torch.from_numpy(parent_x)
  

  
  def trainin(self,X_train_target,X_train_parent,Y_train, optimizer,device):
    train_ds = TensorDataset(X_train_target,X_train_parent,Y_train)
    train_dl = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True)
    epoch_loss=0
    epoch_Auc=0
    epoch_accuracy=0
    epoch_recall=0
    epoch_precision=0
    epoch_F1=0
    Y=torch.empty(0) #create empty torch to append predictions
    self.train()
    for xb,xp ,yb in train_dl:
        xb, xp,yb = xb.to(device),xp.to(device), yb.to(device)  #move batches to GPU or CPU
        # self.zero_grad() #DES TO
        predictions =self(xb,xp)  #run model for mini batch
        predictions=predictions.squeeze()
        loss=self.loss(predictions,yb.to(torch.float))#self.loss(predictions,yb.to(torch.float))#.view(len(xb),1)) #compute loss of mini batch
        epoch_loss+=loss.item()
        Y=torch.cat((Y,predictions.cpu()),0)
        loss.backward() #compute gradients
        nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
        optimizer.step() #update parameters
        optimizer.zero_grad() 
        #epoch_loss.detach().item()
    with torch.no_grad():
      predictions=Y
      rounded=torch.round(predictions).squeeze().cpu().detach().numpy()
      predictions=predictions.cpu().detach().numpy()
      epoch_Auc=roc_auc_score(Y_train.cpu().numpy(),predictions)
      epoch_accuracy=accuracy_score(Y_train.cpu().numpy(),rounded)
      tn, fp, fn, tp =confusion_matrix(Y_train.cpu().numpy(),rounded,labels=[0,1]).ravel()
      epoch_precision+=tp/(tp+fp)
      epoch_recall+=tp/(tp+fn)
      epoch_F1+=2*((epoch_precision*epoch_recall)/(epoch_precision+epoch_recall))
    return epoch_loss,epoch_Auc,epoch_accuracy,epoch_recall,epoch_precision,epoch_F1
    #return epoch_loss,epoch_Auc,epoch_accuracy



  def evaluate(self,X_dev_target,X_dev_parent,Y_dev,device):
    dev_ds = TensorDataset(X_dev_target,X_dev_parent,Y_dev)
    dev_dl = DataLoader(dev_ds, batch_size=self.batch_size)
    epoch_loss=0
    epoch_recall=0
    epoch_precision=0
    epoch_F1=0
    Y=torch.empty(0) #create empty torch to append predictions
    self.eval()
    with torch.no_grad():   # compute validation loss
      for xb, xp,yb in dev_dl:
        xb,xp,yb=xb.to(device), xp.to(device) ,yb.to(device) #move to Gpu
        predictions =self(xb,xp)
        Y=torch.cat((Y,predictions.cpu()),0)
        val_loss=self.loss(predictions.squeeze(), yb.to(torch.float))
        epoch_loss+=val_loss
      predictions=Y
      # predictions=self(X_dev_target.to(device),X_dev_parent.to(device))
      rounded=torch.round(predictions).squeeze().cpu().detach().numpy()
      predictions=predictions.cpu().detach().numpy()
      epoch_Auc=roc_auc_score(Y_dev.cpu().numpy(),predictions)
      epoch_accuracy=accuracy_score(Y_dev.cpu().numpy(),rounded)
      tn, fp, fn, tp =confusion_matrix(Y_dev.cpu().numpy(),rounded,labels=[0,1]).ravel()
      epoch_precision+=tp/(tp+fp)
      epoch_recall+=tp/(tp+fn)
      epoch_F1+=2*((epoch_precision*epoch_recall)/(epoch_precision+epoch_recall))
      return epoch_loss,epoch_Auc,epoch_accuracy,epoch_recall,epoch_precision,epoch_F1
      #print(epoch_loss,epoch_Auc,epoch_accuracy)
      #return epoch_loss,epoch_Auc,epoch_accuracy



  def fit(self, train, dev,optimizer, pretrained_embeddings, class_weights={0: 1, 1: 1}):
    texts = train.text if not self.augmented_vocabulary else train.text + train.parent
    self.tokenizer.fit_on_texts(texts)
    self.vocab_size = len(self.tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % self.vocab_size)
    X_train_target,X_train_parent=self.text_process(train.text,train.parent)
    Y_train=torch.from_numpy(train.label.to_numpy())
    X_dev_target,X_dev_parent=self.text_process(dev.text,dev.parent)
    Y_dev=torch.from_numpy(dev.label.to_numpy())
    early_stopping = EarlyStopping(patience=self.patience, verbose=True)     #EARLYY STOPING
    self.load_embeddings(pretrained_embeddings)
    pos = sum(train.label)
    neg = len(train.label)-pos
    bias = np.log(pos/neg)
    print(bias,"BIASSSS")
    bias=torch.tensor(bias)
    #self.init_weights(bias.to(torch.float))
    with torch.no_grad():
      self.out.bias=torch.nn.Parameter(bias.to(torch.float)) # set bias of last dense layer log(T/N)
    self.out.bias.requires_grad_(False)
    i=1 #for progress bar
    if torch.cuda.is_available():
      device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
      print("Running on the GPU")
    else:
      device = torch.device("cpu")
      print("Running on the CPU")
    self.to(device)
    for epoch in range(self.n_epochs):
      print('\nEpoch: %d/%d' % (epoch + 1, self.n_epochs))
      kbar = pkbar.Kbar(target=self.n_epochs, width=10)
      kbar_val=pkbar.Kbar(target=self.n_epochs, width=10)
      epoch_loss,epoch_Auc,epoch_accuracy,epoch_recall,epoch_precision,epoch_F1=self.trainin(X_train_target,X_train_parent,Y_train,optimizer,device)
      kbar.update(i, values=[("loss",epoch_loss), ("accuracy",epoch_accuracy),("AUC_score",epoch_Auc),("precision",epoch_precision),("recall",epoch_recall),("F1",epoch_F1)])
      val_loss,val_AUC_score,val_accuracy,val_recall,val_precision,val_F1=self.evaluate(X_dev_target,X_dev_parent,Y_dev,device)
      print("Val auc score in epoch ",epoch+1, ":",val_AUC_score)
      early_stopping(val_AUC_score,self)   
      if early_stopping.early_stop: #check for early stopping
        print("Early stopping")
        print("_________________________________________________-")
        break
      kbar_val.update(i,values=[("val_loss",val_loss), ("val_accuracy",val_accuracy),("val_AUC_score",val_AUC_score),("val_precision",val_precision),("val_recall",val_recall),("val_F1",val_F1)])
      i+=1

  def predict(self, test):
    test_target,test_parent=self.text_process(test.text,test.parent)
    predictions = self(test_target.to(torch.device("cuda:0")),test_parent.to(torch.device("cuda:0")))
    return predictions


# **Monte Carlo Validation**

In [0]:
def MC_Validation(dataset,k=5):
  avgscore=0
  for i in range(k):
    model= LSTM_CLF(vocab_size=size,n_epochs=20)
    #model=LSTM_IC1_CLF(vocab_size=size,n_epochs=10)
    optimizer=optim.Adam(model.parameters(), lr=1e-03)

    # train @N models
    X_train, X_test = train_test_split(dataset, test_size=0.4,random_state=i)
    #X_train, X_val = train_test_split(X_train,test_size=0.25,random_state=i) # 0.25 x 0.8 = 0.2
    XC_train,X_test=train_test_split(dataC, test_size=0.2,random_state=i) # test dataset with C to train and test
    XC_train,X_val= train_test_split(XC_train,test_size=0.25,random_state=i) # 0.25 x 0.8 = 0.2

    # train @C models
    # X_train, X_test = train_test_split(dataset, test_size=0.4,random_state=i)
    # #X_train, X_val = train_test_split(X_train,test_size=0.25,random_state=i) # 0.25 x 0.8 = 0.2
    # X_val,X_test=train_test_split(X_test, test_size=0.5,random_state=i) # test dataset with C to train and test

    # Perform MC Validation
    model.fit(X_train,X_val,optimizer,pretrained_embeddings=embeddings)  #bert_weights="bert_weights.h5"
    preds=model.predict(X_test)#X_test.text.to_numpy())
    preds=preds.squeeze().cpu().detach().numpy()
    gold=X_test.label
    print("\n__________________________________\n") 
    score = roc_auc_score(gold, preds)
    print("AUC score in ",i+1," fold ",score)
    print("\n__________________________________") #to see results
    avgscore+=score
  avgscore/=k
  return avgscore


## **Load Dataset and Embeddings**

In [7]:
dataN=pd.read_csv("dataset/oc.csv",header=0)
dataC=pd.read_csv("dataset/wc.csv",header=0)
size=Compute_Vocab_Size(dataN)
embeddings=load_embeddings_index()



Vocabulary Size: 21658


# Seperate toxic comments for the rest

In [55]:
#Get toxic comments and give them always to train set
toxics=dataN.loc[dataN['label']==1]
Non_toxics=dataN[dataN.label != 1]
print(Non_toxics.shape)
# Non_toxics=dataN.loc[dataN['label']==0]
#Non_toxics=Non_toxics[0:59]
# #print(Non_toxics.label.head(100))
# frames = [toxics,Non_toxics]
# data = pd.concat(frames)
#print(data.shape)


(9941, 4)


# Run 5-fold MC Validation(Make sure all toxic comments are always in training set)

In [0]:

# toxics=dataN.loc[dataN['label']==1]
# #toxics=toxics[0:5]
# #print(toxics.label.head(100))
# # print(toxics.label.head(5))
# Non_toxics=dataN.loc[dataN['label']==0]
# Non_toxics=Non_toxics[0:59]
# #print(Non_toxics.label.head(100))
# frames = [toxics,Non_toxics]
# data = pd.concat(frames)
# avgscore=0
# print(data.shape)


#Make 5-fold Monte carlo cross Validation (Train set has always all the toxic comments of @N dataset)

avgscore=0
for i in range(5):
  model= LSTM_CLF(vocab_size=size,n_epochs=20)
  #model=LSTM_IC1_CLF(vocab_size=size,n_epochs=10)
  optimizer=optim.Adam(model.parameters(), lr=1e-03)
  # train @N models
  X_train, X_val = train_test_split(Non_toxics, test_size=0.4023,random_state=0)
  #Give to train set all toxic comments
  frames = [toxics,X_train]
  X_train = pd.concat(frames)
  XC_train,X_test=train_test_split(dataC, test_size=0.2,random_state=i) # test dataset with C to train and test
  XC_train,X_val= train_test_split(XC_train,test_size=0.25,random_state=i) # 0.25 x 0.8 = 0.2
  model.fit(X_train,X_val,optimizer,pretrained_embeddings=embeddings)  #bert_weights="bert_weights.h5"
  preds=model.predict(X_test)
  preds=preds.squeeze().cpu().detach().numpy()
  #preds=torch.round(preds).squeeze().cpu().detach().numpy()
  gold=X_test.label
  print("\n__________________________________\n") 
  score = roc_auc_score(gold, preds)
  print("AUC score in ",i+1," fold ",score)
  print("\n__________________________________") #to see results
  avgscore+=score
result=avgscore/5
print("Average AUC score over 5fold MC validation is ",result) 
  

# **Stratified Split And MC(5-fold)**

In [8]:
#Make stratified split in @N dataset for train data

sss = StratifiedShuffleSplit(n_splits=5, test_size=0.4)   #random_state=0)
X=dataN.text.to_numpy()
y=dataN.label.to_numpy()
train=list(sss.split(X,y))

#Make stratified split in @C dataset for val and test data
sss_Val = StratifiedShuffleSplit(n_splits=1, test_size=0.4)    #random_state=0)
X_val=dataC.text.to_numpy()
y_val=dataC.label.to_numpy()
val_and_test=list(sss_Val.split(X_val,y_val))
train_index,test_index = val_and_test[0]
X_train, X_val_test = X_val[train_index], X_val[test_index]
y_train, y_val_test = y_val[train_index], y_val[test_index]

# Now make stratified split in 40% of C dataset for val(20%) and test(20%)
sss_Val= StratifiedShuffleSplit(n_splits=5, test_size=0.5)#, random_state=0)
val_and_test=list( sss_Val.split(X_val_test,y_val_test))
avgscore=0
for i in range(5):
  train_index, _ = train[i]
  val_index , test_index = val_and_test[i]
  X_train,y_train=X[train_index],y[train_index]
  X_val,y_val=X_val_test[val_index],y_val_test[val_index]
  X_test,y_test=X_val_test[test_index],y_val_test[test_index]
  
  # Create X_train , X_val and X_test Dataframes
  X_train=pd.DataFrame({'text': X_train,'label': y_train})
  X_val=pd.DataFrame({'text': X_val,'label': y_val})
  X_test=pd.DataFrame({'text': X_test,'label': y_test})
  print(X_train.loc[X_train['label']==1].shape)
  # print(X_val.shape)
  # print(X_test.shape)
  model= LSTM_CLF(vocab_size=size,n_epochs=50)
  #model=LSTM_IC1_CLF(vocab_size=size,n_epochs=10)
  optimizer=optim.Adam(model.parameters(), lr=1e-03)
  model.fit(X_train,X_val,optimizer,pretrained_embeddings=embeddings)  #bert_weights="bert_weights.h5"
  preds=model.predict(X_test)
  preds=preds.squeeze().cpu().detach().numpy()
  #preds=torch.round(preds).squeeze().cpu().detach().numpy()
  gold=X_test.label.to_numpy()
  print("\n__________________________________\n") 
  score = roc_auc_score(gold, preds)
  print("AUC score in ",i+1," fold ",score)
  print("\n__________________________________") #to see results
  avgscore+=score
result=avgscore/5
print("Average AUC score over 5fold MC validation is ",result) 
  
  




(35, 2)
Vocabulary Size: 15790
-5.138316273042602 BIASSSS
Running on the GPU

Epoch: 1/50
 1/50 [..........] - ETA: 2:25 - loss: 28.5768 - accuracy: 0.3678 - AUC_score: 0.5649 - precision: 0.0071 - recall: 0.7714 - F1: 0.0140

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  1 : 0.4764720812182741
Validation loss decreased (inf --> 0.476472).  Saving model ...
 1/50 [..........] - ETA: 2:49 - val_loss: 3.5933 - val_accuracy: 0.0150 - val_AUC_score: 0.4765 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 2/50
 2/50 [..........] - ETA: 1:03 - loss: 8.1823 - accuracy: 0.0058 - AUC_score: 0.4939 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  2 : 0.4745769881556684
EarlyStopping counter: 1 out of 3
 2/50 [..........] - ETA: 1:14 - val_loss: 3.3387 - val_accuracy: 0.0150 - val_AUC_score: 0.4746 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 3/50
 3/50 [..........] - ETA: 42s - loss: 8.1515 - accuracy: 0.0058 - AUC_score: 0.5333 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  3 : 0.47117597292724195
EarlyStopping counter: 2 out of 3
 3/50 [..........] - ETA: 49s - val_loss: 3.1029 - val_accuracy: 0.0150 - val_AUC_score: 0.4712 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 4/50
 4/50 [..........] - ETA: 31s - loss: 8.1568 - accuracy: 0.0058 - AUC_score: 0.5066 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  4 : 0.45671742808798654
EarlyStopping counter: 3 out of 3
Early stopping
_________________________________________________-

__________________________________

AUC score in  1  fold  0.41884940778341795

__________________________________
(35, 2)
Vocabulary Size: 15884
-5.138316273042602 BIASSSS
Running on the GPU

Epoch: 1/50
 1/50 [..........] - ETA: 2:12 - loss: 29.5088 - accuracy: 0.3643 - AUC_score: 0.4831 - precision: 0.0058 - recall: 0.6286 - F1: 0.0114

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  1 : 0.5515736040609137
Validation loss decreased (inf --> 0.551574).  Saving model ...
 1/50 [..........] - ETA: 2:35 - val_loss: 3.6000 - val_accuracy: 0.0150 - val_AUC_score: 0.5516 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 2/50
 2/50 [..........] - ETA: 1:04 - loss: 8.1658 - accuracy: 0.0058 - AUC_score: 0.4874 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  2 : 0.5380033840947547
EarlyStopping counter: 1 out of 3
 2/50 [..........] - ETA: 1:15 - val_loss: 3.4243 - val_accuracy: 0.0150 - val_AUC_score: 0.5380 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 3/50
 3/50 [..........] - ETA: 42s - loss: 8.1741 - accuracy: 0.0058 - AUC_score: 0.4652 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  3 : 0.5375042301184433
EarlyStopping counter: 2 out of 3
 3/50 [..........] - ETA: 49s - val_loss: 3.1733 - val_accuracy: 0.0150 - val_AUC_score: 0.5375 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 4/50
 4/50 [..........] - ETA: 31s - loss: 8.1650 - accuracy: 0.0058 - AUC_score: 0.5087 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  4 : 0.534864636209814
EarlyStopping counter: 3 out of 3
Early stopping
_________________________________________________-

__________________________________

AUC score in  2  fold  0.43263113367174283

__________________________________
(35, 2)
Vocabulary Size: 15803
-5.138316273042602 BIASSSS
Running on the GPU

Epoch: 1/50
 1/50 [..........] - ETA: 2:11 - loss: 28.4763 - accuracy: 0.3460 - AUC_score: 0.4788 - precision: 0.0061 - recall: 0.6857 - F1: 0.0121

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  1 : 0.5150084602368866
Validation loss decreased (inf --> 0.515008).  Saving model ...
 1/50 [..........] - ETA: 2:35 - val_loss: 3.3635 - val_accuracy: 0.0150 - val_AUC_score: 0.5150 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 2/50
 2/50 [..........] - ETA: 1:05 - loss: 8.1730 - accuracy: 0.0058 - AUC_score: 0.5349 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  2 : 0.5090439932318105
EarlyStopping counter: 1 out of 3
 2/50 [..........] - ETA: 1:15 - val_loss: 3.2404 - val_accuracy: 0.0150 - val_AUC_score: 0.5090 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 3/50
 3/50 [..........] - ETA: 42s - loss: 8.1670 - accuracy: 0.0058 - AUC_score: 0.4759 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  3 : 0.5110321489001692
EarlyStopping counter: 2 out of 3
 3/50 [..........] - ETA: 49s - val_loss: 3.4383 - val_accuracy: 0.0150 - val_AUC_score: 0.5110 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 4/50
 4/50 [..........] - ETA: 30s - loss: 8.1610 - accuracy: 0.0058 - AUC_score: 0.5161 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  4 : 0.5091624365482234
EarlyStopping counter: 3 out of 3
Early stopping
_________________________________________________-

__________________________________

AUC score in  3  fold  0.526497461928934

__________________________________
(35, 2)
Vocabulary Size: 15680
-5.138316273042602 BIASSSS
Running on the GPU

Epoch: 1/50
 1/50 [..........] - ETA: 2:10 - loss: 29.0758 - accuracy: 0.3473 - AUC_score: 0.5930 - precision: 0.0071 - recall: 0.8000 - F1: 0.0141

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  1 : 0.48771573604060914
Validation loss decreased (inf --> 0.487716).  Saving model ...
 1/50 [..........] - ETA: 2:35 - val_loss: 3.2882 - val_accuracy: 0.0150 - val_AUC_score: 0.4877 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 2/50
 2/50 [..........] - ETA: 1:04 - loss: 8.1579 - accuracy: 0.0058 - AUC_score: 0.5249 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  2 : 0.4811082910321489
EarlyStopping counter: 1 out of 3
 2/50 [..........] - ETA: 1:15 - val_loss: 3.3082 - val_accuracy: 0.0150 - val_AUC_score: 0.4811 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 3/50
 3/50 [..........] - ETA: 42s - loss: 8.1553 - accuracy: 0.0058 - AUC_score: 0.4220 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  3 : 0.47976311336717425
EarlyStopping counter: 2 out of 3
 3/50 [..........] - ETA: 49s - val_loss: 3.3331 - val_accuracy: 0.0150 - val_AUC_score: 0.4798 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 4/50
 4/50 [..........] - ETA: 30s - loss: 8.1625 - accuracy: 0.0058 - AUC_score: 0.4508 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  4 : 0.48019458544839255
EarlyStopping counter: 3 out of 3
Early stopping
_________________________________________________-

__________________________________

AUC score in  4  fold  0.5151184433164129

__________________________________
(35, 2)
Vocabulary Size: 15809
-5.138316273042602 BIASSSS
Running on the GPU

Epoch: 1/50
 1/50 [..........] - ETA: 2:13 - loss: 29.4926 - accuracy: 0.3735 - AUC_score: 0.5759 - precision: 0.0074 - recall: 0.8000 - F1: 0.0147

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  1 : 0.4775380710659898
Validation loss decreased (inf --> 0.477538).  Saving model ...
 1/50 [..........] - ETA: 2:36 - val_loss: 3.6346 - val_accuracy: 0.0150 - val_AUC_score: 0.4775 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 2/50
 2/50 [..........] - ETA: 1:04 - loss: 8.1734 - accuracy: 0.0058 - AUC_score: 0.5042 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  2 : 0.4818104906937394
Validation loss decreased (0.477538 --> 0.481810).  Saving model ...
 2/50 [..........] - ETA: 1:16 - val_loss: 3.2420 - val_accuracy: 0.0150 - val_AUC_score: 0.4818 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 3/50
 3/50 [..........] - ETA: 42s - loss: 8.1571 - accuracy: 0.0058 - AUC_score: 0.5026 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  3 : 0.4836548223350254
Validation loss decreased (0.481810 --> 0.483655).  Saving model ...
 3/50 [..........] - ETA: 50s - val_loss: 3.2698 - val_accuracy: 0.0150 - val_AUC_score: 0.4837 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 4/50
 4/50 [..........] - ETA: 30s - loss: 8.1715 - accuracy: 0.0058 - AUC_score: 0.4857 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  4 : 0.4929949238578679
Validation loss decreased (0.483655 --> 0.492995).  Saving model ...
 4/50 [..........] - ETA: 36s - val_loss: 3.2896 - val_accuracy: 0.0150 - val_AUC_score: 0.4930 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 5/50
 5/50 [>.........] - ETA: 24s - loss: 8.1692 - accuracy: 0.0058 - AUC_score: 0.4780 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  5 : 0.48606598984771565
EarlyStopping counter: 1 out of 3
 5/50 [>.........] - ETA: 28s - val_loss: 3.1060 - val_accuracy: 0.0150 - val_AUC_score: 0.4861 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 6/50
 6/50 [>.........] - ETA: 20s - loss: 8.1903 - accuracy: 0.0058 - AUC_score: 0.5662 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  6 : 0.4859898477157361
EarlyStopping counter: 2 out of 3
 6/50 [>.........] - ETA: 23s - val_loss: 3.1543 - val_accuracy: 0.0150 - val_AUC_score: 0.4860 - val_precision: 0.0150 - val_recall: 1.0000 - val_F1: 0.0296

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)



Epoch: 7/50
 7/50 [>.........] - ETA: 16s - loss: 8.1585 - accuracy: 0.0058 - AUC_score: 0.4947 - precision: 0.0058 - recall: 1.0000 - F1: 0.0116

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Val auc score in epoch  7 : 0.4873181049069374
EarlyStopping counter: 3 out of 3
Early stopping
_________________________________________________-

__________________________________

AUC score in  5  fold  0.5395854483925551

__________________________________
Average AUC score over 5fold MC validation is  0.48653637901861246


# Create a balanced small dataset (with all toxic comments and try to overfit on it )

In [0]:
#Get toxic comments and give them always to train set
toxics=dataN.loc[dataN['label']==1]
Non_toxics=dataN[dataN.label != 1]
print(Non_toxics.shape)
# Non_toxics=dataN.loc[dataN['label']==0]
Non_toxics=Non_toxics[0:59]
# #print(Non_toxics.label.head(100))
frames = [toxics,Non_toxics]
data = pd.concat(frames)
print(data.shape)




avgscore=0
for i in range(5):
  model= LSTM_CLF(vocab_size=size,n_epochs=20)
  #model=LSTM_IC1_CLF(vocab_size=size,n_epochs=10)
  optimizer=optim.Adam(model.parameters(), lr=1e-03)
  X_train, X_val = train_test_split(data, test_size=0.2,random_state=i)
  model.fit(data,X_val,optimizer,pretrained_embeddings=embeddings)  #bert_weights="bert_weights.h5"
  preds=model.predict(data)
  preds=preds.cpu().detach().numpy()
  gold=data.label.to_numpy()
  print("\n__________________________________\n") 
  score = roc_auc_score(gold, preds)
  print("AUC score in ",i+1," fold ",score)
  print("\n__________________________________") #to see results
  avgscore+=score
result=avgscore/5
print("Average AUC score over 5fold MC validation is ",result) 

In [0]:
print("Average score after 5 MC is ",MC_Validation(dataset=dataN,k=5))