# Machine Learning Challenge
## Sentiment Analysis for Text => Toxic Comment Classification

In [1]:
__author__ = "Carolina Jiménez Moreno <cjimenezm0794@gmail.com>"
__version__ = "1.0.0"

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load dataset

In [3]:
import pandas as pd

In [4]:
train_df = pd.read_csv('/content/drive/MyDrive/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/test.csv')
test_labels = pd.read_csv('/content/drive/MyDrive/test_labels.csv')

In [5]:
test_df = test_df.merge(test_labels,how='inner',on='id')
test_df = test_df[test_df['toxic']!=-1]

In [6]:
# Let's label datasets as train and test and then divide
test_df['type'] = 'test'
train_df['type'] = 'train'

In [7]:
# Let's concatenate datasets to use the words from both files as the universe
train_df = pd.concat([train_df,test_df],ignore_index=True)

In [8]:
import nltk
from nltk.stem import WordNetLemmatizer
import re

nltk.download('omw-1.4')
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    # Lemmatizing the texts
    # removing aphostrophe words
    text = text.lower()
    text = re.sub(r"what's", "what is ",str(text)) 
    text = re.sub(r"'s", " ", str(text)) 
    text = re.sub(r"'ve", " have ", str(text)) 
    text = re.sub(r"can't", "cannot ", str(text)) 
    text = re.sub(r"ain't", 'is not', str(text)) 
    text = re.sub(r"won't", 'will not', str(text)) 
    text = re.sub(r"n't", " not ", str(text)) 
    text = re.sub(r"i'm", "i am ", str(text)) 
    text = re.sub(r"'re", " are ", str(text)) 
    text = re.sub(r"'d", " would ", str(text)) 
    text = re.sub(r"'ll", " will ", str(text)) 
    text = re.sub(r"'scuse", " excuse ", str(text)) 
    text = re.sub('W', ' ', str(text)) 
    text = re.sub(' +', ' ', str(text))
    # Remove hyperlinks
    text = re.sub(r"https?://\S+|www\.\S+", ' ', str(text))
    # Remove punctuations, numbers and special characters
    text = re.sub('[^A-Za-z0-9]+', ' ', str(text))
    text = lemmatizer.lemmatize(text)
    text = text.strip(' ')
    return text

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [9]:
# Let's clean up the comment_text in train
train_df['comment_text'] = train_df['comment_text'].map(lambda comment : clean_text(comment))

In [10]:
# Let's create splitted field with lists of the comment words
train_df['splitted'] = train_df['comment_text'].apply(lambda x :x.split()  )

In [11]:
# Let's remove single letter words from the list
def remove_singles(x):
  return [i for i in x if len(i)>1]
train_df['splitted'] = train_df['splitted'].apply(lambda x :  remove_singles(x))

In [12]:
# Let's remove the numbers
def remove_numbers(x):
  return [i for i in x if not i.isdigit()]
train_df['splitted'] = train_df['splitted'].apply(lambda x :  remove_numbers(x))

In [13]:
# Let's remove what has letters and numbers
def remove_alphanumbers(my_list):
  return [x for x in my_list if not any(c.isdigit() for c in x)]
train_df['splitted'] = train_df['splitted'].apply(lambda x: remove_alphanumbers(x))

In [14]:
# Let's create a field with the length of each list of words
train_df['len'] = train_df['splitted'].apply(lambda x :len(x))

In [15]:
train_df

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,type,splitted,len
0,0000997932d777bf,explanation why the edits made under my userna...,0,0,0,0,0,0,train,"[explanation, why, the, edits, made, under, my...",44
1,000103f0d9cfb60f,d aww he matches this background colour i am s...,0,0,0,0,0,0,train,"[aww, he, matches, this, background, colour, a...",14
2,000113f07ec002fd,hey man i am really not trying to edit war it ...,0,0,0,0,0,0,train,"[hey, man, am, really, not, trying, to, edit, ...",42
3,0001b41b1c6bb37e,more i cannot make any real suggestions on imp...,0,0,0,0,0,0,train,"[more, cannot, make, any, real, suggestions, o...",105
4,0001d958c54c6e35,you sir are my hero any chance you remember wh...,0,0,0,0,0,0,train,"[you, sir, are, my, hero, any, chance, you, re...",13
...,...,...,...,...,...,...,...,...,...,...,...
223544,fff8f64043129fa2,jerome i see you never got around to this i m ...,0,0,0,0,0,0,test,"[jerome, see, you, never, got, around, to, thi...",101
223545,fff9d70fe0722906,lucky bastard heh you are famous now i kida en...,0,0,0,0,0,0,test,"[lucky, bastard, heh, you, are, famous, now, k...",11
223546,fffa8a11c4378854,shame on you all you want to speak about gays ...,0,0,0,0,0,0,test,"[shame, on, you, all, you, want, to, speak, ab...",14
223547,fffac2a094c8e0e2,mel gibson is a nazi bitch who makes shitty mo...,1,0,1,0,1,0,test,"[mel, gibson, is, nazi, bitch, who, makes, shi...",25


## Create dictionary

In [16]:
import torch.nn as nn
from torch.utils.data import DataLoader,TensorDataset,Dataset
import torchvision
import torch
from IPython.display import clear_output

In [17]:
# Filter comments that have garbage
train_df = train_df[train_df['len']>0]

In [18]:
# Let's create a list with all the words in the comments
word_list = []
for i in train_df.index:
  for w in train_df.loc[i,'splitted']:
    word_list.append(w)

In [19]:
# Create a df with universe of words
words_df = pd.DataFrame({'w':word_list})

In [20]:
# Remove repeated words
word_corpus = list(words_df['w'].unique())

In [21]:
# Sort the list
word_corpus.sort()

In [22]:
# Create a dictionary of the form 'word': 'number'
word_dict = {word_corpus[i]:i for i in range(len(word_corpus))}

In [23]:
word_dict

{'aa': 0,
 'aaa': 1,
 'aaaa': 2,
 'aaaaa': 3,
 'aaaaaaaa': 4,
 'aaaaaaaaaaaaaaaaaaaaaaaaa': 5,
 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaalllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll': 6,
 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh': 7,
 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaany': 8,
 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh': 9,
 'aaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhh': 10,
 'aaaaaaaaaaaaaaaaaaaalllllllllllllllllllllll': 11,
 'aaaaaaaaaaaaaaaaaaggggggggggggggggggggggggggggggggggggggggggggggg': 12,
 'aaaaaaaaaaaahahahahahahaaaaaaaaaaaaaahahahahahaaaaaaaaaaaaaaahahahahaaaaaaaaaaaaaaaaaaaaaaa': 13,
 'aaaaaaaaaaarrrrrrrrrggggggg': 14,
 'aaaaaaaaaah

In [54]:
# A column is created with same list of words but this time coded with numbers
def coded_words(x,word_dict):
    return [word_dict[w] for w in x if w in word_dict]

In [25]:
train_df['coded'] = train_df['splitted'].apply(lambda x: coded_words(x,word_dict))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['coded'] = train_df['splitted'].apply(lambda x: coded_words(x,word_dict))


In [26]:
train_df

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,type,splitted,len,coded
0,0000997932d777bf,explanation why the edits made under my userna...,0,0,0,0,0,0,train,"[explanation, why, the, edits, made, under, my...",44,"[62365, 205816, 186656, 55444, 111408, 195291,..."
1,000103f0d9cfb60f,d aww he matches this background colour i am s...,0,0,0,0,0,0,train,"[aww, he, matches, this, background, colour, a...",14,"[14338, 80784, 114535, 187579, 14924, 35721, 6..."
2,000113f07ec002fd,hey man i am really not trying to edit war it ...,0,0,0,0,0,0,train,"[hey, man, am, really, not, trying, to, edit, ...",42,"[82303, 112707, 6225, 153670, 129959, 192451, ..."
3,0001b41b1c6bb37e,more i cannot make any real suggestions on imp...,0,0,0,0,0,0,train,"[more, cannot, make, any, real, suggestions, o...",105,"[121816, 28011, 112173, 8798, 153588, 180313, ..."
4,0001d958c54c6e35,you sir are my hero any chance you remember wh...,0,0,0,0,0,0,train,"[you, sir, are, my, hero, any, chance, you, re...",13,"[211069, 171270, 10398, 124141, 82021, 8798, 3..."
...,...,...,...,...,...,...,...,...,...,...,...,...
223544,fff8f64043129fa2,jerome i see you never got around to this i m ...,0,0,0,0,0,0,test,"[jerome, see, you, never, got, around, to, thi...",101,"[95471, 166338, 211069, 127341, 75620, 10971, ..."
223545,fff9d70fe0722906,lucky bastard heh you are famous now i kida en...,0,0,0,0,0,0,test,"[lucky, bastard, heh, you, are, famous, now, k...",11,"[110315, 16875, 81229, 211069, 10398, 63747, 1..."
223546,fffa8a11c4378854,shame on you all you want to speak about gays ...,0,0,0,0,0,0,test,"[shame, on, you, all, you, want, to, speak, ab...",14,"[168278, 133374, 211069, 5424, 211069, 203435,..."
223547,fffac2a094c8e0e2,mel gibson is a nazi bitch who makes shitty mo...,1,0,1,0,1,0,test,"[mel, gibson, is, nazi, bitch, who, makes, shi...",25,"[116523, 73578, 92901, 125908, 20894, 205675, ..."


## Configure train and test set

In [27]:
# 10-column embed model
embedding = nn.Embedding(len(word_dict), 10)

In [28]:
# Let's filter train and test again
df_train = train_df[train_df['type']=='train']
df_test = train_df[train_df['type']=='test']

In [29]:
# Get labels of each group
Y_train = torch.tensor(torch.tensor(df_train[df_train.columns[2:8]].to_numpy()))
Y_test = torch.tensor(torch.tensor(df_test[df_test.columns[2:8]].to_numpy()))

  Y_train = torch.tensor(torch.tensor(df_train[df_train.columns[2:8]].to_numpy()))
  Y_test = torch.tensor(torch.tensor(df_test[df_test.columns[2:8]].to_numpy()))


In [30]:
Y_test

tensor([[0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        ...,
        [0, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0]])

In [31]:
# Embed columns are created
df_train['vect_tensor'] = df_train['coded'].apply(lambda x: embedding(torch.tensor(x).long()))
df_test['vect_tensor'] = df_test['coded'].apply(lambda x: embedding(torch.tensor(x).long()))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['vect_tensor'] = df_train['coded'].apply(lambda x: embedding(torch.tensor(x).long()))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['vect_tensor'] = df_test['coded'].apply(lambda x: embedding(torch.tensor(x).long()))


In [32]:
df_test

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,type,splitted,len,coded,vect_tensor
159571,0001ea8717f6de06,thank you for understanding i think very highl...,0,0,0,0,0,0,test,"[thank, you, for, understanding, think, very, ...",15,"[186525, 211069, 67795, 195460, 187476, 200636...","[[tensor(0.8067, grad_fn=<UnbindBackward0>), t..."
159572,000247e83dcc1211,dear god this site is horrible,0,0,0,0,0,0,test,"[dear, god, this, site, is, horrible]",6,"[44625, 74791, 187579, 171388, 92901, 84655]","[[tensor(0.8508, grad_fn=<UnbindBackward0>), t..."
159573,0002f87b16116a7f,somebody will invariably try to add religion r...,0,0,0,0,0,0,test,"[somebody, will, invariably, try, to, add, rel...",69,"[174049, 207037, 92111, 192434, 189276, 1993, ...","[[tensor(0.4611, grad_fn=<UnbindBackward0>), t..."
159574,0003e1cccfd5a40a,it says it right there that it is a type the t...,0,0,0,0,0,0,test,"[it, says, it, right, there, that, it, is, typ...",81,"[93472, 164338, 93472, 158979, 187099, 186592,...","[[tensor(-0.7552, grad_fn=<UnbindBackward0>), ..."
159575,00059ace3e3e9a53,before adding a new product to the list make s...,0,0,0,0,0,0,test,"[before, adding, new, product, to, the, list, ...",51,"[17994, 2044, 127382, 147905, 189276, 186656, ...","[[tensor(-0.2818, grad_fn=<UnbindBackward0>), ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
223544,fff8f64043129fa2,jerome i see you never got around to this i m ...,0,0,0,0,0,0,test,"[jerome, see, you, never, got, around, to, thi...",101,"[95471, 166338, 211069, 127341, 75620, 10971, ...","[[tensor(0.7361, grad_fn=<UnbindBackward0>), t..."
223545,fff9d70fe0722906,lucky bastard heh you are famous now i kida en...,0,0,0,0,0,0,test,"[lucky, bastard, heh, you, are, famous, now, k...",11,"[110315, 16875, 81229, 211069, 10398, 63747, 1...","[[tensor(0.8081, grad_fn=<UnbindBackward0>), t..."
223546,fffa8a11c4378854,shame on you all you want to speak about gays ...,0,0,0,0,0,0,test,"[shame, on, you, all, you, want, to, speak, ab...",14,"[168278, 133374, 211069, 5424, 211069, 203435,...","[[tensor(0.2971, grad_fn=<UnbindBackward0>), t..."
223547,fffac2a094c8e0e2,mel gibson is a nazi bitch who makes shitty mo...,1,0,1,0,1,0,test,"[mel, gibson, is, nazi, bitch, who, makes, shi...",25,"[116523, 73578, 92901, 125908, 20894, 205675, ...","[[tensor(1.3340, grad_fn=<UnbindBackward0>), t..."


In [33]:
# Function that calculates average vector

def average_tensor(x):
  tensor_d = torch.zeros((1,10))
  for t in x:
    tensor_d += t
  return tensor_d/x.shape[0]

### Create train tensor

In [36]:
# Training tensor (it's the average of all words of each comment)

X_train_tensor = torch.zeros((df_train.shape[0],10))
j = 0
for i in df_train.index:
  dtensor = df_train.loc[i,'vect_tensor']
  X_train_tensor[j,:] = average_tensor(dtensor)
  j+=1

### Tensors must be saved in order to continue with the execution because the training tensor creation process consumes a lot of RAM

In [37]:
torch.save(X_train_tensor,'/content/drive/MyDrive/ML/train_tensor.pt') # guardar tensor

In [38]:
torch.save(Y_train,'/content/drive/MyDrive/ML/train_labels.pt')# guardar tensor

# Session must be restarted and rerun everything except the training tensor creation

## Create test tensor

In [35]:
# Let's create test tensor
X_test_tensor = torch.zeros((df_test.shape[0],10))
j = 0
for i in df_test.index:
  dtensor = df_test.loc[i,'vect_tensor']
  X_test_tensor[j,:] = average_tensor(dtensor)
  j += 1

In [36]:
# Save tensors just in case
torch.save(X_test_tensor,'/content/drive/MyDrive/ML/test_tensor.pt')
torch.save(Y_test,'/content/drive/MyDrive/ML/test_labels.pt')

## Load all tensors

In [37]:
X_train_tensor=torch.load('/content/drive/MyDrive/ML/train_tensor.pt')
Y_train_tensor=torch.load('/content/drive/MyDrive/ML/train_labels.pt')
X_test_tensor=torch.load('/content/drive/MyDrive/ML/test_tensor.pt')
Y_test_tensor=torch.load('/content/drive/MyDrive/ML/test_labels.pt')

In [38]:
ref = Y_train_tensor.sum(axis=1)

In [39]:
import numpy as np

# Get 20 thousand random samples of 0,0,0,0,0,0 (indices only)
i =0
chosen = []
while len(chosen)<=20000:
    n = np.random.choice([i for i in range(len(Y_train_tensor))])
    if ref[n] == 0 and n not in chosen:
        chosen.append(n)
        #print(len(chosen))

In [40]:
import pandas as pd

pd.DataFrame({'n':chosen}).to_csv('/content/drive/MyDrive/ML/ceros.csv')

In [41]:
# Indices of those that are not only 0,0,0,0,0,0 are saved
chosen_2 = []
for i in range(len(ref)):
    if ref[i] >= 1:
        chosen_2.append(i)

In [42]:
# Let's join the indices
final_chosen = chosen+chosen_2
final_chosen.sort()

In [43]:
# Get the balanced dataset
X_train_tensor = X_train_tensor[final_chosen]
Y_train_tensor = Y_train_tensor[final_chosen]

## Train model

In [44]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


### Model

In [45]:
# Base model: network of a neuron with 6 outputs
class base_line(nn.Module):
  def __init__(self,fin,out):
    super(base_line,self).__init__()
    self.out = out
    self.fin = fin
    self.fc1 = nn.Linear(self.fin,2048)
    self.fc2 = nn.Linear(2048,1024)
    self.fc3 = nn.Linear(1024,512)
    self.relu = nn.ReLU()
    self.fc4 = nn.Linear(512,self.out)
    self.sigmoid = nn.Sigmoid()

  def forward(self,x):
    out = self.fc1(x)
    out = self.fc2(out)
    out = self.fc3(out)
    our = self.relu(out)
    out = self.fc4(out)
    out = self.sigmoid(out)
    return out

In [46]:
import numpy as np
import math

# Let's check that there are no NaNs in train
for i in range(len(X_train_tensor)):
  if math.isnan(X_train_tensor[i].mean()):
    print(i)

In [47]:
# Let's check that there are no NaNs in test
for i in range(len(X_test_tensor)):
  if math.isnan(X_test_tensor[i].mean()):
    print(i)

In [48]:
X_train_tensor = X_train_tensor.detach()
Y_train_tensor = Y_train_tensor.detach()

In [49]:
X_train_tensor.shape

torch.Size([36221, 10])

In [50]:
# Create datasets
X_train = TensorDataset(X_train_tensor,Y_train_tensor)
X_test = TensorDataset(X_test_tensor,Y_test_tensor)

In [51]:
# Create dataloaders
train_dataloader = DataLoader(X_train,batch_size=32,shuffle=True)
test_dataloader = DataLoader(X_test,batch_size=32,shuffle=True)

In [52]:
# Define optimizing model and times
model = base_line(10,6).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.BCELoss()

In [53]:
# Training
epochs = 30
max_acc = 0.75

for e in range(epochs):
  loss_run_train = []
  acc_run_train = []

  print(f'Epoch {e}/{epochs}')
  model = model.train()
  for x,y in train_dataloader:
    x = x.to(device)
    y = y.to(device)
    z = model(x)
    
    loss = loss_fn(z,y.float())
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_run_train.append(loss.item())

    preds = []
    for t in z:
      preds.append([1 if i>0.5 else 0 for i in t])

    corrects = 0
    for p,yo in zip(y,torch.tensor(preds).to(device)):
      if torch.sum(p==yo).item()==6:
        #print(p,yo)
        corrects += 1
    #print(f'corrects:{corrects}/{y.shape[0]}')
    
    acc_run_train.append(corrects/y.shape[0])
  
  model.eval()
  loss_run_test = []
  acc_run_test = []
  for x,y in test_dataloader:
      x = x.to(device)
      y = y.to(device)
      z = model(x)
      loss = loss_fn(z,y.float())
      loss_run_test.append(loss.item())
      preds = []

      # las predicciones se cuentan correctas cuando todo el tensor de salida de 6 posiciones es identico al ground truth
      ######
      for t in z:
          preds.append([1 if i>0.5 else 0 for i in t])
          corrects=0
      for p,yo in zip(y,torch.tensor(preds).to(device)):
          if torch.sum(p==yo).item()==6:
              #print(p,yo)
              corrects += 1
      ########
      acc_run_test.append(corrects/y.shape[0])
  if np.mean(acc_run_test)>max_acc:
      print('Saving best model!')
      max_acc = np.mean(acc_run_test)
      torch.save(model.state_dict(), f'/content/drive/MyDrive/ML/model_{e}_{round(100*max_acc,2)}.pth')

  print(f' + [train] loss:{np.mean(loss_run_train)} acc:{np.mean(acc_run_train)}')
  print(f' - [test] loss:{np.mean(loss_run_test)} acc:{np.mean(acc_run_test)}')

Epoch 0/30
Saving best model!
 + [train] loss:0.3547021203243269 acc:0.5027482256000975
 - [test] loss:0.23396374876763418 acc:0.812993073047859
Epoch 1/30
Saving best model!
 + [train] loss:0.34894296363680605 acc:0.5159381853905203
 - [test] loss:0.19348287638323733 acc:0.8577078085642317
Epoch 2/30
 + [train] loss:0.34952715656624667 acc:0.5127082825636652
 - [test] loss:0.2083922236181927 acc:0.8260125944584383
Epoch 3/30
Saving best model!
 + [train] loss:0.34779263676107985 acc:0.5187901714999391
 - [test] loss:0.19161069544421636 acc:0.8635887909319899
Epoch 4/30
 + [train] loss:0.3466790549641363 acc:0.5211033645059095
 - [test] loss:0.22428721631654264 acc:0.8159370277078085
Epoch 5/30
 + [train] loss:0.3467622251634884 acc:0.5182599457779944
 - [test] loss:0.21372695878260684 acc:0.8120730478589422
Epoch 6/30
 + [train] loss:0.3462338454152586 acc:0.5202894632630681
 - [test] loss:0.21625586884748424 acc:0.8380673803526448
Epoch 7/30
 + [train] loss:0.3456705396751422 acc:0.5

# Eval model

In [55]:
def converter(words):
    return average_tensor( embedding(torch.tensor(coded_words(words.split(), word_dict))))

In [56]:
text = "If ya not... still fu*k u"
text = clean_text(text)
out = converter(text)
res = model(out.to(device))
res

tensor([[0.4134, 0.0573, 0.2934, 0.0075, 0.2001, 0.0347]], device='cuda:0',
       grad_fn=<SigmoidBackward0>)

In [59]:
res[0][0].item()

0.41337257623672485

In [60]:
res[0][1].item()

0.057300958782434464

In [62]:
import json

with open("/content/drive/MyDrive/ML/word_dict.json", "w") as write_file:
    json.dump(word_dict, write_file, indent=4)