In [1]:
!pip install PyDrive

import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from __future__ import division
from __future__ import print_function

import os
import random

import torch
import torch.nn as nn
import torch.optim as optim
from treelstm import Constants
from treelstm import Vocab
from treelstm import HATEDataset
from treelstm import Metrics
from treelstm import utils
from config import parse_args

from tqdm import tqdm

import torch
import torch.nn.functional as F

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score


In [30]:
class ChildSumTreeLSTM(nn.Module):
    def __init__(self, in_dim, mem_dim):
        super(ChildSumTreeLSTM, self).__init__()
        self.in_dim = in_dim
        self.mem_dim = mem_dim
        self.ioux = nn.Linear(self.in_dim, 3 * self.mem_dim)
        self.iouh = nn.Linear(self.mem_dim, 3 * self.mem_dim)
        self.fx = nn.Linear(self.in_dim, self.mem_dim)
        self.fh = nn.Linear(self.mem_dim, self.mem_dim)

    def node_forward(self, inputs, child_c, child_h):
        child_h_sum = torch.sum(child_h, dim=0, keepdim=True)

        iou = self.ioux(inputs) + self.iouh(child_h_sum)
        i, o, u = torch.split(iou, iou.size(1) // 3, dim=1)
        i, o, u = torch.sigmoid(i), torch.sigmoid(o), torch.tanh(u)

        f = torch.sigmoid(
            self.fh(child_h) +
            self.fx(inputs).repeat(len(child_h), 1)
        )
        fc = torch.mul(f, child_c)

        c = torch.mul(i, u) + torch.sum(fc, dim=0, keepdim=True)
        h = torch.mul(o, torch.tanh(c))
        return c, h

    def forward(self, tree, inputs):
        for idx in range(tree.num_children):
            self.forward(tree.children[idx], inputs)

        if tree.num_children == 0:
            child_c = inputs[0].detach().new(1, self.mem_dim).fill_(0.).requires_grad_()
            child_h = inputs[0].detach().new(1, self.mem_dim).fill_(0.).requires_grad_()
        else:
            child_c, child_h = zip(* map(lambda x: x.state, tree.children))
            child_c, child_h = torch.cat(child_c, dim=0), torch.cat(child_h, dim=0)
        tree.state = self.node_forward(inputs[tree.idx], child_c, child_h)
        return tree.state


In [31]:
class Predict(nn.Module):
    def __init__(self, mem_dim,hidden_dim,num_classes):
        super(Predict, self).__init__()
        self.mem_dim = mem_dim
        self.hidden_dim = hidden_dim 
        self.num_classes = num_classes
        self.wh = nn.Linear(self.mem_dim, self.hidden_dim)
        self.wp = nn.Linear(self.hidden_dim, self.num_classes)
    def forward(self,vec):
        out = F.relu(self.wh(vec))
        out = self.wp(out)
        return out



class SimilarityTreeLSTM(nn.Module):
    def __init__(self, vocab_size, in_dim, mem_dim, hidden_dim, num_classes, sparsity, freeze):
        super(SimilarityTreeLSTM, self).__init__()
        self.emb = nn.Embedding(vocab_size, in_dim, padding_idx=Constants.PAD, sparse=sparsity)
        if freeze:
            self.emb.weight.requires_grad = False
        self.childsumtreelstm = ChildSumTreeLSTM(in_dim, mem_dim)
        self.predict = Predict(mem_dim, hidden_dim, num_classes)

    def forward(self, ltree, linputs):
        linputs = self.emb(linputs)
        lstate, lhidden = self.childsumtreelstm(ltree, linputs)
        output = self.predict(lstate)
        return output

## Initialization

In [8]:
class args_init():
    def __init__(self):
        self.seed=123
        self.data='hate_data/'
        self.glove='/content/drive/MyDrive/'
        self.save='checkpoints/attention/'
        self.expname='test'
        self.input_dim=300
        self.mem_dim=150
        self.hidden_dim=50
        self.num_classes=3
        self.epochs=15
        self.batch_size=15
        self.lr=0.01
        self.sparse=False
        self.wd=1e-4
        self.freeze_embed=False
        self.optim='adagrad'
        self.cuda=True

In [9]:
class Trainer(object):
    def __init__(self, args, model, criterion, optimizer, device):
        super(Trainer, self).__init__()
        self.args = args
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device
        self.epoch = 0

    def train(self, dataset):
        error_count=0
        self.model.train()
        self.optimizer.zero_grad()
        total_loss = 0.0
        indices = torch.randperm(len(dataset), dtype=torch.long, device='cpu')
        for idx in range(len(dataset)):
            tree, sentence, label, tweet = dataset[indices[idx]]
            target = utils.map_label_to_target(label, 3)
            sentence = sentence.to(self.device)
            target = target.to(self.device)
            output = self.model(tree, sentence)
            loss = self.criterion(output, target)
            total_loss += loss.item()
            loss.backward()
            if idx % self.args.batch_size == 0 and idx > 0:
                self.optimizer.step()
                self.optimizer.zero_grad()

        self.epoch += 1
        return total_loss / len(dataset)
        
    def test(self, dataset):
        self.model.eval()
        with torch.no_grad():
            error_count=0
            total_loss = 0.0
            predictions = torch.zeros(len(dataset), dtype=torch.float, device='cpu')
            accuracy=0
            for idx in tqdm(range(len(dataset)), desc='Testing epoch  ' + str(self.epoch) + ''):
                ltree, linput, label,tweet = dataset[idx]
                target = utils.map_label_to_target(label, 3)
                linput= linput.to(self.device)
                target = target.to(self.device)
                output = self.model(ltree, linput)
                loss = self.criterion(output, target)
                total_loss += loss.item()
                output = output.squeeze().to('cpu')
                predictions[idx] = torch.argmax(F.softmax(output))
        return total_loss / len(dataset), predictions


In [10]:
args=args_init()

In [11]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(args.seed)
random.seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True
if not os.path.exists(args.save):
    os.makedirs(args.save)

train_dir = os.path.join(args.data, 'train/')
dev_dir = os.path.join(args.data, 'dev/')
test_dir = os.path.join(args.data, 'test/')

hate_vocab_file = os.path.join(args.data, 'hate.vocab')
if not os.path.isfile(hate_vocab_file):
    token_files = [os.path.join(split, 'data.toks') for split in [train_dir, dev_dir, test_dir]]

    hate_vocab_file = os.path.join(args.data, 'hate.vocab')
    utils.build_vocab(token_files, hate_vocab_file)

vocab = Vocab(filename=hate_vocab_file,
              data=[Constants.PAD_WORD, Constants.UNK_WORD,
                    Constants.BOS_WORD, Constants.EOS_WORD])
print('==> HATE vocabulary size : %d ' % vocab.size())

train_file = os.path.join(args.data, 'hate_train.pth')
if os.path.isfile(train_file):
    train_dataset = torch.load(train_file)
else:
    train_dataset = HATEDataset(train_dir, vocab, args.num_classes)
    torch.save(train_dataset, train_file)
print('==> Size of train data   : %d ' % len(train_dataset))
dev_file = os.path.join(args.data, 'hate_dev.pth')
if os.path.isfile(dev_file):
    dev_dataset = torch.load(dev_file)
else:
    dev_dataset = HATEDataset(dev_dir, vocab, args.num_classes)
    torch.save(dev_dataset, dev_file)
print('==> Size of dev data     : %d ' % len(dev_dataset))
test_file = os.path.join(args.data, 'hate_test.pth')
if os.path.isfile(test_file):
    test_dataset = torch.load(test_file)
else:
    test_dataset = HATEDataset(test_dir, vocab, args.num_classes)
    torch.save(test_dataset, test_file)
print('==> Size of test data    : %d ' % len(test_dataset))

==> HATE vocabulary size : 35288 
==> Size of train data   : 19826 
==> Size of dev data     : 2478 
==> Size of test data    : 2479 


## MODEL

In [12]:
model = SimilarityTreeLSTM(
        vocab.size(),
        args.input_dim,
        args.mem_dim,
        args.hidden_dim,
        args.num_classes,
        args.sparse,
        args.freeze_embed)
criterion =nn.BCEWithLogitsLoss()
model.cuda()

SimilarityTreeLSTM(
  (emb): Embedding(35288, 300, padding_idx=0)
  (childsumtreelstm): ChildSumTreeLSTM(
    (ioux): Linear(in_features=300, out_features=450, bias=True)
    (iouh): Linear(in_features=150, out_features=450, bias=True)
    (fx): Linear(in_features=300, out_features=150, bias=True)
    (fh): Linear(in_features=150, out_features=150, bias=True)
  )
  (predict): Predict(
    (wh): Linear(in_features=150, out_features=50, bias=True)
    (wp): Linear(in_features=50, out_features=3, bias=True)
  )
)

## GloVe embedding

In [13]:
emb_file = os.path.join(args.data, 'hate_embed.pth')
if os.path.isfile(emb_file):
    emb = torch.load(emb_file)
else:
    glove_vocab, glove_emb = utils.load_word_vectors(
        os.path.join(args.glove, 'glove.840B.300d'))
    print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
    emb = torch.zeros(vocab.size(), glove_emb.size(1), dtype=torch.float, device=device)
    emb.normal_(0, 0.05)
    for idx, item in enumerate([Constants.PAD_WORD, Constants.UNK_WORD,
                                Constants.BOS_WORD, Constants.EOS_WORD]):
        emb[idx].zero_()
    for word in vocab.labelToIdx.keys():
        if glove_vocab.getIndex(word):
            emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
    torch.save(emb, emb_file)

model.emb.weight.data.copy_(emb)

model.to(device), criterion.to(device)
if args.optim == 'adam':
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=args.lr, weight_decay=args.wd)
elif args.optim == 'adagrad':
    optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                     model.parameters()), lr=args.lr, weight_decay=args.wd)
elif args.optim == 'sgd':
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()), lr=args.lr, weight_decay=args.wd)
metrics = Metrics(args.num_classes)

## Training the model

In [14]:
trainer = Trainer(args, model, criterion, optimizer, device)

for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        test_loss, test_pred = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        print('==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(
            epoch, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        print('==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(
            epoch, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        print('==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(
            epoch, test_loss, test_pearson, test_mse))


Testing epoch  1: 100%|██████████| 19826/19826 [02:50<00:00, 116.34it/s]
Testing epoch  1: 100%|██████████| 2478/2478 [00:21<00:00, 115.30it/s]
Testing epoch  1: 100%|██████████| 2479/2479 [00:21<00:00, 117.17it/s]


==> Epoch 0, Train 	Loss: 0.13215222259597081	Pearson: 0.784631609916687	MSE: 0.08720871806144714
==> Epoch 0, Dev 	Loss: 0.15293994492043994	Pearson: 0.7302547693252563	MSE: 0.10936238616704941
==> Epoch 0, Test 	Loss: 0.15426593654181453	Pearson: 0.7007637619972229	MSE: 0.11133521795272827


Testing epoch  2: 100%|██████████| 19826/19826 [02:52<00:00, 114.80it/s]
Testing epoch  2: 100%|██████████| 2478/2478 [00:21<00:00, 116.98it/s]
Testing epoch  2: 100%|██████████| 2479/2479 [00:21<00:00, 115.77it/s]


==> Epoch 1, Train 	Loss: 0.0944246005025635	Pearson: 0.8578209280967712	MSE: 0.058660343289375305
==> Epoch 1, Dev 	Loss: 0.15667790403545814	Pearson: 0.7313480377197266	MSE: 0.11138014495372772
==> Epoch 1, Test 	Loss: 0.16516757347616087	Pearson: 0.7036624550819397	MSE: 0.11173860728740692


Testing epoch  3: 100%|██████████| 19826/19826 [02:47<00:00, 118.07it/s]
Testing epoch  3: 100%|██████████| 2478/2478 [00:20<00:00, 120.14it/s]
Testing epoch  3: 100%|██████████| 2479/2479 [00:21<00:00, 115.21it/s]


==> Epoch 2, Train 	Loss: 0.06412495219072116	Pearson: 0.9289300441741943	MSE: 0.029910219833254814
==> Epoch 2, Dev 	Loss: 0.1751469620333564	Pearson: 0.7169526815414429	MSE: 0.11743341386318207
==> Epoch 2, Test 	Loss: 0.18674602221367725	Pearson: 0.6715011596679688	MSE: 0.12626059353351593


Testing epoch  4: 100%|██████████| 19826/19826 [02:52<00:00, 114.98it/s]
Testing epoch  4: 100%|██████████| 2478/2478 [00:20<00:00, 118.38it/s]
Testing epoch  4: 100%|██████████| 2479/2479 [00:21<00:00, 115.15it/s]


==> Epoch 3, Train 	Loss: 0.03760693609738361	Pearson: 0.9487018585205078	MSE: 0.021587813273072243
==> Epoch 3, Dev 	Loss: 0.2506877738595704	Pearson: 0.7116430401802063	MSE: 0.114205002784729
==> Epoch 3, Test 	Loss: 0.25923866282269836	Pearson: 0.7085686922073364	MSE: 0.10609116405248642


Testing epoch  5: 100%|██████████| 19826/19826 [02:50<00:00, 115.98it/s]
Testing epoch  5: 100%|██████████| 2478/2478 [00:21<00:00, 115.90it/s]
Testing epoch  5: 100%|██████████| 2479/2479 [00:21<00:00, 117.24it/s]


==> Epoch 4, Train 	Loss: 0.01719328267453565	Pearson: 0.9789620637893677	MSE: 0.009028548374772072
==> Epoch 4, Dev 	Loss: 0.265472170210779	Pearson: 0.700954258441925	MSE: 0.1283292919397354
==> Epoch 4, Test 	Loss: 0.276041755492336	Pearson: 0.6845922470092773	MSE: 0.12182331830263138


Testing epoch  6: 100%|██████████| 19826/19826 [02:50<00:00, 116.29it/s]
Testing epoch  6: 100%|██████████| 2478/2478 [00:20<00:00, 118.82it/s]
Testing epoch  6: 100%|██████████| 2479/2479 [00:21<00:00, 116.72it/s]


==> Epoch 5, Train 	Loss: 0.01048581303891821	Pearson: 0.9887640476226807	MSE: 0.004791687708348036
==> Epoch 5, Dev 	Loss: 0.3143124163691485	Pearson: 0.6973065733909607	MSE: 0.12711864709854126
==> Epoch 5, Test 	Loss: 0.3266737072731509	Pearson: 0.6999360918998718	MSE: 0.11294876784086227


Testing epoch  7: 100%|██████████| 19826/19826 [02:49<00:00, 116.88it/s]
Testing epoch  7: 100%|██████████| 2478/2478 [00:21<00:00, 114.52it/s]
Testing epoch  7: 100%|██████████| 2479/2479 [00:20<00:00, 119.44it/s]


==> Epoch 6, Train 	Loss: 0.00860040486665525	Pearson: 0.9901694655418396	MSE: 0.004186422098428011
==> Epoch 6, Dev 	Loss: 0.3509296525690115	Pearson: 0.717474639415741	MSE: 0.11581920832395554
==> Epoch 6, Test 	Loss: 0.3649024811287166	Pearson: 0.7082690000534058	MSE: 0.10891488194465637


Testing epoch  8: 100%|██████████| 19826/19826 [02:56<00:00, 112.22it/s]
Testing epoch  8: 100%|██████████| 2478/2478 [00:21<00:00, 114.86it/s]
Testing epoch  8: 100%|██████████| 2479/2479 [00:21<00:00, 114.47it/s]


==> Epoch 7, Train 	Loss: 0.005699498035953817	Pearson: 0.9937180876731873	MSE: 0.002673257375136018
==> Epoch 7, Dev 	Loss: 0.3715274472135388	Pearson: 0.7110110521316528	MSE: 0.12106537818908691
==> Epoch 7, Test 	Loss: 0.38152389967894473	Pearson: 0.6873738169670105	MSE: 0.11940298229455948


Testing epoch  9: 100%|██████████| 19826/19826 [02:52<00:00, 114.97it/s]
Testing epoch  9: 100%|██████████| 2478/2478 [00:20<00:00, 118.23it/s]
Testing epoch  9: 100%|██████████| 2479/2479 [00:21<00:00, 117.09it/s]


==> Epoch 8, Train 	Loss: 0.0048992095222901546	Pearson: 0.9941954016685486	MSE: 0.002471502171829343
==> Epoch 8, Dev 	Loss: 0.4080363549427375	Pearson: 0.6972534656524658	MSE: 0.12953995168209076
==> Epoch 8, Test 	Loss: 0.42409087808546886	Pearson: 0.6908644437789917	MSE: 0.12061315029859543


Testing epoch  10: 100%|██████████| 19826/19826 [02:47<00:00, 118.08it/s]
Testing epoch  10: 100%|██████████| 2478/2478 [00:21<00:00, 117.78it/s]
Testing epoch  10: 100%|██████████| 2479/2479 [00:20<00:00, 119.94it/s]


==> Epoch 9, Train 	Loss: 0.003641408437803716	Pearson: 0.9956061840057373	MSE: 0.0018662362126633525
==> Epoch 9, Dev 	Loss: 0.43157868645756375	Pearson: 0.6981258392333984	MSE: 0.12953995168209076
==> Epoch 9, Test 	Loss: 0.4419457027933511	Pearson: 0.6703190803527832	MSE: 0.12868091464042664


Testing epoch  11: 100%|██████████| 19826/19826 [02:50<00:00, 116.24it/s]
Testing epoch  11: 100%|██████████| 2478/2478 [00:20<00:00, 118.43it/s]
Testing epoch  11: 100%|██████████| 2479/2479 [00:20<00:00, 118.56it/s]


==> Epoch 10, Train 	Loss: 0.00331669196103469	Pearson: 0.996779203414917	MSE: 0.0013618480879813433
==> Epoch 10, Dev 	Loss: 0.44576713797293643	Pearson: 0.6978472471237183	MSE: 0.1287328451871872
==> Epoch 10, Test 	Loss: 0.4619130476200637	Pearson: 0.6738057732582092	MSE: 0.12626059353351593


Testing epoch  12: 100%|██████████| 19826/19826 [02:49<00:00, 116.81it/s]
Testing epoch  12: 100%|██████████| 2478/2478 [00:20<00:00, 119.41it/s]
Testing epoch  12: 100%|██████████| 2479/2479 [00:21<00:00, 117.58it/s]


==> Epoch 11, Train 	Loss: 0.0025736114531559233	Pearson: 0.9963154792785645	MSE: 0.0015636032912880182
==> Epoch 11, Dev 	Loss: 0.4714903377533766	Pearson: 0.6904826760292053	MSE: 0.1307506114244461
==> Epoch 11, Test 	Loss: 0.4843685806119319	Pearson: 0.6742327809333801	MSE: 0.12545381486415863


Testing epoch  13: 100%|██████████| 19826/19826 [02:50<00:00, 116.16it/s]
Testing epoch  13: 100%|██████████| 2478/2478 [00:20<00:00, 118.99it/s]
Testing epoch  13: 100%|██████████| 2479/2479 [00:20<00:00, 119.80it/s]


==> Epoch 12, Train 	Loss: 0.0026029717977020454	Pearson: 0.9973704814910889	MSE: 0.0011096539674326777
==> Epoch 12, Dev 	Loss: 0.49593172587505757	Pearson: 0.684248685836792	MSE: 0.13680388033390045
==> Epoch 12, Test 	Loss: 0.5124826312455948	Pearson: 0.6657966375350952	MSE: 0.13190802931785583


Testing epoch  14: 100%|██████████| 19826/19826 [02:49<00:00, 116.95it/s]
Testing epoch  14: 100%|██████████| 2478/2478 [00:20<00:00, 118.13it/s]
Testing epoch  14: 100%|██████████| 2479/2479 [00:20<00:00, 119.60it/s]


==> Epoch 13, Train 	Loss: 0.0018887222253474976	Pearson: 0.9981882572174072	MSE: 0.0007565822452306747
==> Epoch 13, Dev 	Loss: 0.5109500752250246	Pearson: 0.6869653463363647	MSE: 0.13236480951309204
==> Epoch 13, Test 	Loss: 0.526395234477791	Pearson: 0.6721993684768677	MSE: 0.12545381486415863


Testing epoch  15: 100%|██████████| 19826/19826 [02:50<00:00, 116.25it/s]
Testing epoch  15: 100%|██████████| 2478/2478 [00:21<00:00, 116.12it/s]
Testing epoch  15: 100%|██████████| 2479/2479 [00:20<00:00, 118.20it/s]

==> Epoch 14, Train 	Loss: 0.0017768173070414148	Pearson: 0.9973676204681396	MSE: 0.0011096539674326777
==> Epoch 14, Dev 	Loss: 0.5152169809444292	Pearson: 0.6798332929611206	MSE: 0.13720741868019104
==> Epoch 14, Test 	Loss: 0.5315357883341573	Pearson: 0.6726406812667847	MSE: 0.12787413597106934





## Running the model on test set

In [15]:
test_loss,predictions = trainer.test(test_dataset)


Testing epoch  15: 100%|██████████| 2479/2479 [00:21<00:00, 115.06it/s]


## Accuracy of the model

In [29]:
f = f1_score( test_dataset.labels,predictions, average='weighted')
print("F1 Score of tree LSTM: ", f)
print('')

p = precision_score(test_dataset.labels, predictions, average='weighted')
print("Precision Score of tree LSTM: ", p)
print('')

r = recall_score(test_dataset.labels, predictions, average='weighted')
print("Recall Score of tree LSTM: ", r)
print('')

a = accuracy_score(test_dataset.labels, predictions)
print("Accuracy Score of tree LSTM: ", a)

F1 Score of tree LSTM:  0.8950890054890392

Precision Score of tree LSTM:  0.8939502496933397

Recall Score of tree LSTM:  0.8963291649858814

Accuracy Score of tree LSTM:  0.8963291649858814
