In [1]:
import json
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import spacy
from operator import itemgetter
import numpy as np
import io
import random
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import time
import torch.nn.functional as F

# Load spaCy's 'en_core_web_sm' pipeline once at import time.
# NOTE(review): `nlp` is not referenced by any of the cells visible here;
# presumably it is used further down the notebook - confirm before removing.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Probe for a usable CUDA device and pick the worker count that goes with it:
# 8 workers when a GPU is available, none otherwise.
cuda = torch.cuda.is_available()
print('CUDA is', cuda)

if cuda:
    num_workers = 8
else:
    num_workers = 0
print(num_workers)

CUDA is False
0


In [3]:
# Parse the GloVe text file into a {word: embedding} lookup.
# Each non-empty line is "<word> <v1> <v2> ...": the first whitespace-separated
# token is the word, the remaining tokens are the vector components.
with io.open('../Data/glove.6B.50d.txt', 'r', encoding='utf8') as f:
    glove_file = f.read()

glove_sentences = glove_file.splitlines()
glove_vocab = {}
for sentence in glove_sentences:
    # Tokenise once per line instead of twice.
    fields = sentence.split()
    if not fields:
        # A blank / whitespace-only line has no word token; indexing [0] on it
        # would raise IndexError, so skip it.
        continue
    word = fields[0]
    glove_vocab[word] = np.array(fields[1:], dtype=float)

In [4]:
def _ambiguity_scores(records):
    """Map each record's first field to ``[record[2], score]``.

    ``score`` is 0 when the record's fourth field equals ``'a'`` and 1
    otherwise. If the same key occurs more than once, the last record wins.
    """
    return {rec[0]: [rec[2], 0 if rec[3] == 'a' else 1] for rec in records}


# Ambiguity scores from the two tagged files. Only the last element of each
# JSON document (`data[-1]`) is used.
# The files are opened with `with` so the handles are closed as soon as the
# JSON has been parsed (the bare open() calls used previously leaked them).
with open('../Data/furniture_cleaned-tagged_m.json') as f:
    data = json.load(f)
ambiguity_m = _ambiguity_scores(data[-1])

with open('../Data/furniture_cleaned-tagged_a.json') as f:
    data = json.load(f)
ambiguity_a = _ambiguity_scores(data[-1])
    

In [5]:
# Merge both label sets into a single {adjective: score} dict, keeping only
# adjectives that have a GloVe embedding.
# Sources are visited in priority order: Akshat's labels (ambiguity_a) first,
# so they win whenever both annotators tagged the same adjective. Swap the
# tuple order to give preference to Manuel's labels (ambiguity_m) instead.
#
# NOTE(review): the previous code also tested `ambiguity_x[adj] != 0`, but each
# value is a two-element list, so that comparison was always True and filtered
# nothing. It has been dropped; if a filter on one of the list fields was
# intended, it needs to be added explicitly.
ambiguity = {}

for labels in (ambiguity_a, ambiguity_m):
    for adj, entry in labels.items():
        if adj in glove_vocab and adj not in ambiguity:
            ambiguity[adj] = entry[1]

In [6]:
# Build [embedding, label] pairs and split them 90% / 10% into train and test.
SPLIT_SEED = 0  # fixed so the train/test membership is identical on every run

all_data = [[glove_vocab[adj], ambiguity[adj]] for adj in ambiguity]

# Shuffle with a private, seeded generator: the split becomes reproducible
# without touching the state of the global `random` module.
random.Random(SPLIT_SEED).shuffle(all_data)

size = len(all_data)
split_index = int(size * 0.9)
training_data = all_data[:split_index]
test_data = all_data[split_index:]


In [7]:
class MyDataset(Dataset):
    """Dataset over a sequence of ``(numpy_array, label)`` samples."""

    def __init__(self, X):
        # Keep a reference to the samples; nothing is copied or converted here.
        self.X = X

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the sample at ``index``.

        The features are converted from a NumPy array to a float32 tensor;
        the label is returned exactly as stored.
        """
        sample = self.X[index]
        features = torch.from_numpy(sample[0]).float()
        label = sample[1]
        return features, label

In [8]:
# Wrap both splits in datasets, then in loaders. Only the training loader
# reshuffles its samples; the test loader keeps a fixed order.
batch_size = 8

train_dataset = MyDataset(training_data)
test_dataset = MyDataset(test_data)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [18]:
class My_MLP_Model(nn.Module):
    """Three-layer perceptron: 50 -> 128 -> 8 -> 2.

    ReLU follows the first two linear layers; the last layer returns raw
    (un-normalised) scores for the two classes.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(50, 128)
        self.fc2 = nn.Linear(128, 8)
        self.fc_last = nn.Linear(8, 2)

    def forward(self, x):
        """Map a batch of 50-dimensional inputs to two scores per row."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc_last(hidden)

In [19]:
def train_epoch(model, train_loader, criterion, optimizer, device=None):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: ``nn.Module`` to train; it is switched to training mode.
        train_loader: sized iterable yielding ``(data, target)`` tensor batches.
        criterion: callable ``criterion(outputs, target)`` returning a scalar
            loss tensor.
        optimizer: optimizer that updates ``model``'s parameters.
        device: device each batch is moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has none), so
            the function no longer depends on a global ``device`` variable
            being defined before it is called.

    Returns:
        float: the sum of the per-batch losses divided by ``len(train_loader)``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()

    running_loss = 0.0

    start_time = time.time()
    for data, target in train_loader:
        optimizer.zero_grad()   # .backward() accumulates gradients
        data = data.to(device)
        target = target.to(device)  # all data & model on same device

        outputs = model(data)
        loss = criterion(outputs, target)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    end_time = time.time()

    running_loss /= len(train_loader)
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    return running_loss

In [20]:
def validate_model(model, validate_loader, criterion, device=None):
    """Evaluate ``model`` on ``validate_loader`` without tracking gradients.

    Args:
        model: ``nn.Module`` to evaluate; it is switched to eval mode.
        validate_loader: sized iterable yielding ``(data, target)`` tensor
            batches.
        criterion: callable ``criterion(outputs, target)`` returning a scalar
            loss tensor.
        device: device each batch is moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has none), so
            the function no longer depends on a global ``device`` variable
            being defined before it is called.

    Returns:
        tuple: ``(mean_batch_loss, accuracy_percent)`` where the loss is the
        sum of per-batch losses divided by ``len(validate_loader)`` and the
        accuracy is the percentage of rows whose highest-scoring column equals
        the target.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for data, target in validate_loader:
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)

            # Index of the highest score along dim 1 is the predicted class.
            # Under no_grad there is no graph to cut, so neither `.data` nor
            # `.detach()` is needed.
            _, predicted = torch.max(outputs, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target)
            running_loss += loss.item()

        running_loss /= len(validate_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc






In [22]:
# Pick the device from the earlier CUDA probe, then build the model, the loss
# and the optimizer.
device = torch.device("cuda" if cuda else "cpu")

model = My_MLP_Model()
model.to(device)

criterion = nn.CrossEntropyLoss()
# Alternative optimizer: optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Ten epochs: one training pass, then an evaluation on the held-out split,
# with a separator line printed after each epoch.
for epoch in range(10):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    test_loss, test_acc = validate_model(model, test_loader, criterion)

    print('=' * 20)

Training Loss:  0.6910996456940969 Time:  0.18021798133850098 s
Testing Loss:  0.6602045595645905
Testing Accuracy:  67.5 %
Training Loss:  0.5768260634607739 Time:  0.1516423225402832 s
Testing Loss:  0.6029305189847947
Testing Accuracy:  66.25 %
Training Loss:  0.5077319701512655 Time:  0.17156195640563965 s
Testing Loss:  0.5970222055912018
Testing Accuracy:  71.25 %
Training Loss:  0.47853511687782074 Time:  0.22916913032531738 s
Testing Loss:  0.6012938678264618
Testing Accuracy:  72.5 %
Training Loss:  0.45412671259707876 Time:  0.16925406455993652 s
Testing Loss:  0.6081951022148132
Testing Accuracy:  68.75 %
Training Loss:  0.42786228044165503 Time:  0.19726204872131348 s
Testing Loss:  0.6110174238681794
Testing Accuracy:  70.0 %
Training Loss:  0.4029181234538555 Time:  0.12730121612548828 s
Testing Loss:  0.6284371584653854
Testing Accuracy:  65.0 %
Training Loss:  0.37369384202692246 Time:  0.15577125549316406 s
Testing Loss:  0.6692878693342209
Testing Accuracy:  70.0 %
Tr