In [1]:
import json
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import spacy
from operator import itemgetter
import numpy as np
import io
import random
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import time
import torch.nn.functional as F

# Load spaCy's small English pipeline into `nlp`.
# NOTE(review): `nlp` is not referenced by any cell visible in this notebook
# excerpt — confirm it is still needed before keeping the load.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Detect once whether a CUDA device can be used; later cells read `cuda`.
cuda = torch.cuda.is_available()
print('CUDA is', cuda)

# Data-loading worker count: 8 when a GPU is present, otherwise 0.
num_workers = 0
if cuda:
    num_workers = 8
print(num_workers)

CUDA is False
0


In [3]:
# Build `glove_vocab`: word -> embedding vector (NumPy float array).
# Each non-blank line is whitespace-separated: the first field is the word and
# the remaining fields are parsed as floats.
# The file is streamed line by line so the full text is never held in memory
# twice, and every line is split only once.
glove_vocab = {}
with io.open('../Data/glove.6B.50d.txt', 'r', encoding='utf8') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Skip blank lines instead of failing on fields[0].
            continue
        glove_vocab[fields[0]] = np.array(fields[1:], dtype=float)

In [138]:
def _load_ambiguity_scores(path):
    """Read one tagged JSON file and return its per-adjective labels.

    The last element of the top-level JSON array is iterated as a list of
    records. For each record, `record[0]` becomes the dictionary key and the
    value is `[record[2], score]`, where `score` is 0 when `record[3] == 'a'`
    and 1 otherwise.

    Args:
        path: Location of the tagged JSON file.

    Returns:
        dict mapping `record[0]` to `[record[2], score]`. If a key occurs more
        than once, the last record wins.
    """
    # The context manager closes the handle even if parsing fails.
    with open(path) as f:
        tagged = json.load(f)
    return {
        record[0]: [record[2], 0 if record[3] == 'a' else 1]
        for record in tagged[-1]
    }


# To score the wearable-tech corpus instead, point these at
#   '../Data/wearable_tech_cleaned-tagged_m.json'
#   '../Data/wearable_tech_cleaned-tagged_a.json'
ambiguity_m = _load_ambiguity_scores('../Data/furniture_cleaned-tagged_m.json')
ambiguity_a = _load_ambiguity_scores('../Data/furniture_cleaned-tagged_a.json')
    

In [147]:
# Merge both label sets into a single adjective -> score mapping.
# The sources are visited in order, so a label from `ambiguity_m` wins whenever
# an adjective appears in both; swap the tuple order to prefer `ambiguity_a`.
# NOTE(review): the previous header said Akshat's labels take precedence and
# that reversing the order prefers Manuel's. As written, `ambiguity_m` has
# priority — confirm which annotator `_m` / `_a` stand for.
# An adjective is kept only when its first stored field is non-zero as an int
# and a GloVe vector exists for it.
ambiguity = {}

for labels in (ambiguity_m, ambiguity_a):
    for adj, (first_field, score) in labels.items():
        if int(first_field) != 0 and adj not in ambiguity and adj in glove_vocab:
            ambiguity[adj] = score

In [148]:
# Pair every labelled adjective's GloVe vector with its ambiguity label.
all_data = [[glove_vocab[adj], label] for adj, label in ambiguity.items()]

# Shuffle with a dedicated, seeded generator so the 90/10 split (and therefore
# the reported test accuracy) is the same on every run. Using a private
# random.Random instance leaves the global `random` state untouched.
SPLIT_SEED = 0
random.Random(SPLIT_SEED).shuffle(all_data)

size = len(all_data)
split_index = int(size * 0.9)  # first 90% for training, remainder for testing
training_data = all_data[:split_index]
test_data = all_data[split_index:]


In [152]:
class MyDataset(Dataset):
    """Dataset over a sequence of `[vector, label]` pairs.

    Each item is returned as `(tensor, label)`, where the tensor is the NumPy
    vector reshaped to `(1, len(vector))` and cast to float32, and the label is
    passed through unchanged.
    """

    def __init__(self, X):
        # X: indexable collection whose entries hold a NumPy vector at
        # position 0 and its label at position 1.
        self.X = X

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        vector = self.X[index][0]
        # Add a leading axis of size 1 in front of the vector's own length.
        row = vector.reshape(1, vector.shape[0])
        target = self.X[index][1]
        features = torch.from_numpy(row).float()
        return features, target

In [153]:
# Mini-batch size shared by the training and test loaders.
batch_size = 8

# Wrap both splits so they can be fed to a DataLoader.
train_dataset = MyDataset(training_data)
test_dataset = MyDataset(test_data)

# Training batches are shuffled; the test loader keeps the stored order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [154]:
class My_CNN_Model(nn.Module):
    """1-D CNN classifier: two convolutions followed by three linear layers.

    Input is expected as `(batch, 1, embedding_dim)`; the output is
    `(batch, 2)` raw class scores (no softmax applied).

    Args:
        embedding_dim: Length of each input vector. Defaults to 50, which
            reproduces the original fixed `64 * 32` flatten size.
        kernel_size: Kernel width of both convolutions. Defaults to 10.

    Raises:
        ValueError: If `embedding_dim` is too short for two unpadded
            convolutions of width `kernel_size`.
    """

    def __init__(self, embedding_dim=50, kernel_size=10):
        super(My_CNN_Model, self).__init__()
        # Each unpadded, stride-1 convolution shortens the sequence by
        # kernel_size - 1, so two of them leave this many positions.
        conv_out_len = embedding_dim - 2 * (kernel_size - 1)
        if conv_out_len <= 0:
            raise ValueError(
                'embedding_dim=%d is too small for two convolutions with '
                'kernel_size=%d' % (embedding_dim, kernel_size)
            )
        self.conv1 = nn.Conv1d(1, 32, kernel_size, padding=0)
        self.conv2 = nn.Conv1d(32, 64, kernel_size, padding=0)
        self.fc1 = nn.Linear(64 * conv_out_len, 32)
        self.fc2 = nn.Linear(32, 8)
        self.fc3 = nn.Linear(8, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # Flatten per sample. Keeping the batch axis fixed means a wrongly
        # sized input fails in fc1 instead of being silently reshaped into a
        # different number of rows.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

In [155]:
def train_epoch(model, train_loader, criterion, optimizer, device=None):
    """Run one optimisation pass over `train_loader` and return the mean loss.

    Args:
        model: Module to train; switched to training mode.
        train_loader: Sized iterable yielding `(data, target)` batches.
        criterion: Loss callable taking `(outputs, target)`.
        optimizer: Optimizer stepping `model`'s parameters.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has none),
            so the function no longer depends on a global `device`.

    Returns:
        float: Sum of per-batch losses divided by `len(train_loader)`.

    Raises:
        ZeroDivisionError: If `train_loader` contains no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()

    running_loss = 0.0

    start_time = time.time()
    for data, target in train_loader:
        optimizer.zero_grad()   # .backward() accumulates gradients
        data = data.to(device)
        target = target.to(device)  # all data & model on same device

        outputs = model(data)
        loss = criterion(outputs, target)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    end_time = time.time()

    running_loss /= len(train_loader)
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    return running_loss

In [156]:
def validate_model(model, validate_loader, criterion, device=None):
    """Evaluate `model` on `validate_loader` without tracking gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        validate_loader: Sized iterable yielding `(data, target)` batches.
        criterion: Loss callable taking `(outputs, target)`.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has none),
            so the function no longer depends on a global `device`.

    Returns:
        tuple: `(mean_loss, accuracy_percent)`, where the mean is taken over
        batches and the accuracy over individual samples.

    Raises:
        ZeroDivisionError: If `validate_loader` contains no batches/samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for data, target in validate_loader:
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)

            # Index of the largest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            # No detach needed: nothing is tracked inside torch.no_grad().
            loss = criterion(outputs, target)
            running_loss += loss.item()

        running_loss /= len(validate_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc






In [161]:
# Choose the compute device from the earlier CUDA check.
device = torch.device("cuda" if cuda else "cpu")

# Fresh model placed on that device, with cross-entropy loss and Adam.
model = My_CNN_Model().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
# Alternative optimizer: optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Eight passes over the training set, evaluating on the test set after each.
for epoch in range(8):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    test_loss, test_acc = validate_model(model, test_loader, criterion)

    print('=' * 20)

Training Loss:  0.5693191820628023 Time:  0.36072397232055664 s
Testing Loss:  0.431238012181388
Testing Accuracy:  86.15384615384616 %
Training Loss:  0.41585348786352433 Time:  0.3719050884246826 s
Testing Loss:  0.3980816884173287
Testing Accuracy:  87.6923076923077 %
Training Loss:  0.3775771201065142 Time:  0.5120980739593506 s
Testing Loss:  0.3258104510605335
Testing Accuracy:  87.6923076923077 %
Training Loss:  0.3531993076919693 Time:  0.45446324348449707 s
Testing Loss:  0.35277900306714904
Testing Accuracy:  89.23076923076924 %
Training Loss:  0.3483517834891195 Time:  0.38564133644104004 s
Testing Loss:  0.364839735130469
Testing Accuracy:  84.61538461538461 %
Training Loss:  0.3301712757308189 Time:  0.37030792236328125 s
Testing Loss:  0.3924524440533585
Testing Accuracy:  81.53846153846153 %
Training Loss:  0.3355761585272338 Time:  0.4092741012573242 s
Testing Loss:  0.3905705019003815
Testing Accuracy:  86.15384615384616 %
Training Loss:  0.33218459448177523 Time:  0.3

In [121]:
# Collect every distinct adjective listed under 'adj_quotes' and
# 'adj_no_quotes' across the article records. The last two entries of the JSON
# array are skipped. No filtering by ambiguity score happens here.
# The context manager closes the file handle once the JSON is parsed.
with open('../Data/furniture_cleaned-tagged_m.json') as f:
    data = json.load(f)

adjectives = set()  # dataset for checking
for article in data[:-2]:
    adjectives.update(article['adj_quotes'])
    adjectives.update(article['adj_no_quotes'])

In [128]:
# Print the predicted class (0 or 1) for every collected adjective that has a
# GloVe vector.
model.eval()
# Run on whichever device holds the model's weights so this also works on CUDA.
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: skip building autograd graphs
    for adj in adjectives:
        if adj in glove_vocab:
            inp = glove_vocab[adj]
            # Shape (1, 1, dim): a batch of one sample with a single channel.
            inp = inp.reshape(1, 1, inp.shape[0])
            output = model(torch.from_numpy(inp).float().to(model_device))
            # Bring the scores back to host memory before converting to NumPy.
            print(adj, np.argmax(output.cpu().numpy()))

icelandic 1
cozy 1
laude 1
scrumptious 1
flatter 1
planned 1
tiny 0
californian 1
discrete 1
devoid 1
sentient 1
feminine 1
duffle 0
ex 0
outlandish 1
exciting 1
french 1
semi 0
rough 0
collapsible 0
iridescent 1
even 1
tubular 0
animated 1
good 1
dove 0
archetypical 1
unfurling 1
unsuspecting 1
operational 1
weighted 0
furnishing 1
looped 0
unpleasant 1
porous 1
opaque 0
promising 1
footwear 0
focal 1
gestalt 1
parabolic 0
special 0
amazed 1
political 1
unpredictable 1
livable 1
spanish 1
exact 1
yearly 0
funny 1
notable 1
smoothness 1
spiny 0
atmospheric 1
stimulating 1
esteemed 1
prolific 1
sunny 0
dovetail 0
rotatable 1
160th 0
west 0
catholic 1
pedestal 0
subaltern 1
topical 1
orange 0
voluble 1
overt 1
passing 1
cooperative 1
modelled 1
objective 1
fifth 0
rewarding 1
characteristic 1
southern 1
wildest 1
cultural 1
disjointed 1
environmental 1
opalescent 1
nap 0
chic 1
convenient 1
medieval 1
irish 1
near 1
climactic 1
venetian 1
ill 1
corporate 1
intangible 1
promotional 0
outs

tedious 1
categorised 1
translate 1
anachronistic 1
ethereal 1
accepted 1
sugary 0
cleft 0
dry 0
disorderly 1
impressed 1
gilded 0
advanced 1
confidential 0
poured 1
picky 1
graphic 1
visionary 1
ready 1
impermeable 1
bad 0
various 0
taboo 1
patterned 0
horrible 1
foamed 1
alive 0
invented 0
dependent 1
gentle 1
familiar 1
royal 1
year 0
airtight 0
secondary 1
climbable 1
uneconomic 1
reversed 1
optimistic 1
inspirational 1
laborious 1
bentwood 0
assyrian 0
slovenian 1
perceptive 1
native 1
plush 1
3rd 0
chronograph 0
overburden 1
lean 0
joyous 1
concise 1
ironic 1
kenyan 1
challenging 1
cast 1
imbuing 1
continued 0
springy 1
semiprecious 1
sweetest 1
latvian 1
justified 1
unsurpassed 1
magic 1
nordic 1
harmful 1
pretty 1
fortuny 1
marked 1
crowded 1
revolutionary 1
approachable 1
susceptible 1
prime 1
durable 1
pleasing 1
integrated 1
hostile 1
subtle 1
bluetooth 0
experimental 1
mexican 1
retro 1
countersunk 1
blockbuster 0
outstretched 1
bench 0
lipped 0
cubic 1
pleated 0
final 0
de

sheer 1
layered 0
ferrari 0
eponymous 1
waterproof 0
release 0
poufs 1
mere 1
viscous 1
beaten 0
absorbent 1
cape 1
sanded 0
forceps 0
aristocratic 1
nero 0
horizontal 0
definitive 0
orthopaedic 0
discordant 1
mariposa 0
fragile 1
unrealized 1
lily 0
straight 0
defined 1
deeper 1
skateboarding 1
unbuilt 1
prickly 1
seasonal 0
veneer 1
emotive 1
depressing 1
terrestrial 1
couscous 0
utilitarian 1
understated 1
upright 1
sino 1
underway 1
tangled 0
generic 1
vous 1
unrestrained 1
contained 0
combine 0
fun 1
recent 0
tough 1
charitable 1
insane 1
hideous 1
deformable 1
planetary 1
sweet 1
polychromatic 1
flush 1
transformable 1
devoted 1
reduced 0
mosaic 0
patinated 1
supernatural 1
tropical 1
pino 0
characterful 1
vaulting 0
directional 1
olmsted 1
weighty 1
lucky 0
burly 1
splayed 0
childish 1
subsequent 0
alone 1
considerate 1
winglike 1
chromed 0
singaporean 1
arctic 1
egyptian 1
upturned 0
slim 0
oppressive 1
ambient 1
optional 1
gothic 1
elastic 0
padded 0
extendable 1
invisible 1
s

ongoing 0
endearing 1
covetable 1
correctional 1
active 1
thick 0
ancillary 0
flashy 1
infamous 0
hybrid 1
slanted 0
bevelled 1
processed 0
hazardous 1
delicate 1
academic 1
upwards 0
simple 1
modernist 1
english 1
asian 1
captive 1
illustrated 1
renewed 1
tiered 0
kentucky 0
weird 1
unfolded 1
hammock 0
future 1
hung 0
honoured 1
straighter 0
consumerist 1
exclusionary 1
oily 1
formative 1
reproductive 1
17th 0
lifestyle 1
equal 1
outermost 0
independent 0
significant 1
smoothed 0
woolen 0
meaningful 1
liquid 0
shorter 0
headrest 1
precise 1
intertwined 1
downloadable 1
minimum 1
incoming 1
fewer 1
sine 0
leading 0
exclusive 1
brash 1
fundraiser 1
terrorist 0
formation 1
daily 0
personable 1
limited 1
petite 1
liberal 1
circulatory 1
uninstalled 1
obsidian 0
ambiguous 1
executive 0
ensemble 1
feminist 1
sustainable 1
viable 1
broke 0
yellow 0
clear 1
quixotic 1
portuguese 1
kitschy 1
rubbery 0
macho 1
unified 1
squashed 0
solitary 1
caribe 0
lit 1
effective 1
degrading 1
nad 0
swarm 1

austrian 1
relaxing 1
humane 1
fantastical 1
estonian 1
colored 0
ripe 1
bogus 1
laminated 0
uppermost 1
unmissable 1
lead 0
umbilical 1
reclaimed 0
supplementary 1
biannual 0
beloved 1
idealised 1
atelier 1
brasserie 0
unrealised 1
sawdust 0
natural 1
nomadic 1
suppleness 1
responsive 1
titled 0
dedicated 1
freer 1
heated 1
married 0
negative 0
famous 1
plaited 1
silly 1
philanthropist 1
omnipresent 1
hubert 0
clearest 1
whitest 1
later 0
sculpt 0
vertical 0
spatial 1
phenolic 0
flying 0
formulaic 1
specialised 1
sloped 0
ritual 1
programmable 0
octagonal 0
fundamental 1
chubby 1
conversational 1
eating 1
nitrate 0
willing 1
enter 0
foldable 0
coherent 1
flax 0
proud 1
transportable 1
temporal 1
helmet 1
menial 1
female 1
abundant 1
tender 0
okay 0
slower 0
bourgeois 1
bulgarian 1
anarchic 1
gestural 1
curved 0
flask 0
exposed 1
newborn 1
upholstery 0
feverish 1
imperfect 1
unbuttoned 0
3d 1
holocene 1
happy 1
mutable 1
synergetic 1
swiss 1
indirect 1
volatile 1
woolly 0
experienced 0