In [18]:
import numpy as np

from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.metrics.pairwise import cosine_similarity
from scipy import spatial
from scipy import stats

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F
torch.manual_seed(1)

import time
from datetime import datetime

from collections import defaultdict
from collections import Counter

from random import randint
import pickle

import warnings
warnings.filterwarnings('error')

from sklearn.metrics.pairwise import cosine_similarity
from scipy import spatial

import string
puncs = set(string.punctuation)

In [19]:
label = '_hansard_en'
#label = '_hansards_fr'

with open('tokens2id' +label+'.pickle', 'rb') as f:
    tokens2id = pickle.load(f)
with open('id2tokens' +label+'.pickle', 'rb') as f:
    id2tokens = pickle.load(f)

In [20]:
with open('wordvecs_skipgram.pickle', 'rb') as file:
    w_embeds = pickle.load(file)

In [21]:
w_embeds[1].shape

(100,)

In [22]:
with open('lst/lst.gold.candidates') as file:
    whole_gold = file.read().splitlines() 
    
    gold_cands_l = [l.split('::') for l in whole_gold]

In [23]:
gold_cands = defaultdict(dict)

for g in gold_cands_l:
    word_pos = g[0]
    
    word, postag = word_pos.split('.')
    
    candidates = g[1].split(';')
    
    gold_cands[word] = {'postag':postag, 'candidates':candidates}

In [24]:
gold_cands['bright']

{'candidates': ['alight',
  'skilled',
  'deep',
  'good',
  'sharp',
  'luminous',
  'colourful',
  'optimisitc',
  'vivid',
  'capable',
  'positive',
  'hopeful',
  'shining',
  'intelligent',
  'smart',
  'clever',
  'motivated',
  'vibrant',
  'up-and-coming',
  'well-lit',
  'gleam',
  'most talented',
  'great',
  'talented',
  'most able',
  'brilliant',
  'light',
  'clear',
  'gifted',
  'promising'],
 'postag': 'a'}

In [25]:
with open('lst/lst_test.preprocessed') as file:
    
    lst_sentences = [l.split() for l in file.readlines()]

In [78]:
lst_test_preprocessed = defaultdict(dict)

for l in lst_sentences:
    word, postag = l[0].split('.')
    sentence_no = int(l[1])
    word_position = int(l[2])
    sentence_tokens = l[3:]
    
    processed_tokens = []
    
    #remove punctuations TODO position might change
    punc_inds = []
    for s in range(len(sentence_tokens)):
        
        if sentence_tokens[s] not in puncs:
            processed_tokens.append(sentence_tokens[s])
        else:
            punc_inds.append(s)
                
    temp_word_position = word_position
    
    for d in punc_inds:
        if d < temp_word_position:
            word_position -= 1
        
    lst_test_preprocessed[(word,sentence_no)] = {'postag':postag, 'word_position':word_position,'sentence':processed_tokens}

In [79]:
lst_test_preprocessed[('bull',589)]

{'postag': 'n',
 'sentence': ['some',
  'darting',
  'terms',
  'originated',
  'outside',
  'the',
  'sport',
  'such',
  'as',
  'hat',
  'trick',
  'meaning',
  'all',
  'three',
  'darts',
  'of',
  'a',
  'player',
  "'s",
  'round',
  'landing',
  'in',
  'the',
  'bull'],
 'word_position': 23}

In [80]:
# base word embed
#word+context combination
#levy lexical subs paper has the formulas

#add
# mul
# baladd
# balmul
# kl
#gold standard is lemmatized
#noun verb adverb adj
#skipgram context insensitive but we can make it sensitive via using context embeds

In [81]:
class SkipGram(nn.Module):
    def __init__(self, vocabulary_size, embedding_dim):
        super(SkipGram, self).__init__()
        
        #sparse embeddings for word and context vectors
        self.w_embeddings = nn.Embedding(vocabulary_size, embedding_dim) #, sparse = True
        self.lin1 = nn.Linear(embedding_dim, vocabulary_size, bias = False)
           
    def forward(self, pos_words):
        
        out = self.w_embeddings(pos_words)
        
        out = self.lin1(out)
        
        final_out = F.log_softmax(out, dim = 0)
        
        return final_out

In [82]:
class SkipGramNeg(nn.Module):
    def __init__(self, vocabulary_size, embedding_dim):
        super(SkipGramNeg, self).__init__()
        
        #sparse embeddings for word and context vectors
        
        self.w_embeddings = nn.Embedding(vocabulary_size, embedding_dim, sparse = True)
        self.c_embeddings = nn.Embedding(vocabulary_size, embedding_dim, sparse = True)
        
        # initialization of embeds
        # https://adoni.github.io/2017/11/08/word2vec-pytorch/

        initrange = 0.5 / embedding_dim
        self.w_embeddings.weight.data.uniform_(-initrange, initrange)
        self.c_embeddings.weight.data.uniform_(-0, 0)

    def forward(self, pos_words, pos_conts, neg_conts):
        
        #Loss calculation, Levy&Goldberg word2vec Explained
        #https://adoni.github.io/2017/11/08/word2vec-pytorch/
        
        w_out = self.w_embeddings(pos_words)
        
        pos_out = self.c_embeddings(pos_conts)
        neg_out = self.c_embeddings(neg_conts)
        
#         print(neg_conts)
#         print(pos_conts)
#         print(pos_words)
               
        pos_val = torch.mul(w_out, pos_out).squeeze()
        pos_val = torch.sum(pos_val, dim = 1)
        pos_loss = F.logsigmoid(pos_val)
        
        neg_val = torch.bmm(neg_out, w_out.unsqueeze(2)).squeeze()
        neg_val = torch.sum(neg_val, dim = 1)
        neg_loss = F.logsigmoid(-neg_val)
        
        final_out = pos_loss + neg_loss.sum()
        final_out = -final_out.sum()/len(pos_words) #neg and mean
         
        return final_out
    

In [83]:
with open('wordvecs_skipgram_word.pickle', 'rb') as file:
    w_embeds = pickle.load(file)
    
with open('wordvecs_skipgram_context.pickle', 'rb') as file:
    c_embeds = pickle.load(file)

In [87]:
# lst_test_nouns = defaultdict(dict)
# lst_test_verbs = defaultdict(dict)
# lst_test_adjectives = defaultdict(dict)

# with open('tokens2id.pickle') as f:
#     tokens2id = pickle.load(f)
# with open('id2tokens.pickle') as f:
#     id2tokens = pickle.load(f)
    
window_size = 5

# #embedding parameters
# weights = model.w_embeddings.weight.detach()

results_to_write = []

for ls in lst_test_preprocessed:
    
    #GET WORD AND SENTENCE RELATED INFO
    item = lst_test_preprocessed[ls]
    central_word = ls[0]
    sentence_id = ls[1]
    
    postag = item['postag']
    word_position = item['word_position']
    sentence = item['sentence']
    
    # GET CONTEXT WORDS HERE
    context_words = get_context_window(sentence, word_position, window_size)
    
    context_ids = []
    context_vectors = []
    
    for cw in context_words:
        ctx_id = tokens2id[cw]
        context_ids.append(ctx_id)
        context_vectors.append(c_embeds[ctx_id])
        
    #get the embedding of the word
    embed1 = w_embeds[tokens2id[central_word]]
    
    #get the list of candidate gold annotations
    cands = gold_cands[central_word]['candidates']
    
    cos_sims = defaultdict(float)
    add_heur = defaultdict(float)
    mul_heur = defaultdict(float)
    
    for c in cands:
        
        #for each candidate find the cosine similarity between target and central
        id_for_cand = tokens2id[c]
        
        embed2 = w_embeds[id_for_cand]
    
        cosine_similarity = 1 - spatial.distance.cosine(embed1, embed2)
        cos_sims[c] = cosine_similarity
        
        #USING FORMULAS IN LEVY - LEXICAL SUBS.
#         add_res = 
#         add_heur.append(add_res)
        
#         mul_res = 
#         mul_heur.append(mul_res)

    sorted_d = sorted(cos_sims.items(), key=lambda x: x[1], reverse=True)
    
    ranked_res = out_ranked_results(central_word, postag, sentence_id, sorted_d)
    
    results_to_write.append(ranked_res)
    
file_name = 'skipgram' + label + '.txt'

with open (file_name, 'w') as f:
 
    for r in range(len(results_to_write)):

        if r == len(results_to_write) - 1:
            f.write(results_to_write[r])
        else:
            f.write(results_to_write[r])
            f.write('\n')

28
43 ['on', 'sunday', 'at', 'craven', 'cottage', 'jose', 'mourinho', 'and', 'his', 'all', 'stars', 'exhibited', 'all', 'of', 'the', 'above', 'symptoms', 'and', 'they', 'were', 'made', 'to', 'pay', 'the', 'price', 'by', 'a', 'fulham', 'side', 'that', 'had', 'in', 'previous', 'weeks', 'woken', 'up', 'after', 'matches', 'with', 'their', 'heads', 'kicked', 'in']
('part', 0.26895016431808472)
('responsibility', 0.18051937222480774)
('instead', 0.086155511438846588)
('hand', 0.074016496539115906)
('you', 0.058274645358324051)
('view', 0.014109814539551735)
('against', -0.010215929709374905)
('team', -0.034613519906997681)
('aspect', -0.063162311911582947)
('bank', -0.068525224924087524)
('perspective', -0.07282053679227829)
('divide', -0.081049293279647827)
('surface', -0.081049293279647827)
('conversely', -0.081049293279647827)
('other hand', -0.081049293279647827)
('dividing line', -0.081049293279647827)
('flank', -0.081049293279647827)
('for us', -0.081049293279647827)
('ally', -0.081049

('cleanse', -0.011679803021252155)
('wipe', -0.011679803021252155)
('spruce up', -0.011679803021252155)
('purify', -0.011679803021252155)
('remedy', -0.011679803021252155)
('purge', -0.011679803021252155)
('make a fortune', -0.011679803021252155)
('clear', -0.012867450714111328)
('correct', -0.045894153416156769)
('restore', -0.071009032428264618)
('fix', -0.15896563231945038)
16
29 ['on', 'wednesday', 'democrats', 'across', 'america', 'joined', 'together', 'to', 'restore', 'truth', 'and', 'trust', 'to', 'government', 'and', 'to', 'clean', 'up', 'the', 'republican', 'culture', 'of', 'corruption', 'that', 'has', 'pervaded', 'our', 'nations', 'capital']
('win', 0.14097504317760468)
('complete', 0.055416341871023178)
('remove', 0.034846484661102295)
('rectify', -0.011679803021252155)
('tidy', -0.011679803021252155)
('profit greatly', -0.011679803021252155)
('wash up', -0.011679803021252155)
('wash', -0.011679803021252155)
('tidy up', -0.011679803021252155)
('scrape', -0.011679803021252155

('force', 0.030820198357105255)
('heart', 0.020607052370905876)
('energy', -0.047939807176589966)
('centre', -0.078895613551139832)
('sound', -0.12989270687103271)
12
13 ['she', 'drank', 'a', 'great', 'deal', 'of', 'coffee', 'and', 'had', 'a', 'rapid', 'thin', 'pulse']
('heartrate', 1.0)
('emanation', 1.0)
('lifeblood', 1.0)
('heartbeat rate', 1.0)
('vibration', 1.0)
('beep', 1.0)
('throb', 1.0)
('rhythm', 1.0)
('reflex', 1.0)
('beat', 1.0)
('wave', 1.0)
('heartbeat', 1.0)
('stimulus', 1.0)
('legume', 1.0)
('hearbeat', 1.0)
('edible seed', 1.0)
('throbbing', 1.0)
('force', 0.030820198357105255)
('heart', 0.020607052370905876)
('energy', -0.047939807176589966)
('centre', -0.078895613551139832)
('sound', -0.12989270687103271)
7
19 ['it', 'was', 'amazing', 'how', 'you', 'managed', 'to', 'put', 'together', 'so', 'many', 'facts', 'and', 'present', 'in', 'such', 'a', 'hilarious', 'manner']
('remove', 0.20783215761184692)
('send', 0.17305231094360352)
('leave', 0.13050149381160736)
('give', 0

('female child', 1.0)
('gal', 1.0)
('young woman', 1.0)
('female', 1.0)
('she', 0.11418427526950836)
('miss', 0.099982380867004395)
('woman', 0.032235376536846161)
('child', -0.059540934860706329)
4
28 ['media', 'reports', 'stating', 'that', 'girls', 'are', 'now', 'topping', 'more', 'subjects', 'than', 'boys', 'and', 'that', 'boys', 'are', 'being', 'left', 'behind', 'are', 'now', 'replicated', 'in', 'most', 'australian', 'states', 'each', 'january']
('young female', 1.0)
('lass', 1.0)
('female children', 1.0)
('female child', 1.0)
('gal', 1.0)
('young woman', 1.0)
('female', 1.0)
('she', 0.11418427526950836)
('miss', 0.099982380867004395)
('woman', 0.032235376536846161)
('child', -0.059540934860706329)
4
10 ['he', 'responded', 'good', 'god', 'girl', 'ca', "n't", 'you', 'understand', 'english']
('young female', 1.0)
('lass', 1.0)
('female children', 1.0)
('female child', 1.0)
('gal', 1.0)
('young woman', 1.0)
('female', 1.0)
('she', 0.11418427526950836)
('miss', 0.099982380867004395)
('

6
7 ['trying', 'to', 'take', 'away', 'my', 'good', 'mood']
('demeanour', 1.0)
('atmosphere', 1.0)
('tone', 1.0)
('humour', 1.0)
('vibe', 1.0)
('timbre', 1.0)
('feel like writing', 1.0)
('outlook', 1.0)
('modality', 1.0)
('ambience', 1.0)
('frame of mind', 1.0)
('mode', 1.0)
('disposition', 1.0)
('temperament', 1.0)
('state of mind', 1.0)
('temper', 1.0)
('mindset', 1.0)
('feeling', 0.038100764155387878)
('attitude', 0.037319798022508621)
('manner', 0.021678119897842407)
('form', -0.0392870232462883)
29
30 ['3.', 'there', 'are', 'a', 'number', 'of', 'verb-preposition', 'combinations', 'which', 'are', 'formally', 'like', 'add', 'on', 'but', 'have', 'the', 'meaning', 'of', 'continuing', 'or', 'resuming', 'an', 'action', 'when', 'used', 'in', 'the', 'imperative', 'mood']
('demeanour', 1.0)
('atmosphere', 1.0)
('tone', 1.0)
('humour', 1.0)
('vibe', 1.0)
('timbre', 1.0)
('feel like writing', 1.0)
('outlook', 1.0)
('modality', 1.0)
('ambience', 1.0)
('frame of mind', 1.0)
('mode', 1.0)
('disp

('allotment', 0.025469675660133362)
('instance', -0.011918889358639717)
('age', -0.052226077765226364)
('space', -0.05995246022939682)
('point', -0.13302420079708099)
('period', -0.15780431032180786)
25
49 ['one', 'ring', 'to', 'rule', 'them', 'all', 'one', 'ring', 'to', 'find', 'them', 'one', 'ring', 'to', 'bring', 'them', 'all', 'and', 'in', 'the', 'darkness', 'bind', 'them', 'in', 'ancient', 'times', 'the', 'rings', 'of', 'power', 'were', 'crafted', 'by', 'the', 'elven-smiths', 'and', 'sauron', 'the', 'dark', 'lord', 'forged', 'the', 'one', 'ring', 'to', 'rule', 'all', 'the', 'others']
('allocation', 0.15315552055835724)
('chance', 0.068834654986858368)
('occasion', 0.046516075730323792)
('opportunity', 0.046069256961345673)
('day', 0.037222061306238174)
('moment', 0.027788862586021423)
('instant', 0.025469675660133362)
('the Victorian period', 0.025469675660133362)
('interval', 0.025469675660133362)
('epoch', 0.025469675660133362)
('juncture', 0.025469675660133362)
('occasionally',

('make', -0.080618098378181458)
('bring', -0.10946998000144958)
('claim', -0.16350261867046356)
6
11 ['we', 'also', 'illustrate', 'the', 'themes', 'by', 'drawing', 'on', 'the', 'contributors', 'stories']
('sketch', 0.15906873345375061)
('exploit', 0.15906873345375061)
('lead to', 0.15906873345375061)
('extract', 0.15906873345375061)
('select', 0.15906873345375061)
('portray', 0.15906873345375061)
('depict', 0.15906873345375061)
('attract', 0.15906873345375061)
('paint', 0.15906873345375061)
('tie', 0.15906873345375061)
('make use of', 0.15906873345375061)
('pit', 0.15906873345375061)
('acquire', 0.15906873345375061)
('take from', 0.15906873345375061)
('drag', 0.15906873345375061)
('derive', 0.15906873345375061)
('pull', 0.15906873345375061)
('earn', 0.15906873345375061)
('gather', 0.15906873345375061)
('haul', 0.15906873345375061)
('summon', 0.15906873345375061)
('employ', 0.15906873345375061)
('obtain', 0.071350030601024628)
('rely', 0.067197948694229126)
('get', 0.052851196378469467)

('less serious', -0.20189717411994934)
('luminous', -0.20189717411994934)
('pale', -0.20189717411994934)
('casual', -0.20189717411994934)
('restricted', -0.20189717411994934)
('faint', -0.20189717411994934)
('trivial', -0.20189717411994934)
('unheavy', -0.20189717411994934)
('frothy', -0.20189717411994934)
('upbeat', -0.20189717411994934)
('minor', -0.20189717411994934)
('easily digested', -0.20189717411994934)
('slight', -0.20189717411994934)
('inconsiderable', -0.20189717411994934)
('of little weight', -0.20189717411994934)
('undemanding', -0.20189717411994934)
('facile', -0.20189717411994934)
('pale ', -0.20189717411994934)
('small amounts of', -0.20189717411994934)
('entertaining', -0.20189717411994934)
('soft', -0.20189717411994934)
3
14 ['on', 'to', 'a', 'lighter', 'note', 'now', 'i', 'started', 'trying', 'to', 'build', 'a', 'store', 'recently']
('small', 0.12048936635255814)
('easy', 0.066128812730312347)
('fair', -0.0099145900458097458)
('simple', -0.11760240793228149)
('bright

('bear', 0.048094376921653748)
('have', 0.04494466632604599)
('know', 0.017106063663959503)
('acquire', -0.0073131946846842766)
('tolerate', -0.0073131946846842766)
('put up with', -0.0073131946846842766)
('sustain', -0.0073131946846842766)
('endure', -0.0073131946846842766)
('undergo', -0.0073131946846842766)
('be subjected to', -0.0073131946846842766)
('encounter', -0.0073131946846842766)
('accept', -0.03409905731678009)
('feel', -0.059695456176996231)
('experience', -0.09015519917011261)
10
26 ['opposition', 'political', 'parties', 'like', 'the', 'national', 'league', 'for', 'democracy', 'have', 'suffered', 'even', 'greater', 'restrictions', 'on', 'their', 'activities', 'and', 'any', 'signs', 'of', 'dissent', 'have', 'been', 'ruthlessly', 'crushed']
('bear', 0.048094376921653748)
('have', 0.04494466632604599)
('know', 0.017106063663959503)
('acquire', -0.0073131946846842766)
('tolerate', -0.0073131946846842766)
('put up with', -0.0073131946846842766)
('sustain', -0.00731319468468427

('erupt on to', 1.0)
('laden', 1.0)
('rupture', 1.0)
('erupt with laughter', 1.0)
('leap', 1.0)
('descend', 1.0)
('bristle', 1.0)
('split', 1.0)
('fracture', 1.0)
('deflate', 1.0)
('bounce', 1.0)
('barge', 1.0)
('full to burst', 1.0)
('arrive on', 1.0)
('overflow', 1.0)
('explode on to', 1.0)
('erupt on', 1.0)
('rush', 1.0)
('enthusiastic', 1.0)
('explode', 1.0)
('erupt', 1.0)
('burgeon', 1.0)
('push', 1.0)
('break', -0.0014567868784070015)
('cover', -0.025101637467741966)
('run', -0.058532398194074631)
('surprise', -0.12085109204053879)
('end', -0.167022705078125)
10
18 ['one', 'evening', 'while', 'he', 'and', 'jan', 'were', 'talking', 'the', 'children', 'burst', 'into', 'the', 'living', 'room', 'arguing', 'about', 'something']
('explode on', 1.0)
('bound', 1.0)
('pop', 1.0)
('erupt on to', 1.0)
('laden', 1.0)
('rupture', 1.0)
('erupt with laughter', 1.0)
('leap', 1.0)
('descend', 1.0)
('bristle', 1.0)
('split', 1.0)
('fracture', 1.0)
('deflate', 1.0)
('bounce', 1.0)
('barge', 1.0)
('

('community', 0.0036796678323298693)
('area', -0.018215598538517952)
('resource', -0.022396037355065346)
('subject', -0.040429208427667618)
('ground', -0.062321003526449203)
('section', -0.062890604138374329)
('region', -0.091145843267440796)
20
21 ['decimal', 'precision', 'will', 'in', 'turn', 'determine', 'the', 'amount', 'of', 'disk', 'space', 'that', 'is', 'allocated', 'to', 'the', 'data', 'stored', 'in', 'that', 'field']
('activity', 0.14587622880935669)
('domain', 0.060169924050569534)
('speciality', 0.060169924050569534)
('sphere', 0.060169924050569534)
('investigative', 0.060169924050569534)
('pitch', 0.060169924050569534)
('battleground', 0.060169924050569534)
('discipline', 0.060169924050569534)
('area of operation', 0.060169924050569534)
('geographical', 0.060169924050569534)
('field of vision', 0.060169924050569534)
('acreage', 0.060169924050569534)
('licence', 0.060169924050569534)
('on the streets', 0.060169924050569534)
('holder', 0.060169924050569534)
('box', 0.06016992

('thundering', 1.0)
('shrill', 1.0)
('blaring', 1.0)
('raucous', 1.0)
('resonant', 1.0)
('strident', 1.0)
('conspicuous', 1.0)
('noisy', 1.0)
('forceful', 1.0)
('rowdy', 1.0)
('powerful', 1.0)
('deafening', 1.0)
('strong', 0.020003164187073708)
('heavy', 0.010278486646711826)
('clear', -0.06428065150976181)
('big', -0.078772254288196564)
6
16 ['she', 'interrupts', 'his', 'sleep', 'with', 'her', 'loud', 'noises', 'ruins', 'his', 'breakfast', 'and', 'loses', 'his', 'magnifying', 'glass']
('imposing', 1.0)
('obtrusive', 1.0)
('disturbing', 1.0)
('amplified', 1.0)
('booming', 1.0)
('vociferous', 1.0)
('thundering', 1.0)
('shrill', 1.0)
('blaring', 1.0)
('raucous', 1.0)
('resonant', 1.0)
('strident', 1.0)
('conspicuous', 1.0)
('noisy', 1.0)
('forceful', 1.0)
('rowdy', 1.0)
('powerful', 1.0)
('deafening', 1.0)
('strong', 0.020003164187073708)
('heavy', 0.010278486646711826)
('clear', -0.06428065150976181)
('big', -0.078772254288196564)
4
21 ['he', 'said', 'in', 'a', 'loud', 'voice', 'fear', 

('stop at nothing', 1.0)
('shift', 1.0)
('haul', 1.0)
('try everything', 1.0)
('wrench', 1.0)
('draw ', 1.0)
('push', 1.0)
('yank', 1.0)
('remove', 0.21082836389541626)
('draw', 0.15906873345375061)
('read', -0.0018950710073113441)
('stop', -0.0067920130677521229)
('withdraw', -0.029741410166025162)
('park', -0.033374123275279999)
('move', -0.047079902142286301)
('request', -0.17846497893333435)
3
18 ['you', 'have', 'to', 'pull', 'instead...', 'pull', 'targeted', 'visitors', 'into', 'your', 'site', 'presell', 'them...', 'then', 'and', 'only', 'then', 'sell']
('lure', 1.0)
('rip', 1.0)
('heave', 1.0)
('manipulate', 1.0)
('do everything it takes', 1.0)
('stretch', 1.0)
('attract', 1.0)
('tow', 1.0)
('get hold of', 1.0)
('stop the car', 1.0)
('look at', 1.0)
('stop at the side of the road', 1.0)
('pluck', 1.0)
('drag', 1.0)
('squeeze', 1.0)
('tug', 1.0)
('extract', 1.0)
('go to any length', 1.0)
('stop at nothing', 1.0)
('shift', 1.0)
('haul', 1.0)
('try everything', 1.0)
('wrench', 1.0)


('solid', 1.0)
('gradual', 1.0)
('unchanging', 1.0)
('continuous', 1.0)
('constant ', 1.0)
('unvarying', 1.0)
('persistent', 1.0)
('unshaking', 1.0)
('unremitting', 1.0)
('unfluctuating', 1.0)
('stable', 1.0)
('uninterrupted', 1.0)
('unbroken', 1.0)
('sure', 0.091924577951431274)
('fixed', 0.075708732008934021)
('regular', -0.052563954144716263)
('firm', -0.11571306735277176)
23
46 ['draw', 'on', 'a', 'large', 'piece', 'of', 'paper', '2', 'to', '3', 'times', 'the', 'final', 'size', 'to', 'make', 'it', 'easier', 'you', 'wo', "n't", 'need', 'a', 'steady', 'hand.after', 'you', 'settle', 'on', 'the', 'content', 'and', 'general', 'layout', '*then*', 'do', 'some', 'research', 'and', 'figure', 'out', 'how', 'to', 'make', 'the', 'final', 'document']
('unwavering', 1.0)
('constant', 1.0)
('solid', 1.0)
('gradual', 1.0)
('unchanging', 1.0)
('continuous', 1.0)
('constant ', 1.0)
('unvarying', 1.0)
('persistent', 1.0)
('unshaking', 1.0)
('unremitting', 1.0)
('unfluctuating', 1.0)
('stable', 1.0)
(

('although', -0.1097150444984436)
3
12 ['we', 'have', 'not', 'yet', 'found', 'any', 'mud/moo', 'environments', 'that', 'handle', 'nl', 'processing']
('however', 0.11821523308753967)
('before', 0.10753505676984787)
('thus far', 0.080881088972091675)
('in spite of that', 0.080881088972091675)
('lately', 0.080881088972091675)
('up to now', 0.080881088972091675)
('hitherto', 0.080881088972091675)
('despite this', 0.080881088972091675)
('to date', 0.080881088972091675)
('so far', 0.080881088972091675)
('not arrived', 0.080881088972091675)
('until now', 0.080881088972091675)
('are still to happen', 0.080881088972091675)
('still', 0.020178152248263359)
('but', 0.01801244355738163)
('already', 0.010460376739501953)
('nevertheless', -0.053305286914110184)
('although', -0.1097150444984436)
0
22 ['yet', 'the', 'result', 'is', 'an', 'odd', 'sentence', 'that', 'no', 'one', 'would', 'say', 'a', 'john', 'saw', 'mary', 'with', 'her', 'best', 'friend', "'s", 'husband']
('however', 0.11821523308753967)


('roll', 0.069642901420593262)
('treatise', 0.069642901420593262)
('article', 0.013785813935101032)
('study', -0.12833879888057709)
('presentation', -0.1625702828168869)
1
29 ['this', 'paper', 'builds', 'on', 'the', 'work', 'of', 'neil', 'a.', 'manson', 'in', 'order', 'to', 'show', 'that', 'the', 'precautionary', 'principle', 'in', 'all', 'of', 'its', 'forms', 'is', 'fraught', 'with', 'vagueness', 'and', 'ambiguity']
('newspaper', 0.12457682192325592)
('report', 0.10419204831123352)
('document', 0.084407031536102295)
('essay', 0.069642901420593262)
('toilet tissue', 0.069642901420593262)
('publication', 0.069642901420593262)
('manuscript', 0.069642901420593262)
('journal', 0.069642901420593262)
('bathroom tissue', 0.069642901420593262)
('papyrus', 0.069642901420593262)
('thesis', 0.069642901420593262)
('stationery', 0.069642901420593262)
('roll', 0.069642901420593262)
('treatise', 0.069642901420593262)
('article', 0.013785813935101032)
('study', -0.12833879888057709)
('presentation', -

In [61]:
def get_context_window(sentence, central_word_index, window_size):
    
    context = []
    print(central_word_index)
    print(len(sentence), sentence)
    word = sentence[central_word_index]
    
    for w in range(1,window_size+1):
        
        left_cont = central_word_index - w
        right_cont = central_word_index + w
        
        #find the window words to the left and right
        #add as pair if they are inside sentence boundaries
        
        if left_cont > -1:
            context.append(sentence[left_cont])
            
        if right_cont < len(sentence):
            context.append(sentence[right_cont])
            
    return context

In [57]:
def out_ranked_results(word, postag, sentence_id, sorted_d):
    
    word_pos = str(word+'.'+postag)
    res = 'RANKED\t' + word_pos + ' '+ str(sentence_id)
    
    for d in sorted_d:
        print(d)
        res += '\t' + d[0]+' '+ str(d[1])
    
    return res

In [41]:
print(len(model.w_embeddings.weight))

79
