In [1]:
import json
import numpy as np
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

from load_glove import Glove
from stanfordcorenlp import StanfordCoreNLP

In [2]:
# constants
train_file_raw = 'samples50000_0206194500.train.txt'
dev_file_raw = 'samples50000_0206194500.dev.txt'
test_file_raw = 'samples50000_0206194500.test.txt'
dataset_dir = './dataset_filtered'
train_file = '{}/train.json'.format(dataset_dir)
dev_file = '{}/dev.json'.format(dataset_dir)
test_file = '{}/test.json'.format(dataset_dir)
train_pos_file = '{}/train_pos.json'.format(dataset_dir)
dev_pos_file = '{}/dev_pos.json'.format(dataset_dir)
test_pos_file = '{}/test_pos.json'.format(dataset_dir)
train_dep_file = '{}/train_dep.json'.format(dataset_dir)
dev_dep_file = '{}/dev_dep.json'.format(dataset_dir)
test_dep_file = '{}/test_dep.json'.format(dataset_dir)

if not os.path.isdir(dataset_dir):
    os.mkdir(dataset_dir)

In [3]:
# ======================== initial dataset processing =====================

In [4]:
# load file
def ReadDataFile(filename):
    sentences = list()
    len_sentences = list()
    with open(filename, 'r', encoding='utf-8') as textfile:
        for line in textfile:
            line = line.strip()
            tokens = line.split()
            sentences.append(line)
            len_sentences.append(len(tokens))
    return sentences, len_sentences

In [5]:
# std and avg of sentence length in total dataset
sentences_tr, len_sentences_tr = ReadDataFile(train_file_raw)
sentences_dv, len_sentences_dv = ReadDataFile(dev_file_raw)
sentences_ts, len_sentences_ts = ReadDataFile(test_file_raw)

len_sentences_tt = len_sentences_tr + len_sentences_dv + len_sentences_ts
mean_total = np.mean(len_sentences_tt)
std_total = np.std(len_sentences_tt)

print(mean_total, std_total)

22.514047250226433 14.624483763629705


In [6]:
# filter sentences 
# take the sentences of length within range [mean - std, mean + std]
def FilterSentences(sentences, len_sentences, mean, std):
    filtered_sentences = list()
    for i in range(len(sentences)):
        if len_sentences[i] >= mean - std and len_sentences[i] <= mean + std:
            filtered_sentences.append(sentences[i])
    return filtered_sentences, len(filtered_sentences)

In [7]:
# num of sentences in raw dataset vs. num of sentences after [mean - std, mean + std] filter
sentences_tr, num_sentences_tr_flt = FilterSentences(sentences_tr, len_sentences_tr, mean_total, std_total)
sentences_dv, num_sentences_dv_flt = FilterSentences(sentences_dv, len_sentences_dv, mean_total, std_total)
sentences_ts, num_sentences_ts_flt = FilterSentences(sentences_ts, len_sentences_ts, mean_total, std_total)
print(len(len_sentences_tr), num_sentences_tr_flt)
print(len(len_sentences_dv), num_sentences_dv_flt)
print(len(len_sentences_ts), num_sentences_ts_flt)

with open(train_file, 'w', encoding='utf-8') as textfile:
    json.dump(sentences_tr, textfile, indent=2)

with open(dev_file, 'w', encoding='utf-8') as textfile:
    json.dump(sentences_dv, textfile, indent=2)

with open(test_file, 'w', encoding='utf-8') as textfile:
    json.dump(sentences_ts, textfile, indent=2)

277329 214804
36934 28148
40148 30134


In [8]:
# parsing with dependency parser
def DependencyParsing(sentences, num_sentences, out_pos_file_basename, out_dep_file_basename):
    file_index = 0
    current_min = 0
    sent_per_file = 10000
    current_max = min(sent_per_file, num_sentences - sent_per_file * file_index)
    
    sentences_with_pos = dict(zip(range(current_min, current_max), [[] for i in range(current_min, current_max)]))
    sentences_with_dep = dict(zip(range(current_min, current_max), [[] for i in range(current_min, current_max)]))
    
    stanfordnlp = StanfordCoreNLP('./stanford-corenlp-3.9.2') # initialize corenlp server in the background 
    properties = {'annotators': 'depparse', 'pipelineLanguage': 'en', 'outputFormat': 'json'}
    
    logging.info('annotating {} set with stanford parser, total sentences to annotate: {}...'.format(out_pos_file_basename[14:-4], num_sentences))
    
    for i in range(len(sentences)):
        annotated = stanfordnlp.annotate(sentences[i], properties=properties)
        
        annotated = json.loads(annotated)
        
        dep_info = annotated['sentences'][0]['enhancedPlusPlusDependencies']
        pos_info = annotated['sentences'][0]['tokens']
        
        sentences_with_dep[i] = dep_info
        
        
        for token in pos_info:
            sentences_with_pos[i].append({'index':token['index'], 'word':token['word'], 'pos':token['pos']})
        
        if (i + 1) % sent_per_file == 0 or i == len(sentences) - 1: # output a file after every so many sentences 
            logging.info('\t{}/{}: {}% parsed'.format((i+1), num_sentences, round((i+1)/num_sentences*100, 2)))

            out_pos_file = '{}.{}.json'.format(out_pos_file_basename, file_index)
            out_dep_file = '{}.{}.json'.format(out_dep_file_basename, file_index)
            
            with open(out_pos_file, 'w', encoding='utf-8') as textfile:
                json.dump(sentences_with_pos, textfile)
            
            with open(out_dep_file, 'w', encoding='utf-8') as textfile:
                json.dump(sentences_with_dep, textfile)
            
            # reset filename, range for the next batch  
            file_index += 1
            current_min = sent_per_file * file_index
            current_max = min(sent_per_file, num_sentences - sent_per_file * file_index) + sent_per_file * file_index
            
            sentences_with_pos = dict(zip(range(current_min, current_max), [[] for i in range(current_min, current_max)]))
            sentences_with_dep = dict(zip(range(current_min, current_max), [[] for i in range(current_min, current_max)]))
            
    stanfordnlp.close()

In [9]:
#### call dep parsing elsewhere

In [10]:
# ============================ Glove Part ==========================

In [11]:
glove_model = Glove()
glove_model.load_model(normalize=True)

Loading pretrained Glove vectors from file ./glove/glove.840B.300d.txt
  --9.11%  loaded.
  --18.21%  loaded.
  --27.32%  loaded.
  --36.43%  loaded.
  --45.54%  loaded.
  --54.64%  loaded.
  --63.75%  loaded.
  --72.86%  loaded.
  --81.97%  loaded.
  --91.07%  loaded.
Finished loading Glove model. 2196017 vectors loaded


In [12]:
# iterate over sentences (keep the index), filter out the sentences based on if token is in glove embedding or not
def FilterSentencesGlove(sentences, num_sentences, split_name):
    idx_sent_to_remove = list()
    for i in range(num_sentences):
        tokens = sentences[i].split()
        for w in tokens:
            if w == '-RCB-':
                w = ')'
            elif w == '-LCB-':
                w = '('
                
            if glove_model.get_vector(w) is None: 
                idx_sent_to_remove.append(i)
                break # if any of the token returns None, skip the sentence
    
    idx_sent_filtered = list(set(range(num_sentences)) - set(idx_sent_to_remove))
    
    with open('{}/{}_idx_filtered.txt'.format(dataset_dir, split_name), 'w', encoding='utf-8') as textfile:
        json.dump(idx_sent_filtered, textfile)
        
    return idx_sent_filtered

In [13]:
idx_sent_filtered_dv = FilterSentencesGlove(sentences_dv, num_sentences_dv_flt, 'dev')
idx_sent_filtered_ts = FilterSentencesGlove(sentences_ts, num_sentences_ts_flt, 'test')
idx_sent_filtered_tr = FilterSentencesGlove(sentences_tr, num_sentences_tr_flt, 'train')

Input token <Ewauna> is not in the model. Will return None type vector
Input token <heatsource> is not in the model. Will return None type vector
Input token <Brahdamba> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Diwan-in-Council> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Martanda> is not in the model. Will return None type vector
Input token <Endynomena> is not in the model. Will return None type vector
Input token <nine-fingered> is not in the model. Will return None type vector
Input token <73,744> is not in the 

Input token <Kundara> is not in the model. Will return None type vector
Input token <Alappad> is not in the model. Will return None type vector
Input token <Sakthikulangara> is not in the model. Will return None type vector
Input token <Mannathippara> is not in the model. Will return None type vector
Input token <Sasthamcotta> is not in the model. Will return None type vector
Input token <Katchai> is not in the model. Will return None type vector
Input token <Mantharan> is not in the model. Will return None type vector
Input token <Kurunkozhiyur> is not in the model. Will return None type vector
Input token <swimfins> is not in the model. Will return None type vector
Input token <swimfins> is not in the model. Will return None type vector
Input token <FDU2> is not in the model. Will return None type vector
Input token <FDU3> is not in the model. Will return None type vector
Input token <UWFP> is not in the model. Will return None type vector
Input token <KILOVOLT> is not in the model. 

Input token <Pomabamba> is not in the model. Will return None type vector
Input token <Fânețelor> is not in the model. Will return None type vector
Input token <al-Turkmani> is not in the model. Will return None type vector
Input token <al-Qurashi> is not in the model. Will return None type vector
Input token <AQI/ISI> is not in the model. Will return None type vector
Input token <massacre.For> is not in the model. Will return None type vector
Input token <AQI/ISI> is not in the model. Will return None type vector
Input token <Hanein> is not in the model. Will return None type vector
Input token <Batawi> is not in the model. Will return None type vector
Input token <led-government> is not in the model. Will return None type vector
Input token <l'Ahl> is not in the model. Will return None type vector
Input token <#FFC318> is not in the model. Will return None type vector
Input token <#FFC318> is not in the model. Will return None type vector
Input token <#FFC318> is not in the model. Wi

Input token <Karatgurk> is not in the model. Will return None type vector
Input token <Djurt-djurt> is not in the model. Will return None type vector
Input token <Karatgurk> is not in the model. Will return None type vector
Input token <second-brightest> is not in the model. Will return None type vector
Input token <Marshpillow> is not in the model. Will return None type vector
Input token <Gratiaen> is not in the model. Will return None type vector
Input token <Dileepa> is not in the model. Will return None type vector
Input token <Adprint> is not in the model. Will return None type vector
Input token <Tokár> is not in the model. Will return None type vector
Input token <508,714> is not in the model. Will return None type vector
Input token <breasts-and-blood> is not in the model. Will return None type vector
Input token <HOOK2> is not in the model. Will return None type vector
Input token <Okesene> is not in the model. Will return None type vector
Input token <Hitro> is not in the mo

Input token <Sharanskaya> is not in the model. Will return None type vector
Input token <uyezds> is not in the model. Will return None type vector
Input token <uyezds> is not in the model. Will return None type vector
Input token <Aurangezeb> is not in the model. Will return None type vector
Input token <Girsa> is not in the model. Will return None type vector
Input token <Zil-Hijja> is not in the model. Will return None type vector
Input token <Brahmdev> is not in the model. Will return None type vector
Input token <witness-spouse> is not in the model. Will return None type vector
Input token <party-spouse> is not in the model. Will return None type vector
Input token <Logoplaste> is not in the model. Will return None type vector
Input token <Logoplaste> is not in the model. Will return None type vector
Input token <Maiorem> is not in the model. Will return None type vector
Input token <Pikiran> is not in the model. Will return None type vector
Input token <Pikiran> is not in the mode

Input token <Vukovine> is not in the model. Will return None type vector
Input token <Vozuća> is not in the model. Will return None type vector
Input token <N-fixers> is not in the model. Will return None type vector
Input token <Kabindi> is not in the model. Will return None type vector
Input token <Isimba> is not in the model. Will return None type vector
Input token <ТУРНИР> is not in the model. Will return None type vector
Input token <Nyanboard> is not in the model. Will return None type vector
Input token <Ōhashi> is not in the model. Will return None type vector
Input token <Khedivial> is not in the model. Will return None type vector
Input token <Mecelle> is not in the model. Will return None type vector
Input token <Ulateig> is not in the model. Will return None type vector
Input token <Ulateig> is not in the model. Will return None type vector
Input token <Microbacterium> is not in the model. Will return None type vector
Input token <OyamO> is not in the model. Will return No

Input token <BVOAG> is not in the model. Will return None type vector
Input token <ACLOs> is not in the model. Will return None type vector
Input token <cabovers> is not in the model. Will return None type vector
Input token <4x2s> is not in the model. Will return None type vector
Input token <double-drive> is not in the model. Will return None type vector
Input token <twin-steer> is not in the model. Will return None type vector
Input token <Minh-controlled> is not in the model. Will return None type vector
Input token <Mansız> is not in the model. Will return None type vector
Input token <Mansız> is not in the model. Will return None type vector
Input token <Mansız> is not in the model. Will return None type vector
Input token <10th-Anniversary> is not in the model. Will return None type vector
Input token <10th-Anniversary> is not in the model. Will return None type vector
Input token <HIAG> is not in the model. Will return None type vector
Input token <Geächteten> is not in the mod

Input token <Colubrina> is not in the model. Will return None type vector
Input token <Colubrina> is not in the model. Will return None type vector
Input token <Hajrah> is not in the model. Will return None type vector
Input token <Hajrah> is not in the model. Will return None type vector
Input token <Ngātahi> is not in the model. Will return None type vector
Input token <Smithells> is not in the model. Will return None type vector
Input token <Sathiyavijayanagaram> is not in the model. Will return None type vector
Input token <Sathiyavijayanagaram> is not in the model. Will return None type vector
Input token <ttyrec> is not in the model. Will return None type vector
Input token <MWSG-2> is not in the model. Will return None type vector
Input token <MyRLife> is not in the model. Will return None type vector
Input token <MyGLife> is not in the model. Will return None type vector
Input token <Globaloria-WV> is not in the model. Will return None type vector
Input token <Skillpoint> is no

Input token <dihydroxylated> is not in the model. Will return None type vector
Input token <Cross-metathesis> is not in the model. Will return None type vector
Input token <Vestenamer> is not in the model. Will return None type vector
Input token <poly-norbornene> is not in the model. Will return None type vector
Input token <Pyryev> is not in the model. Will return None type vector
Input token <Rumyanova> is not in the model. Will return None type vector
Input token <Rumyanova> is not in the model. Will return None type vector
Input token <Munegascu> is not in the model. Will return None type vector
Input token <Niçard> is not in the model. Will return None type vector
Input token <Monégasques> is not in the model. Will return None type vector
Input token <Despœi> is not in the model. Will return None type vector
Input token <l'emblema> is not in the model. Will return None type vector
Input token <piciui> is not in the model. Will return None type vector
Input token <Gesü> is not in 

Input token <convict-era> is not in the model. Will return None type vector
Input token <Kargapolye> is not in the model. Will return None type vector
Input token <Ackelsberg> is not in the model. Will return None type vector
Input token <96-count> is not in the model. Will return None type vector
Input token <Gatorama> is not in the model. Will return None type vector
Input token <Oelsener> is not in the model. Will return None type vector
Input token <289B> is not in the model. Will return None type vector
Input token <BBFM> is not in the model. Will return None type vector
Input token <Holland-Bukit> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <Wijeysingha> is not in the model. Will return None type vector
Input token <Poovathingal> is not in the model. Will return None type vector
Input token <raga-based> is not in the model. Will return None type vector
Input token <vocology> is not in the mode

Input token <Yi-Luo> is not in the model. Will return None type vector
Input token <Pashtunization> is not in the model. Will return None type vector
Input token <mock-calypso> is not in the model. Will return None type vector
Input token <NAGCR> is not in the model. Will return None type vector
Input token <Prinncipal> is not in the model. Will return None type vector
Input token <Berretti> is not in the model. Will return None type vector
Input token <TIROS-1> is not in the model. Will return None type vector
Input token <Villamarín> is not in the model. Will return None type vector
Input token <Villamarín> is not in the model. Will return None type vector
Input token <Aouzou> is not in the model. Will return None type vector
Input token <Aouzou> is not in the model. Will return None type vector
Input token <Aouzou> is not in the model. Will return None type vector
Input token <Aouzou> is not in the model. Will return None type vector
Input token <AtLee> is not in the model. Will ret

Input token <DreamMix> is not in the model. Will return None type vector
Input token <un-phonetic> is not in the model. Will return None type vector
Input token <Burval> is not in the model. Will return None type vector
Input token <Boumédienne> is not in the model. Will return None type vector
Input token <IBC-13> is not in the model. Will return None type vector
Input token <零> is not in the model. Will return None type vector
Input token <喜> is not in the model. Will return None type vector
Input token <二> is not in the model. Will return None type vector
Input token <四> is not in the model. Will return None type vector
Input token <五> is not in the model. Will return None type vector
Input token <祿> is not in the model. Will return None type vector
Input token <起> is not in the model. Will return None type vector
Input token <欺> is not in the model. Will return None type vector
Input token <𨳍> is not in the model. Will return None type vector
Input token <八> is not in the model. Wi

Input token <Trimeticcia> is not in the model. Will return None type vector
Input token <herdbook> is not in the model. Will return None type vector
Input token <Alysiella> is not in the model. Will return None type vector
Input token <Kvartoft> is not in the model. Will return None type vector
Input token <Ozyptila> is not in the model. Will return None type vector
Input token <Ahland-Dreksler> is not in the model. Will return None type vector
Input token <Dreksler> is not in the model. Will return None type vector
Input token <Dreksler> is not in the model. Will return None type vector
Input token <Nandidae> is not in the model. Will return None type vector
Input token <VJM07> is not in the model. Will return None type vector
Input token <VJM07> is not in the model. Will return None type vector
Input token <Mr.Ota> is not in the model. Will return None type vector
Input token <Tachenbuch> is not in the model. Will return None type vector
Input token <912UL> is not in the model. Will 

Input token <Andolis> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Dacast> is not in the model. Will return None type vector
Input token <Zohm> is not in the model. Will return None type vector
Input token <Megastygarctidinae> is not in the model. Will return None type vector
Input token <omegi> is not in the model. Will return None type vector
Input token <omegi> is not in the model. Will return None type vector
Input token <omegi> is not in the model. Will return None type vector
Input token <gomul> is not in the model. Will return None type vector

Input token <wheel-made> is not in the model. Will return None type vector
Input token <charcoal-fueled> is not in the model. Will return None type vector
Input token <Legends/Mannequin> is not in the model. Will return None type vector
Input token <twenty-car> is not in the model. Will return None type vector
Input token <now-synonymized> is not in the model. Will return None type vector
Input token <WXKX> is not in the model. Will return None type vector
Input token <Nohappimedium> is not in the model. Will return None type vector
Input token <rose-collared> is not in the model. Will return None type vector
Input token <Someșul> is not in the model. Will return None type vector
Input token <jaumei> is not in the model. Will return None type vector
Input token <buitenplaats> is not in the model. Will return None type vector
Input token <Boreel> is not in the model. Will return None type vector
Input token <Boreel> is not in the model. Will return None type vector
Input token <Cistotho

Input token <Cerodrillia> is not in the model. Will return None type vector
Input token <callus.The> is not in the model. Will return None type vector
Input token <Rodang> is not in the model. Will return None type vector
Input token <flag-stone-paved> is not in the model. Will return None type vector
Input token <Kurtoe> is not in the model. Will return None type vector
Input token <Kunre> is not in the model. Will return None type vector
Input token <Lhuntse> is not in the model. Will return None type vector
Input token <Dermomurex> is not in the model. Will return None type vector
Input token <Favartia> is not in the model. Will return None type vector
Input token <nigritus> is not in the model. Will return None type vector
Input token <symmetrizable> is not in the model. Will return None type vector
Input token <associahedron> is not in the model. Will return None type vector
Input token <Inodrillia> is not in the model. Will return None type vector
Input token <Turridae> is not in

Input token <Dykhead> is not in the model. Will return None type vector
Input token <ACUCA> is not in the model. Will return None type vector
Input token <Alleslev> is not in the model. Will return None type vector
Input token <Alleslev> is not in the model. Will return None type vector
Input token <Clerk/Register> is not in the model. Will return None type vector
Input token <D'artiste> is not in the model. Will return None type vector
Input token <Aslanhane> is not in the model. Will return None type vector
Input token <Aslanhane> is not in the model. Will return None type vector
Input token <Zhanbota> is not in the model. Will return None type vector
Input token <Birzhan> is not in the model. Will return None type vector
Input token <Birzhan> is not in the model. Will return None type vector
Input token <Gimnàstic> is not in the model. Will return None type vector
Input token <Gimnàstic> is not in the model. Will return None type vector
Input token <Vaško> is not in the model. Will 

Input token <CASG> is not in the model. Will return None type vector
Input token <CASG> is not in the model. Will return None type vector
Input token <CASG> is not in the model. Will return None type vector
Input token <ICAAAA> is not in the model. Will return None type vector
Input token <then-ongoing> is not in the model. Will return None type vector
Input token <ICAAAA> is not in the model. Will return None type vector
Input token <Dordell> is not in the model. Will return None type vector
Input token <NIRCA> is not in the model. Will return None type vector
Input token <Zavodoukovsk> is not in the model. Will return None type vector
Input token <Quintillian> is not in the model. Will return None type vector
Input token <Mocama> is not in the model. Will return None type vector
Input token <Brigaut> is not in the model. Will return None type vector
Input token <Pryzbylewski> is not in the model. Will return None type vector
Input token <Urci> is not in the model. Will return None ty

Input token <great-great-great-great-grandfather> is not in the model. Will return None type vector
Input token <30,000-40,000> is not in the model. Will return None type vector
Input token <Minnehallen> is not in the model. Will return None type vector
Input token <stafr> is not in the model. Will return None type vector
Input token <Omogo> is not in the model. Will return None type vector
Input token <583.66> is not in the model. Will return None type vector
Input token <Craigiebank> is not in the model. Will return None type vector
Input token <Feidhelm> is not in the model. Will return None type vector
Input token <longer-period> is not in the model. Will return None type vector
Input token <HwK> is not in the model. Will return None type vector
Input token <Asrah> is not in the model. Will return None type vector
Input token <alderflies> is not in the model. Will return None type vector
Input token <Dobsonfly> is not in the model. Will return None type vector
Input token <endopter

Input token <-LSB-> is not in the model. Will return None type vector
Input token <no-balled> is not in the model. Will return None type vector
Input token <7/460> is not in the model. Will return None type vector
Input token <0/41> is not in the model. Will return None type vector
Input token <301-run> is not in the model. Will return None type vector
Input token <non-first-class> is not in the model. Will return None type vector
Input token <8/489> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <SB60> is not in the model. Will return None type vector
Input token <Forgetville> is not in the model. Will return None type vector
Input token <Shagrick> is not in the model. Will return None type vector
Input token <Gjovaag> is not in the model. Will return None type vector
Input token <Toulvaddie> is not in the model. Will return None ty

Input token <ReadiBus> is not in the model. Will return None type vector
Input token <OYBike> is not in the model. Will return None type vector
Input token <creosote-saturated> is not in the model. Will return None type vector
Input token <Seattle-Victoria-Vancouver> is not in the model. Will return None type vector
Input token <Wniary> is not in the model. Will return None type vector
Input token <Titheradge> is not in the model. Will return None type vector
Input token <itcher> is not in the model. Will return None type vector
Input token <Punkradiocast.com> is not in the model. Will return None type vector
Input token <itcher> is not in the model. Will return None type vector
Input token <Tejitu> is not in the model. Will return None type vector
Input token <Shitaye> is not in the model. Will return None type vector
Input token <Klikor-Agbozume> is not in the model. Will return None type vector
Input token <Agbozume> is not in the model. Will return None type vector
Input token <Lex

Input token <Volkskas> is not in the model. Will return None type vector
Input token <Volkskas> is not in the model. Will return None type vector
Input token <Volkskas> is not in the model. Will return None type vector
Input token <Sarow> is not in the model. Will return None type vector
Input token <Ganschendorf> is not in the model. Will return None type vector
Input token <Sauchy-Cauchy> is not in the model. Will return None type vector
Input token <Sauchy-Cauchy> is not in the model. Will return None type vector
Input token <thermo-dynamic> is not in the model. Will return None type vector
Input token <Ketenis> is not in the model. Will return None type vector
Input token <Ingeram> is not in the model. Will return None type vector
Input token <Mathematici> is not in the model. Will return None type vector
Input token <Nairobi.In> is not in the model. Will return None type vector
Input token <Samajs> is not in the model. Will return None type vector
Input token <polychloros> is not 

Input token <35,000-piece> is not in the model. Will return None type vector
Input token <Chirisei> is not in the model. Will return None type vector
Input token <Pole-Carrier> is not in the model. Will return None type vector
Input token <Tebaida> is not in the model. Will return None type vector
Input token <Mazote> is not in the model. Will return None type vector
Input token <13-3A> is not in the model. Will return None type vector
Input token <13-3A> is not in the model. Will return None type vector
Input token <Boroșneu> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <Arraiza> is not in the model. Will return None type vector
Input token <Arraiza> is not in the model. Will return None type vector
Input token <Egharevba> is not in the model. Will return None type vector
Input token <Dante-esque> is not in the model. Will return None type vector
Input token <roughly-paved> is not in the model. Will

Input token <state-endangered> is not in the model. Will return None type vector
Input token <Bathavon> is not in the model. Will return None type vector
Input token <1983-08-21> is not in the model. Will return None type vector
Input token <Espadero> is not in the model. Will return None type vector
Input token <Espadero> is not in the model. Will return None type vector
Input token <Espadero> is not in the model. Will return None type vector
Input token <Espadero> is not in the model. Will return None type vector
Input token <Espadero> is not in the model. Will return None type vector
Input token <Espadero> is not in the model. Will return None type vector
Input token <Peyrellade> is not in the model. Will return None type vector
Input token <duel-enroll> is not in the model. Will return None type vector
Input token <Plerguer> is not in the model. Will return None type vector
Input token <Plerguer> is not in the model. Will return None type vector
Input token <Ratzwiller> is not in t

Input token <54-gun> is not in the model. Will return None type vector
Input token <Jōganji> is not in the model. Will return None type vector
Input token <Hietsu> is not in the model. Will return None type vector
Input token <tirailleurs> is not in the model. Will return None type vector
Input token <Juliette-Lassonde> is not in the model. Will return None type vector
Input token <Maskoutains> is not in the model. Will return None type vector
Input token <MCCQ> is not in the model. Will return None type vector
Input token <ETADEF> is not in the model. Will return None type vector
Input token <Essarts-lès-Sézanne> is not in the model. Will return None type vector
Input token <Givry-lès-Loisy> is not in the model. Will return None type vector
Input token <Matignicourt-Goncourt> is not in the model. Will return None type vector
Input token <Saint-Étienne-sur-Suippe> is not in the model. Will return None type vector
Input token <Suippe> is not in the model. Will return None type vector
In

Input token <Odazzi> is not in the model. Will return None type vector
Input token <Sadolikar-Katkar> is not in the model. Will return None type vector
Input token <Congregazione> is not in the model. Will return None type vector
Input token <breakthorough> is not in the model. Will return None type vector
Input token <12-a-side> is not in the model. Will return None type vector
Input token <minor-cricket> is not in the model. Will return None type vector
Input token <Holbeache> is not in the model. Will return None type vector
Input token <Dunagen> is not in the model. Will return None type vector
Input token <Makrinou> is not in the model. Will return None type vector
Input token <Vénerie> is not in the model. Will return None type vector
Input token <Anglo-Français> is not in the model. Will return None type vector
Input token <Albumtopplistan> is not in the model. Will return None type vector
Input token <Janezich> is not in the model. Will return None type vector
Input token <Cool

Input token <Verwaltungsgemeinschaft> is not in the model. Will return None type vector
Input token <Franconia-West> is not in the model. Will return None type vector
Input token <Gemeindeedikt> is not in the model. Will return None type vector
Input token <Beierlieb> is not in the model. Will return None type vector
Input token <Pettstadt> is not in the model. Will return None type vector
Input token <Rauhe> is not in the model. Will return None type vector
Input token <Verwaltungsgemeinschaft> is not in the model. Will return None type vector
Input token <Wählergemeinschaft> is not in the model. Will return None type vector
Input token <Pettstadt> is not in the model. Will return None type vector
Input token <Pettstadt> is not in the model. Will return None type vector
Input token <Stegaurach> is not in the model. Will return None type vector
Input token <Köhlerschmidt> is not in the model. Will return None type vector
Input token <Weiding> is not in the model. Will return None type 

Input token <Safien> is not in the model. Will return None type vector
Input token <Safien-Platz> is not in the model. Will return None type vector
Input token <Innerberg> is not in the model. Will return None type vector
Input token <Versam> is not in the model. Will return None type vector
Input token <Türelihus> is not in the model. Will return None type vector
Input token <Valendas-Sagogn> is not in the model. Will return None type vector
Input token <anarchist/communist> is not in the model. Will return None type vector
Input token <BlueBEEP> is not in the model. Will return None type vector
Input token <Laudewig> is not in the model. Will return None type vector
Input token <co-cured> is not in the model. Will return None type vector
Input token <Mannens-Grandsivaz> is not in the model. Will return None type vector
Input token <Montagny-la-Ville> is not in the model. Will return None type vector
Input token <Montagny-la-Ville> is not in the model. Will return None type vector
Inp

Input token <Neritoidea> is not in the model. Will return None type vector
Input token <Ostrá> is not in the model. Will return None type vector
Input token <Călugăreni> is not in the model. Will return None type vector
Input token <Gain-Scheduling> is not in the model. Will return None type vector
Input token <Neftchala> is not in the model. Will return None type vector
Input token <Montelanico> is not in the model. Will return None type vector
Input token <Coazzolo> is not in the model. Will return None type vector
Input token <Monchiero> is not in the model. Will return None type vector
Input token <Pesio> is not in the model. Will return None type vector
Input token <Donalong> is not in the model. Will return None type vector
Input token <dansoit> is not in the model. Will return None type vector
Input token <Interamna> is not in the model. Will return None type vector
Input token <Amahagane> is not in the model. Will return None type vector
Input token <Kagekiyo> is not in the mod

Input token <HDDA> is not in the model. Will return None type vector
Input token <Bandrui> is not in the model. Will return None type vector
Input token <creidim> is not in the model. Will return None type vector
Input token <Bandrui> is not in the model. Will return None type vector
Input token <creidim> is not in the model. Will return None type vector
Input token <creidim> is not in the model. Will return None type vector
Input token <Wistanstow> is not in the model. Will return None type vector
Input token <Wistanstow> is not in the model. Will return None type vector
Input token <Ankaret> is not in the model. Will return None type vector
Input token <FitzAlans> is not in the model. Will return None type vector
Input token <Peshale> is not in the model. Will return None type vector
Input token <anti-Welsh> is not in the model. Will return None type vector
Input token <Cherleton> is not in the model. Will return None type vector
Input token <serjeant-at-law> is not in the model. Wil

Input token <Wolności> is not in the model. Will return None type vector
Input token <1480-1548> is not in the model. Will return None type vector
Input token <Fredry> is not in the model. Will return None type vector
Input token <avant-corps> is not in the model. Will return None type vector
Input token <Nakielska> is not in the model. Will return None type vector
Input token <Wrocławska> is not in the model. Will return None type vector
Input token <Prinzenthal> is not in the model. Will return None type vector
Input token <Nakielska> is not in the model. Will return None type vector
Input token <Nakielska> is not in the model. Will return None type vector
Input token <Blumwe> is not in the model. Will return None type vector
Input token <brick-architecture> is not in the model. Will return None type vector
Input token <Nakelerstarße> is not in the model. Will return None type vector
Input token <Nakielska> is not in the model. Will return None type vector
Input token <Hlond> is not 

Input token <Świnoujscie> is not in the model. Will return None type vector
Input token <Szczecin-Goleniów> is not in the model. Will return None type vector
Input token <Schemescript> is not in the model. Will return None type vector
Input token <ureterovesical> is not in the model. Will return None type vector
Input token <mesometrium> is not in the model. Will return None type vector
Input token <Piconnerie> is not in the model. Will return None type vector
Input token <Isly> is not in the model. Will return None type vector
Input token <d'Ideville> is not in the model. Will return None type vector
Input token <Tustna> is not in the model. Will return None type vector
Input token <U.S.-terrorist> is not in the model. Will return None type vector
Input token <Adlouni> is not in the model. Will return None type vector
Input token <Boims> is not in the model. Will return None type vector
Input token <Smallwoods> is not in the model. Will return None type vector
Input token <MP/Home> is

Input token <Cicerbita> is not in the model. Will return None type vector
Input token <Bunyangabu> is not in the model. Will return None type vector
Input token <USh12> is not in the model. Will return None type vector
Input token <Bunyangabu> is not in the model. Will return None type vector
Input token <Al-Zawraa> is not in the model. Will return None type vector
Input token <Al-Zawraa> is not in the model. Will return None type vector
Input token <mtres> is not in the model. Will return None type vector
Input token <Churritos> is not in the model. Will return None type vector
Input token <Ruenroeng> is not in the model. Will return None type vector
Input token <Gilan-e> is not in the model. Will return None type vector
Input token <Pernštejn> is not in the model. Will return None type vector
Input token <Journois> is not in the model. Will return None type vector
Input token <Robitel> is not in the model. Will return None type vector
Input token <1860s-1880s> is not in the model. Wi

Input token <anti-trespassing> is not in the model. Will return None type vector
Input token <KingTrendle> is not in the model. Will return None type vector
Input token <ABC-DuMont> is not in the model. Will return None type vector
Input token <outrate> is not in the model. Will return None type vector
Input token <305,477,424> is not in the model. Will return None type vector
Input token <co-owned/co-managed> is not in the model. Will return None type vector
Input token <unbinilium> is not in the model. Will return None type vector
Input token <unbinilium> is not in the model. Will return None type vector
Input token <ununennium> is not in the model. Will return None type vector
Input token <mixed-isotope> is not in the model. Will return None type vector
Input token <fusion-fission> is not in the model. Will return None type vector
Input token <unbinilium> is not in the model. Will return None type vector
Input token <unbinilium> is not in the model. Will return None type vector
Inpu

Input token <A-Beat> is not in the model. Will return None type vector
Input token <Malferrari> is not in the model. Will return None type vector
Input token <cast-brass> is not in the model. Will return None type vector
Input token <banjo-style> is not in the model. Will return None type vector
Input token <turbo-generators> is not in the model. Will return None type vector
Input token <supporting-character> is not in the model. Will return None type vector
Input token <around/near> is not in the model. Will return None type vector
Input token <Phacopina> is not in the model. Will return None type vector
Input token <Phacopina> is not in the model. Will return None type vector
Input token <Phacopina> is not in the model. Will return None type vector
Input token <Anna-Klara> is not in the model. Will return None type vector
Input token <Dipsas> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <near-circl

Input token <Firaki> is not in the model. Will return None type vector
Input token <Švehla> is not in the model. Will return None type vector
Input token <Švehla> is not in the model. Will return None type vector
Input token <Human68k> is not in the model. Will return None type vector
Input token <SX-Window> is not in the model. Will return None type vector
Input token <Perazich> is not in the model. Will return None type vector
Input token <Perazich> is not in the model. Will return None type vector
Input token <Perazich> is not in the model. Will return None type vector
Input token <Perazich> is not in the model. Will return None type vector
Input token <klaf> is not in the model. Will return None type vector
Input token <Klaf> is not in the model. Will return None type vector
Input token <Forkosch> is not in the model. Will return None type vector
Input token <Brexit> is not in the model. Will return None type vector
Input token <step-daughters/wives> is not in the model. Will retur

Input token <Csepregi> is not in the model. Will return None type vector
Input token <Bonatsos> is not in the model. Will return None type vector
Input token <Bonatsos> is not in the model. Will return None type vector
Input token <melanogramma> is not in the model. Will return None type vector
Input token <Goscilo> is not in the model. Will return None type vector
Input token <green-mohawked> is not in the model. Will return None type vector
Input token <hemicryptophyte> is not in the model. Will return None type vector
Input token <Sehjowal> is not in the model. Will return None type vector
Input token <Sehjowal> is not in the model. Will return None type vector
Input token <precovery> is not in the model. Will return None type vector
Input token <molecular-biochemical> is not in the model. Will return None type vector
Input token <ceRNA> is not in the model. Will return None type vector
Input token <physiologically-relevant> is not in the model. Will return None type vector
Input to

Input token <country-meets-alt-rock> is not in the model. Will return None type vector
Input token <Biggert-Waters> is not in the model. Will return None type vector
Input token <1930-1982> is not in the model. Will return None type vector
Input token <Star-Wagon> is not in the model. Will return None type vector
Input token <Ceasefires> is not in the model. Will return None type vector
Input token <Lašče> is not in the model. Will return None type vector
Input token <VVKJ> is not in the model. Will return None type vector
Input token <Tržič> is not in the model. Will return None type vector
Input token <Palten> is not in the model. Will return None type vector
Input token <Blegoš> is not in the model. Will return None type vector
Input token <reconnaissance-in-force> is not in the model. Will return None type vector
Input token <Kapelski> is not in the model. Will return None type vector
Input token <XXXXIX> is not in the model. Will return None type vector
Input token <Pukovnik> is n

Input token <CRISTHCHURCH> is not in the model. Will return None type vector
Input token <Championchips> is not in the model. Will return None type vector
Input token <Oensingen> is not in the model. Will return None type vector
Input token <Shenckon> is not in the model. Will return None type vector
Input token <2004:2> is not in the model. Will return None type vector
Input token <1h25> is not in the model. Will return None type vector
Input token <associative-commutative> is not in the model. Will return None type vector
Input token <Pi-calculus> is not in the model. Will return None type vector
Input token <BIOCHAM> is not in the model. Will return None type vector
Input token <Sambruno> is not in the model. Will return None type vector
Input token <Bolik> is not in the model. Will return None type vector
Input token <18-race> is not in the model. Will return None type vector
Input token <gag-driven> is not in the model. Will return None type vector
Input token <de-Christianise> is

Input token <-LSB-> is not in the model. Will return None type vector
Input token <Kutila> is not in the model. Will return None type vector
Input token <Tharakki> is not in the model. Will return None type vector
Input token <Godwaghat> is not in the model. Will return None type vector
Input token <Papanchev> is not in the model. Will return None type vector
Input token <Turundzhev> is not in the model. Will return None type vector
Input token <Turundzhev> is not in the model. Will return None type vector
Input token <Nedela> is not in the model. Will return None type vector
Input token <Meandrusa> is not in the model. Will return None type vector
Input token <CLT-ufa> is not in the model. Will return None type vector
Input token <Bregal> is not in the model. Will return None type vector
Input token <Tillac> is not in the model. Will return None type vector
Input token <Shao'an> is not in the model. Will return None type vector
Input token <Wanchuan> is not in the model. Will return N

Input token <Chronographic> is not in the model. Will return None type vector
Input token <385/Kpts-II/1985> is not in the model. Will return None type vector
Input token <Dyera> is not in the model. Will return None type vector
Input token <Donok> is not in the model. Will return None type vector
Input token <Feerguson> is not in the model. Will return None type vector
Input token <overconvergent> is not in the model. Will return None type vector
Input token <OM621> is not in the model. Will return None type vector
Input token <Oelmotor> is not in the model. Will return None type vector
Input token <OM640> is not in the model. Will return None type vector
Input token <OM640> is not in the model. Will return None type vector
Input token <AE-300> is not in the model. Will return None type vector
Input token <W414> is not in the model. Will return None type vector
Input token <OM668> is not in the model. Will return None type vector
Input token <Milnfield> is not in the model. Will retur

Input token <Geiersgrundbach> is not in the model. Will return None type vector
Input token <Rothaarsteig> is not in the model. Will return None type vector
Input token <Drette> is not in the model. Will return None type vector
Input token <Wurstekommission> is not in the model. Will return None type vector
Input token <Salchendorf> is not in the model. Will return None type vector
Input token <Zehntwiese> is not in the model. Will return None type vector
Input token <coöperative> is not in the model. Will return None type vector
Input token <Hainchen> is not in the model. Will return None type vector
Input token <Netphen> is not in the model. Will return None type vector
Input token <Netphen> is not in the model. Will return None type vector
Input token <Netphen> is not in the model. Will return None type vector
Input token <Volkslauf> is not in the model. Will return None type vector
Input token <Hainchen> is not in the model. Will return None type vector
Input token <Netphen> is not

Input token <Haut-Uele> is not in the model. Will return None type vector
Input token <Logoa> is not in the model. Will return None type vector
Input token <Podporucznik> is not in the model. Will return None type vector
Input token <Torjus> is not in the model. Will return None type vector
Input token <BFLPE> is not in the model. Will return None type vector
Input token <Jonkmann> is not in the model. Will return None type vector
Input token <BFLPE> is not in the model. Will return None type vector
Input token <BFLPE> is not in the model. Will return None type vector
Input token <Kemmelmeier> is not in the model. Will return None type vector
Input token <BFLPE> is not in the model. Will return None type vector
Input token <BFLPE> is not in the model. Will return None type vector
Input token <BFLPE> is not in the model. Will return None type vector
Input token <class-average> is not in the model. Will return None type vector
Input token <RYiSE> is not in the model. Will return None typ

Input token <then-councillor> is not in the model. Will return None type vector
Input token <Pyrolith> is not in the model. Will return None type vector
Input token <Wheldale> is not in the model. Will return None type vector
Input token <J.A.Gee> is not in the model. Will return None type vector
Input token <13MW> is not in the model. Will return None type vector
Input token <Yaanai> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <SA-2A> is not in the model. Will return None type vector
Input token <SA-3A> is not in the model. Will return None type vector
Input token <mh1> is not in the model. Will return None type vector
Input token <Sec-36> is not in the model. Will return None type vector
Input token <Rijksmonument> is not in the model. Will return None type vector
Input token <windshaft> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None typ

Input token <Baldissera> is not in the model. Will return None type vector
Input token <Baldissera> is not in the model. Will return None type vector
Input token <Maesllwch> is not in the model. Will return None type vector
Input token <Wallsworth> is not in the model. Will return None type vector
Input token <Rósenkranz> is not in the model. Will return None type vector
Input token <GlobalCapital> is not in the model. Will return None type vector
Input token <HIT-6> is not in the model. Will return None type vector
Input token <Tonkino> is not in the model. Will return None type vector
Input token <Montessory> is not in the model. Will return None type vector
Input token <Bhaktiba> is not in the model. Will return None type vector
Input token <Gopaldas> is not in the model. Will return None type vector
Input token <Gopaldas> is not in the model. Will return None type vector
Input token <siphuncle> is not in the model. Will return None type vector
Input token <Barnsoceras> is not in th

Input token <No.VI> is not in the model. Will return None type vector
Input token <Abdolvand> is not in the model. Will return None type vector
Input token <Nebebew> is not in the model. Will return None type vector
Input token <Gebrhiwet> is not in the model. Will return None type vector
Input token <TpCo> is not in the model. Will return None type vector
Input token <Landau-Zener> is not in the model. Will return None type vector
Input token <Metal-oxo> is not in the model. Will return None type vector
Input token <Metal-oxo> is not in the model. Will return None type vector
Input token <IBHoF> is not in the model. Will return None type vector
Input token <ST2-22> is not in the model. Will return None type vector
Input token <Pejović> is not in the model. Will return None type vector
Input token <Pejović> is not in the model. Will return None type vector
Input token <Schmettau> is not in the model. Will return None type vector
Input token <Schmettau> is not in the model. Will return 

Input token <RISDIC> is not in the model. Will return None type vector
Input token <RISDIC> is not in the model. Will return None type vector
Input token <RISDIC> is not in the model. Will return None type vector
Input token <Almacs> is not in the model. Will return None type vector
Input token <33,947.50> is not in the model. Will return None type vector
Input token <Hālaliʻi> is not in the model. Will return None type vector
Input token <Konjare> is not in the model. Will return None type vector
Input token <Sasano-Bori> is not in the model. Will return None type vector
Input token <1931-built> is not in the model. Will return None type vector
Input token <off-sequence> is not in the model. Will return None type vector
Input token <blockship> is not in the model. Will return None type vector
Input token <1896-99> is not in the model. Will return None type vector
Input token <Berranger> is not in the model. Will return None type vector
Input token <Berranger> is not in the model. Will

Input token <Valois-Burgundy> is not in the model. Will return None type vector
Input token <f309r> is not in the model. Will return None type vector
Input token <203r> is not in the model. Will return None type vector
Input token <sanctoral> is not in the model. Will return None type vector
Input token <f111v> is not in the model. Will return None type vector
Input token <Sanctoral> is not in the model. Will return None type vector
Input token <column-wide> is not in the model. Will return None type vector
Input token <page-wide> is not in the model. Will return None type vector
Input token <101r> is not in the model. Will return None type vector
Input token <Ghent-Bruges> is not in the model. Will return None type vector
Input token <page-wide> is not in the model. Will return None type vector
Input token <f399r> is not in the model. Will return None type vector
Input token <el-Badri> is not in the model. Will return None type vector
Input token <Non-Exec> is not in the model. Will r

Input token <Sainte-Zacharie> is not in the model. Will return None type vector
Input token <10NES> is not in the model. Will return None type vector
Input token <brand/part> is not in the model. Will return None type vector
Input token <Holmens> is not in the model. Will return None type vector
Input token <Aborreparken> is not in the model. Will return None type vector
Input token <Tsimshianic> is not in the model. Will return None type vector
Input token <pre-Union> is not in the model. Will return None type vector
Input token <Dübs> is not in the model. Will return None type vector
Input token <M711> is not in the model. Will return None type vector
Input token <plinthed> is not in the model. Will return None type vector
Input token <UNPR> is not in the model. Will return None type vector
Input token <CT660s> is not in the model. Will return None type vector
Input token <bio-corridors> is not in the model. Will return None type vector
Input token <biocorridors> is not in the model.

Input token <Arcillas> is not in the model. Will return None type vector
Input token <Tindig> is not in the model. Will return None type vector
Input token <Sadogatake> is not in the model. Will return None type vector
Input token <jūryō> is not in the model. Will return None type vector
Input token <shikona> is not in the model. Will return None type vector
Input token <Coffmans> is not in the model. Will return None type vector
Input token <iVardensphere> is not in the model. Will return None type vector
Input token <Autraumaton> is not in the model. Will return None type vector
Input token <Bebugging> is not in the model. Will return None type vector
Input token <noue> is not in the model. Will return None type vector
Input token <Berlin-Wittenau> is not in the model. Will return None type vector
Input token <Boneens> is not in the model. Will return None type vector
Input token <SHRV> is not in the model. Will return None type vector
Input token <murrels> is not in the model. Will 

Input token <bay-winged> is not in the model. Will return None type vector
Input token <chopi> is not in the model. Will return None type vector
Input token <e-Season> is not in the model. Will return None type vector
Input token <LFCTV> is not in the model. Will return None type vector
Input token <promethea> is not in the model. Will return None type vector
Input token <promethea> is not in the model. Will return None type vector
Input token <promethea> is not in the model. Will return None type vector
Input token <promethea> is not in the model. Will return None type vector
Input token <promethea> is not in the model. Will return None type vector
Input token <smelt-whiting> is not in the model. Will return None type vector
Input token <albivitta> is not in the model. Will return None type vector
Input token <Procelsterna> is not in the model. Will return None type vector
Input token <Abrosio> is not in the model. Will return None type vector
Input token <Nanorana> is not in the mode

Input token <Sauguis-Saint-Étienne> is not in the model. Will return None type vector
Input token <ANBHF> is not in the model. Will return None type vector
Input token <ANBHF> is not in the model. Will return None type vector
Input token <maritainiennes/Maritain> is not in the model. Will return None type vector
Input token <electronic/trip> is not in the model. Will return None type vector
Input token <Ōu> is not in the model. Will return None type vector
Input token <Daishaka> is not in the model. Will return None type vector
Input token <Kirstead> is not in the model. Will return None type vector
Input token <Godeanu> is not in the model. Will return None type vector
Input token <IPACC> is not in the model. Will return None type vector
Input token <IPACC> is not in the model. Will return None type vector
Input token <M.Ps> is not in the model. Will return None type vector
Input token <M.Ps> is not in the model. Will return None type vector
Input token <non-kickers> is not in the mod

Input token <MMBTU/h> is not in the model. Will return None type vector
Input token <°C> is not in the model. Will return None type vector
Input token <Dosabhai> is not in the model. Will return None type vector
Input token <Jedlińsk> is not in the model. Will return None type vector
Input token <Jedlińsk> is not in the model. Will return None type vector
Input token <Bülzig> is not in the model. Will return None type vector
Input token <HipHopGame> is not in the model. Will return None type vector
Input token <1.500,000> is not in the model. Will return None type vector
Input token <40,902> is not in the model. Will return None type vector
Input token <206L-3> is not in the model. Will return None type vector
Input token <SIGM400> is not in the model. Will return None type vector
Input token <semi-marked> is not in the model. Will return None type vector
Input token <Ildirans> is not in the model. Will return None type vector
Input token <Theroc> is not in the model. Will return None 

Input token <Kezerle> is not in the model. Will return None type vector
Input token <Shenzhoupterus> is not in the model. Will return None type vector
Input token <chaoyangensis> is not in the model. Will return None type vector
Input token <Ochrophyta> is not in the model. Will return None type vector
Input token <subphyla> is not in the model. Will return None type vector
Input token <Hitrec> is not in the model. Will return None type vector
Input token <MEDEL> is not in the model. Will return None type vector
Input token <MEDEL> is not in the model. Will return None type vector
Input token <Umarzai> is not in the model. Will return None type vector
Input token <Trunev> is not in the model. Will return None type vector
Input token <Trunev> is not in the model. Will return None type vector
Input token <Kusowo> is not in the model. Will return None type vector
Input token <Roznowo> is not in the model. Will return None type vector
Input token <Stagniūnas> is not in the model. Will retu

Input token <Carpentero> is not in the model. Will return None type vector
Input token <Carpentero> is not in the model. Will return None type vector
Input token <Etsumi-Nan> is not in the model. Will return None type vector
Input token <Nagaragawa> is not in the model. Will return None type vector
Input token <Eilne> is not in the model. Will return None type vector
Input token <Indrechtach> is not in the model. Will return None type vector
Input token <Cathussach> is not in the model. Will return None type vector
Input token <Cathussach> is not in the model. Will return None type vector
Input token <Cináed> is not in the model. Will return None type vector
Input token <Themiscyra.Amazon> is not in the model. Will return None type vector
Input token <Ultra-Metallo> is not in the model. Will return None type vector
Input token <Spider-Boy> is not in the model. Will return None type vector
Input token <X-Patrol> is not in the model. Will return None type vector
Input token <Bat-Thing> i

Input token <Trzcińsko-Zdrój> is not in the model. Will return None type vector
Input token <Będzino> is not in the model. Will return None type vector
Input token <Detroit-New> is not in the model. Will return None type vector
Input token <Binaria> is not in the model. Will return None type vector
Input token <Gjelsten> is not in the model. Will return None type vector
Input token <Cowes-Torquay> is not in the model. Will return None type vector
Input token <13:24-33> is not in the model. Will return None type vector
Input token <Ammonian> is not in the model. Will return None type vector
Input token <Π> is not in the model. Will return None type vector
Input token <1794-1852> is not in the model. Will return None type vector
Input token <block-form> is not in the model. Will return None type vector
Input token <Sēja> is not in the model. Will return None type vector
Input token <Habrec> is not in the model. Will return None type vector
Input token <New-9> is not in the model. Will re

Input token <Cocytia> is not in the model. Will return None type vector
Input token <Ctenostola> is not in the model. Will return None type vector
Input token <Iontha> is not in the model. Will return None type vector
Input token <milky-colored> is not in the model. Will return None type vector
Input token <Ogowe> is not in the model. Will return None type vector
Input token <Naranjeros> is not in the model. Will return None type vector
Input token <safety-involved> is not in the model. Will return None type vector
Input token <62304:2006> is not in the model. Will return None type vector
Input token <Nezame> is not in the model. Will return None type vector
Input token <Nezame> is not in the model. Will return None type vector
Input token <Ōigimi> is not in the model. Will return None type vector
Input token <Modert> is not in the model. Will return None type vector
Input token <Holmeside> is not in the model. Will return None type vector
Input token <Brewery/Farringdon> is not in the

Input token <introdutione> is not in the model. Will return None type vector
Input token <Pachnand> is not in the model. Will return None type vector
Input token <90-Mile> is not in the model. Will return None type vector
Input token <four-flushing> is not in the model. Will return None type vector
Input token <Mark-Lee> is not in the model. Will return None type vector
Input token <boatramps> is not in the model. Will return None type vector
Input token <rondette> is not in the model. Will return None type vector
Input token <Villekulla> is not in the model. Will return None type vector
Input token <Vibble> is not in the model. Will return None type vector
Input token <Qassan> is not in the model. Will return None type vector
Input token <Lajee> is not in the model. Will return None type vector
Input token <microfranchises> is not in the model. Will return None type vector
Input token <Karlīne> is not in the model. Will return None type vector
Input token <CTKA> is not in the model. W

Input token <C-130Hs> is not in the model. Will return None type vector
Input token <d'Ewes> is not in the model. Will return None type vector
Input token <adenoblepharon> is not in the model. Will return None type vector
Input token <cryptostachyum> is not in the model. Will return None type vector
Input token <decurviscapum> is not in the model. Will return None type vector
Input token <fuscopurpureum> is not in the model. Will return None type vector
Input token <hirtulum> is not in the model. Will return None type vector
Input token <josephi> is not in the model. Will return None type vector
Input token <Saint-Fabien-de-Rimouski> is not in the model. Will return None type vector
Input token <NOBF> is not in the model. Will return None type vector
Input token <nitrosonium> is not in the model. Will return None type vector
Input token <ferrocenium> is not in the model. Will return None type vector
Input token <Raukumara> is not in the model. Will return None type vector
Input token <

Input token <As-Saha> is not in the model. Will return None type vector
Input token <Assaraya> is not in the model. Will return None type vector
Input token <Bai-Sombe> is not in the model. Will return None type vector
Input token <un-resilient> is not in the model. Will return None type vector
Input token <Jondor> is not in the model. Will return None type vector
Input token <Barbarick> is not in the model. Will return None type vector
Input token <Value-Change> is not in the model. Will return None type vector
Input token <Psycholinguists> is not in the model. Will return None type vector
Input token <Curteian> is not in the model. Will return None type vector
Input token <http://www.researchconnections.org> is not in the model. Will return None type vector
Input token <Argibay> is not in the model. Will return None type vector
Input token <Ivanišević> is not in the model. Will return None type vector
Input token <Analiontas> is not in the model. Will return None type vector
Input to

Input token <Ujszászy> is not in the model. Will return None type vector
Input token <Farkasréti> is not in the model. Will return None type vector
Input token <Szabadíts> is not in the model. Will return None type vector
Input token <field-hospital> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <Heusallee/Museumsmeile> is not in the model. Will return None type vector
Input token <Winands> is not in the model. Will return None type vector
Input token <Kainaiawa> is not in the model. Will return None type vector
Input token <गीता> is not in the model. Will return None type vector
Input token <Dhagamwar> is not in the model. Will return None type vector
Input token <भारतीय> is not in the model. Will return None type vector
Input token <Daintee> is not in the model. Will return None type vector
Input token <Blackpool-based> is not in the model. Will return None type vector
Input token <2,145,000> is not

Input token <Phone-a-Friend> is not in the model. Will return None type vector
Input token <Gaeilo> is not in the model. Will return None type vector
Input token <154.4000> is not in the model. Will return None type vector
Input token <professor-emeritus> is not in the model. Will return None type vector
Input token <Perrum> is not in the model. Will return None type vector
Input token <American-metal> is not in the model. Will return None type vector
Input token <Tramwaje> is not in the model. Will return None type vector
Input token <Gocław> is not in the model. Will return None type vector
Input token <phrynosomatid> is not in the model. Will return None type vector
Input token <aircover> is not in the model. Will return None type vector
Input token <Won-Shik> is not in the model. Will return None type vector
Input token <82884> is not in the model. Will return None type vector
Input token <43518> is not in the model. Will return None type vector
Input token <42335> is not in the mo

Input token <Rocoux> is not in the model. Will return None type vector
Input token <Lupaqa> is not in the model. Will return None type vector
Input token <Wiphala> is not in the model. Will return None type vector
Input token <Qullasuyu> is not in the model. Will return None type vector
Input token <Hettner> is not in the model. Will return None type vector
Input token <Hettner> is not in the model. Will return None type vector
Input token <Rischmann> is not in the model. Will return None type vector
Input token <2.38.0> is not in the model. Will return None type vector
Input token <gui/guilogin.c> is not in the model. Will return None type vector
Input token <Shorcan> is not in the model. Will return None type vector
Input token <LTMX> is not in the model. Will return None type vector
Input token <knockout-to-win> is not in the model. Will return None type vector
Input token <Koschemann> is not in the model. Will return None type vector
Input token <One-Punch> is not in the model. Wil

Input token <Hootabelle> is not in the model. Will return None type vector
Input token <cloudifier> is not in the model. Will return None type vector
Input token <Hootabelle> is not in the model. Will return None type vector
Input token <worm-flavoured> is not in the model. Will return None type vector
Input token <Giggleosaurus> is not in the model. Will return None type vector
Input token <Suine> is not in the model. Will return None type vector
Input token <shoe-gazer> is not in the model. Will return None type vector
Input token <Commodore-in-Charge> is not in the model. Will return None type vector
Input token <UKPASS> is not in the model. Will return None type vector
Input token <UKPASS> is not in the model. Will return None type vector
Input token <UKPASS> is not in the model. Will return None type vector
Input token <UKPASS> is not in the model. Will return None type vector
Input token <scacchianus> is not in the model. Will return None type vector
Input token <Værvarslinga> is

Input token <Acheneau> is not in the model. Will return None type vector
Input token <Balmis> is not in the model. Will return None type vector
Input token <Inspección> is not in the model. Will return None type vector
Input token <CISNS> is not in the model. Will return None type vector
Input token <Interterritorial> is not in the model. Will return None type vector
Input token <vicepresidency> is not in the model. Will return None type vector
Input token <CISNS> is not in the model. Will return None type vector
Input token <ayuntamientos> is not in the model. Will return None type vector
Input token <ayuntamientos> is not in the model. Will return None type vector
Input token <Interterritorial> is not in the model. Will return None type vector
Input token <INGESA> is not in the model. Will return None type vector
Input token <ASISA> is not in the model. Will return None type vector
Input token <Orthoprosthetic> is not in the model. Will return None type vector
Input token <4,572,055>

Input token <Shawnel> is not in the model. Will return None type vector
Input token <Batchian> is not in the model. Will return None type vector
Input token <postmedial> is not in the model. Will return None type vector
Input token <Tenellia> is not in the model. Will return None type vector
Input token <Denrike> is not in the model. Will return None type vector
Input token <Merlinka> is not in the model. Will return None type vector
Input token <Hüdepohl> is not in the model. Will return None type vector
Input token <189125> is not in the model. Will return None type vector
Input token <Katum> is not in the model. Will return None type vector
Input token <north-pole> is not in the model. Will return None type vector
Input token <Vettor> is not in the model. Will return None type vector
Input token <Muller-Cyran> is not in the model. Will return None type vector
Input token <CQFD> is not in the model. Will return None type vector
Input token <igit> is not in the model. Will return None

Input token <BerlinArtProjects> is not in the model. Will return None type vector
Input token <Pop-Cubism> is not in the model. Will return None type vector
Input token <Hymenobacter> is not in the model. Will return None type vector
Input token <Demarlo> is not in the model. Will return None type vector
Input token <Burton-Gibbs> is not in the model. Will return None type vector
Input token <icloudsyncd> is not in the model. Will return None type vector
Input token <Keychaindump> is not in the model. Will return None type vector
Input token <OSX.Keydnap> is not in the model. Will return None type vector
Input token <triple-span> is not in the model. Will return None type vector
Input token <Benachour> is not in the model. Will return None type vector
Input token <Asav> is not in the model. Will return None type vector
Input token <Asav> is not in the model. Will return None type vector
Input token <Kvída> is not in the model. Will return None type vector
Input token <Chuphal> is not i

Input token <Slapawitz> is not in the model. Will return None type vector
Input token <Evans-Collins> is not in the model. Will return None type vector
Input token <Poffos> is not in the model. Will return None type vector
Input token <Slapawitz> is not in the model. Will return None type vector
Input token <Slapowitz> is not in the model. Will return None type vector
Input token <Bhagawanpur> is not in the model. Will return None type vector
Input token <Bhagawanpur> is not in the model. Will return None type vector
Input token <aurantiogriseus> is not in the model. Will return None type vector
Input token <Goniobranchus> is not in the model. Will return None type vector
Input token <Goniobranchus> is not in the model. Will return None type vector
Input token <Sissak-Bardizbanian> is not in the model. Will return None type vector
Input token <Alrazian> is not in the model. Will return None type vector
Input token <Coronandi> is not in the model. Will return None type vector
Input toke

Input token <7:34.82> is not in the model. Will return None type vector
Input token <Campaccio> is not in the model. Will return None type vector
Input token <DOXXbet> is not in the model. Will return None type vector
Input token <Shikharkot> is not in the model. Will return None type vector
Input token <NaOCN> is not in the model. Will return None type vector
Input token <oxazolidone> is not in the model. Will return None type vector
Input token <Mavesyn> is not in the model. Will return None type vector
Input token <WBSC> is not in the model. Will return None type vector
Input token <MetroJazz> is not in the model. Will return None type vector
Input token <E531> is not in the model. Will return None type vector
Input token <OGTV> is not in the model. Will return None type vector
Input token <1,3,5-triamino-2,4,6-trinitrobenzene> is not in the model. Will return None type vector
Input token <2-aminopyridine> is not in the model. Will return None type vector
Input token <Sidwick> is no

Input token <Delagnes> is not in the model. Will return None type vector
Input token <Cernauti> is not in the model. Will return None type vector
Input token <Tivertsis> is not in the model. Will return None type vector
Input token <Tivertsi> is not in the model. Will return None type vector
Input token <Tivertsi> is not in the model. Will return None type vector
Input token <Kamyanets-Podilsky> is not in the model. Will return None type vector
Input token <ring-style> is not in the model. Will return None type vector
Input token <five-flavor> is not in the model. Will return None type vector
Input token <Squeezit> is not in the model. Will return None type vector
Input token <Molas-O-Mint> is not in the model. Will return None type vector
Input token <Wint-O-Green> is not in the model. Will return None type vector
Input token <1902-1991> is not in the model. Will return None type vector
Input token <Bonduca> is not in the model. Will return None type vector
Input token <D'Ambois> is n

Input token <c-BN> is not in the model. Will return None type vector
Input token <c-BN> is not in the model. Will return None type vector
Input token <RuB> is not in the model. Will return None type vector
Input token <ReB> is not in the model. Will return None type vector
Input token <ReB> is not in the model. Will return None type vector
Input token <ReB> is not in the model. Will return None type vector
Input token <Hall-Petch> is not in the model. Will return None type vector
Input token <Al-Battani> is not in the model. Will return None type vector
Input token <al-Marwazi> is not in the model. Will return None type vector
Input token <al-Khwārizmī> is not in the model. Will return None type vector
Input token <Mmst> is not in the model. Will return None type vector
Input token <Stolojan> is not in the model. Will return None type vector
Input token <Dâlja> is not in the model. Will return None type vector
Input token <Uricani> is not in the model. Will return None type vector
Inpu

Input token <431F81> is not in the model. Will return None type vector
Input token <431F81> is not in the model. Will return None type vector
Input token <431F81> is not in the model. Will return None type vector
Input token <431F81> is not in the model. Will return None type vector
Input token <Symferon> is not in the model. Will return None type vector
Input token <tulipomania> is not in the model. Will return None type vector
Input token <Reșița> is not in the model. Will return None type vector
Input token <Hsinchuang> is not in the model. Will return None type vector
Input token <1905/6> is not in the model. Will return None type vector
Input token <Sumerology> is not in the model. Will return None type vector
Input token <Lažánky> is not in the model. Will return None type vector
Input token <2016/2017> is not in the model. Will return None type vector
Input token <Cabra-Finglas> is not in the model. Will return None type vector
Input token <Seenpur> is not in the model. Will ret

Input token <Samoyao> is not in the model. Will return None type vector
Input token <Samoyao> is not in the model. Will return None type vector
Input token <Jasminez> is not in the model. Will return None type vector
Input token <Jasminez> is not in the model. Will return None type vector
Input token <Burueau> is not in the model. Will return None type vector
Input token <Casab-ahan> is not in the model. Will return None type vector
Input token <Nacube> is not in the model. Will return None type vector
Input token <Vidyapeetha> is not in the model. Will return None type vector
Input token <Udaharanam> is not in the model. Will return None type vector
Input token <Lovalekar> is not in the model. Will return None type vector
Input token <Sannata> is not in the model. Will return None type vector
Input token <Udaharanam> is not in the model. Will return None type vector
Input token <Udaharanam> is not in the model. Will return None type vector
Input token <Tambongan> is not in the model. 

Input token <Akpereogene> is not in the model. Will return None type vector
Input token <Ekpiteta> is not in the model. Will return None type vector
Input token <Ekpiteta> is not in the model. Will return None type vector
Input token <Ekpiteta> is not in the model. Will return None type vector
Input token <Ekpiteta> is not in the model. Will return None type vector
Input token <Afrodromia> is not in the model. Will return None type vector
Input token <Maryland-Israel> is not in the model. Will return None type vector
Input token <Misamu> is not in the model. Will return None type vector
Input token <electrohomeopathy> is not in the model. Will return None type vector
Input token <bureauracy> is not in the model. Will return None type vector
Input token <BARMM> is not in the model. Will return None type vector
Input token <Dischroochiton> is not in the model. Will return None type vector
Input token <Arnautoff> is not in the model. Will return None type vector
Input token <Arnautoff> is

Input token <Intermón> is not in the model. Will return None type vector
Input token <Shovon> is not in the model. Will return None type vector
Input token <Upplands-Bro> is not in the model. Will return None type vector
Input token <80003955> is not in the model. Will return None type vector
Input token <one-and-a-half-story> is not in the model. Will return None type vector
Input token <Kumbárová> is not in the model. Will return None type vector
Input token <Kumbárová> is not in the model. Will return None type vector
Input token <15th-year> is not in the model. Will return None type vector
Input token <signed-digit> is not in the model. Will return None type vector
Input token <PSIUP> is not in the model. Will return None type vector
Input token <Ćuprija> is not in the model. Will return None type vector
Input token <Moothon> is not in the model. Will return None type vector
Input token <RecordTV> is not in the model. Will return None type vector
Input token <Ibope> is not in the m

Input token <Kaipainen> is not in the model. Will return None type vector
Input token <Vatican-mandated> is not in the model. Will return None type vector
Input token <DC-Delaware-Maryland-Virginia-West> is not in the model. Will return None type vector
Input token <life-relationship> is not in the model. Will return None type vector
Input token <Zirwas> is not in the model. Will return None type vector
Input token <Wanderausstellungen> is not in the model. Will return None type vector
Input token <asbestos-clad> is not in the model. Will return None type vector
Input token <Pakhtunkhwa-Gilgit> is not in the model. Will return None type vector
Input token <9,081,428> is not in the model. Will return None type vector
Input token <approvals/NOCs> is not in the model. Will return None type vector
Input token <non-ABA-accredited> is not in the model. Will return None type vector
Input token <JAJDELSKI> is not in the model. Will return None type vector
Input token <Brousso> is not in the mo

Input token <vaerágya> is not in the model. Will return None type vector
Input token <vairagi> is not in the model. Will return None type vector
Input token <Mokṣopāya> is not in the model. Will return None type vector
Input token <Vairāgya> is not in the model. Will return None type vector
Input token <A-bikes> is not in the model. Will return None type vector
Input token <A-bikes> is not in the model. Will return None type vector
Input token <Liberabit> is not in the model. Will return None type vector
Input token <Rev.Fr.BENEDICT> is not in the model. Will return None type vector
Input token <Rev.Dr.Samuel> is not in the model. Will return None type vector
Input token <Beichlingen> is not in the model. Will return None type vector
Input token <Bouhereau> is not in the model. Will return None type vector
Input token <Bouhéreau> is not in the model. Will return None type vector
Input token <Kmeť> is not in the model. Will return None type vector
Input token <late-modernist> is not in 

Input token <Leinestraße> is not in the model. Will return None type vector
Input token <Hydrobromic> is not in the model. Will return None type vector
Input token <1-bromoalkanes> is not in the model. Will return None type vector
Input token <Hydrobromic> is not in the model. Will return None type vector
Input token <non-oxidising> is not in the model. Will return None type vector
Input token <Hydrobromic> is not in the model. Will return None type vector
Input token <aviation-gasoline> is not in the model. Will return None type vector
Input token <Kirwill> is not in the model. Will return None type vector
Input token <Iamskoy> is not in the model. Will return None type vector
Input token <Iamskoy> is not in the model. Will return None type vector
Input token <Kirwill> is not in the model. Will return None type vector
Input token <TV5-Etats-Unis> is not in the model. Will return None type vector
Input token <Aillagon> is not in the model. Will return None type vector
Input token <cybe

Input token <series-regulars> is not in the model. Will return None type vector
Input token <Düsar> is not in the model. Will return None type vector
Input token <Pogost> is not in the model. Will return None type vector
Input token <mineworks> is not in the model. Will return None type vector
Input token <Lelikozero> is not in the model. Will return None type vector
Input token <Velikogubskoe> is not in the model. Will return None type vector
Input token <Oshevnev> is not in the model. Will return None type vector
Input token <Bosarevo> is not in the model. Will return None type vector
Input token <Kavgora> is not in the model. Will return None type vector
Input token <densiflorum> is not in the model. Will return None type vector
Input token <mine-layers> is not in the model. Will return None type vector
Input token <XBs> is not in the model. Will return None type vector
Input token <00FNNG> is not in the model. Will return None type vector
Input token <Duczyńska> is not in the model

Input token <Warnscale> is not in the model. Will return None type vector
Input token <Blackbeck> is not in the model. Will return None type vector
Input token <Sinovirus> is not in the model. Will return None type vector
Input token <Atlantic-based> is not in the model. Will return None type vector
Input token <2640A> is not in the model. Will return None type vector
Input token <2644A> is not in the model. Will return None type vector
Input token <2640A> is not in the model. Will return None type vector
Input token <HP2640> is not in the model. Will return None type vector
Input token <multipages> is not in the model. Will return None type vector
Input token <beepland> is not in the model. Will return None type vector
Input token <HP645/7> is not in the model. Will return None type vector
Input token <TinyBASIC> is not in the model. Will return None type vector
Input token <HP/GL> is not in the model. Will return None type vector
Input token <264X> is not in the model. Will return No

Input token <All-Species> is not in the model. Will return None type vector
Input token <LSPN> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <48-man> is not in the model. Will return None type vector
Input token <speed-texting> is not in the model. Will return None type vector
Input token <d'Émission> is not in the model. Will return None type vector
Input token <Izvestkovaya> is not in the model. Will return None type vector
Input token <Dalnegorsk> is not in the model. Will return None type vector
Input token <B.Ch.> is not in the model. Will return None type vector
Input token <tansou/tsuiso> is not in the model. Will return None type vector
Input token <Karagiozis> is not in the model. Will return None type vector
Input token <Karagiozis> is not in the model. Will return None type vector
Input token <Ivat> is not in the model. Will return None type vector
Input token <Karagiozis> is not in the mod

Input token <Skipton-on-Swale> is not in the model. Will return None type vector
Input token <Osbald> is not in the model. Will return None type vector
Input token <Osbald> is not in the model. Will return None type vector
Input token <Osbald> is not in the model. Will return None type vector
Input token <P/ECE> is not in the model. Will return None type vector
Input token <Eljigidey> is not in the model. Will return None type vector
Input token <1790October> is not in the model. Will return None type vector
Input token <Oral/Aural> is not in the model. Will return None type vector
Input token <OSIE> is not in the model. Will return None type vector
Input token <https://www.selenaija.com> is not in the model. Will return None type vector
Input token <A447> is not in the model. Will return None type vector
Input token <Eupharma> is not in the model. Will return None type vector
Input token <GLPG1690> is not in the model. Will return None type vector
Input token <Appermont> is not in the

Input token <quickly-raised> is not in the model. Will return None type vector
Input token <Lijana> is not in the model. Will return None type vector
Input token <Lijana> is not in the model. Will return None type vector
Input token <Chancela> is not in the model. Will return None type vector
Input token <Burgraves> is not in the model. Will return None type vector
Input token <Reichsgut> is not in the model. Will return None type vector
Input token <mesostic> is not in the model. Will return None type vector
Input token <monologue-play> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <美日> is not in the model. Will return None type vector
Input token <HQ/Haoqing> is not in the model. Will return None type vector
Input token <Uliou> is not in the model. Will return None type vector
Input token <side-aisles> is not in the model. Will return None type vector
Input token <Notke> is not in the model. Will re

Input token <Rowell-Sirois> is not in the model. Will return None type vector
Input token <Joseph-Mathias> is not in the model. Will return None type vector
Input token <Jivarp> is not in the model. Will return None type vector
Input token <ThereIn> is not in the model. Will return None type vector
Input token <Skydancer/Of> is not in the model. Will return None type vector
Input token <Jivarp> is not in the model. Will return None type vector
Input token <Weilheimer> is not in the model. Will return None type vector
Input token <Barendsen> is not in the model. Will return None type vector
Input token <delimitated> is not in the model. Will return None type vector
Input token <necronyms> is not in the model. Will return None type vector
Input token <necronym> is not in the model. Will return None type vector
Input token <Molody> is not in the model. Will return None type vector
Input token <1.5-metre> is not in the model. Will return None type vector
Input token <Accomplisht> is not in

Input token <Oxfuird> is not in the model. Will return None type vector
Input token <Oxfuird> is not in the model. Will return None type vector
Input token <Specki> is not in the model. Will return None type vector
Input token <GuRoo> is not in the model. Will return None type vector
Input token <GuRoo> is not in the model. Will return None type vector
Input token <Ekajuk> is not in the model. Will return None type vector
Input token <antarala> is not in the model. Will return None type vector
Input token <antarala> is not in the model. Will return None type vector
Input token <preterites> is not in the model. Will return None type vector
Input token <ἀγαπάω/ἠγάπησεν> is not in the model. Will return None type vector
Input token <Khinalug> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <WNSA> is not in the model. Will return None typ

Input token <Wendefurth> is not in the model. Will return None type vector
Input token <Powderhall> is not in the model. Will return None type vector
Input token <Piershill> is not in the model. Will return None type vector
Input token <433,672> is not in the model. Will return None type vector
Input token <2,502,800> is not in the model. Will return None type vector
Input token <Lomariopsidaceae> is not in the model. Will return None type vector
Input token <Katleho> is not in the model. Will return None type vector
Input token <Moleko> is not in the model. Will return None type vector
Input token <f.ex.> is not in the model. Will return None type vector
Input token <ma.ja.de> is not in the model. Will return None type vector
Input token <208,903> is not in the model. Will return None type vector
Input token <Post-2008> is not in the model. Will return None type vector
Input token <Sedili> is not in the model. Will return None type vector
Input token <Ibiá> is not in the model. Will r

Input token <ReST-friendly> is not in the model. Will return None type vector
Input token <Kiteenjärvi> is not in the model. Will return None type vector
Input token <Chondrostoma> is not in the model. Will return None type vector
Input token <Ryomo> is not in the model. Will return None type vector
Input token <TI-05> is not in the model. Will return None type vector
Input token <Polycratia> is not in the model. Will return None type vector
Input token <Sergozero> is not in the model. Will return None type vector
Input token <Nyboe> is not in the model. Will return None type vector
Input token <Sennheim> is not in the model. Will return None type vector
Input token <Riksmål> is not in the model. Will return None type vector
Input token <Rykkinn> is not in the model. Will return None type vector
Input token <Jeverland> is not in the model. Will return None type vector
Input token <Étienne-François> is not in the model. Will return None type vector
Input token <Montaignac> is not in the

Input token <Federàl> is not in the model. Will return None type vector
Input token <Büyükçekmece> is not in the model. Will return None type vector
Input token <Gruzenberg> is not in the model. Will return None type vector
Input token <Spätrot-Rotgipfler> is not in the model. Will return None type vector
Input token <Newermind> is not in the model. Will return None type vector
Input token <D'Ocon> is not in the model. Will return None type vector
Input token <Yauyos> is not in the model. Will return None type vector
Input token <vicugnas> is not in the model. Will return None type vector
Input token <Anti-Bases> is not in the model. Will return None type vector
Input token <Tangimoana> is not in the model. Will return None type vector
Input token <Foxland> is not in the model. Will return None type vector
Input token <Multi-Academy> is not in the model. Will return None type vector
Input token <Foxland> is not in the model. Will return None type vector
Input token <盲拳> is not in the m

Input token <TranzCoastal> is not in the model. Will return None type vector
Input token <Chilkalguda> is not in the model. Will return None type vector
Input token <Al-Kausar> is not in the model. Will return None type vector
Input token <Umachandra> is not in the model. Will return None type vector
Input token <Subrahmanyaswamy> is not in the model. Will return None type vector
Input token <107J> is not in the model. Will return None type vector
Input token <Lingampally> is not in the model. Will return None type vector
Input token <Paxhill> is not in the model. Will return None type vector
Input token <U-86> is not in the model. Will return None type vector
Input token <Poslaću> is not in the model. Will return None type vector
Input token <Jašar> is not in the model. Will return None type vector
Input token <balticus> is not in the model. Will return None type vector
Input token <family/youth> is not in the model. Will return None type vector
Input token <Sanctifica> is not in the 

Input token <Drono> is not in the model. Will return None type vector
Input token <WVBL-LP> is not in the model. Will return None type vector
Input token <oralities> is not in the model. Will return None type vector
Input token <Couch-Stone> is not in the model. Will return None type vector
Input token <79093> is not in the model. Will return None type vector
Input token <BSk> is not in the model. Will return None type vector
Input token <Au-Berneck> is not in the model. Will return None type vector
Input token <0.330000> is not in the model. Will return None type vector
Input token <Kagina> is not in the model. Will return None type vector
Input token <HONOReform> is not in the model. Will return None type vector
Input token <HONOReform> is not in the model. Will return None type vector
Input token <Kanengoni> is not in the model. Will return None type vector
Input token <Saranggola> is not in the model. Will return None type vector
Input token <Þórdís> is not in the model. Will retur

Input token <Over-Mind> is not in the model. Will return None type vector
Input token <OM660> is not in the model. Will return None type vector
Input token <OM660> is not in the model. Will return None type vector
Input token <-LSB-> is not in the model. Will return None type vector
Input token <Bogatyanovskiy> is not in the model. Will return None type vector
Input token <Sobornaya> is not in the model. Will return None type vector
Input token <BAHAUDIN> is not in the model. Will return None type vector
Input token <BAHAUDIN> is not in the model. Will return None type vector
Input token <BOSAN> is not in the model. Will return None type vector
Input token <Front-Pak> is not in the model. Will return None type vector
Input token <legislative/national> is not in the model. Will return None type vector
Input token <Dotaku-shaped> is not in the model. Will return None type vector
Input token <Kamikoma> is not in the model. Will return None type vector
Input token <Plocamium> is not in the

In [14]:
# num of sentences in filtered data split vs. num of sentences after filtering out with Glove
print(num_sentences_tr_flt, len(idx_sent_filtered_tr))
print(num_sentences_dv_flt, len(idx_sent_filtered_dv))
print(num_sentences_ts_flt, len(idx_sent_filtered_ts))

214804 174061
28148 22752
30134 24412


In [15]:
# ========================= make dataset input ==========================

In [16]:
def LoadFilterIdxFile(filteridx_filename:str):
    with open(filteridx_filename, 'r', encoding='utf-8') as textfile:
        idx_filter = json.load(textfile)
    return idx_filter

In [17]:
#idx_filter = LoadFilterIdxFile('./dataset_filtered/dev_idx_filtered.txt')

In [18]:
def LoadPosInfo(pos_dir:str, split_name:str, idx_to_keep:list):
    print('\tloading POS info...')
    total_num_sent_after_trans = 0
    sent_idx_to_noun_idx_mapping = dict(zip(idx_to_keep, [[] for i in range(len(idx_to_keep))]))
    with os.scandir(pos_dir) as it:
        for entry in it:
            if entry.name.startswith(split_name): # load all pos info with given split name
                with open(entry.path, 'r', encoding='utf-8') as textfile:
                    pos_info = json.load(textfile)
                    for (sent_idx, sent_pos) in pos_info.items():
                        sent_idx = int(sent_idx)
                        if sent_idx in idx_to_keep: # ignore sentences based on idx filter
                            for token_pos in sent_pos:
                                if token_pos['pos'] == 'NN' or token_pos['pos'] == 'NNS':
                                    # only care for NN and NNS for now, ignore pronouns NNP
                                    # the token index given by stanford pos starts with 1 in a sent
                                    token_idx = token_pos['index'] - 1
                                    sent_idx_to_noun_idx_mapping[sent_idx].append(token_idx)
                                    
    with open('sent_idx_to_noun_idx_mapping_filtered_{}.json'.format(split_name), 'w', encoding='utf-8') as textfile:
        json.dump(sent_idx_to_noun_idx_mapping, textfile)
    
    if split_name == 'train':
        # estimate num of sentences after limiting one noun per sentence
        for value in sent_idx_to_noun_idx_mapping.values():
            if not len(value) == 0: 
                total_num_sent_after_trans += len(value)
        print('\tapprox. number of sentences after noun transformation in {} set: {}'.format(split_name, total_num_sent_after_trans))
    else:
        print('\tapprox. number of sentences after noun transformation in {} set: {}'.format(split_name, len(idx_to_keep)))
    
    return sent_idx_to_noun_idx_mapping

In [19]:
#sent_idx_to_noun_idx_mapping = LoadPosInfo('./dataset_pos', 'dev', idx_filter)

In [20]:
########## ============ filter out a certain noun? ================
# rank the number of nouns, go through the whole test set and find highest freq. noun
# take out those sentences (make sure also in filtered idx)
# create an additional file for those nouns as new test.noun

In [21]:
def FilterHighFreqNoun(pos_dir:str, split_name:str, idx_to_keep:list):
    print('\tchecking highest freq noun...')
    total_num_sent_noun1 = 0
    noun_to_sent_idx_mapping = dict()
    
    with os.scandir(pos_dir) as it:
        for entry in it:
            if entry.name.startswith(split_name): # load all pos info with given split name
                with open(entry.path, 'r', encoding='utf-8') as textfile:
                    pos_info = json.load(textfile)
                    for (sent_idx, sent_pos) in pos_info.items():
                        sent_idx = int(sent_idx)
                        if sent_idx in idx_to_keep: # ignore sentences based on idx filter
                            for token_pos in sent_pos:
                                if token_pos['pos'] == 'NN' or token_pos['pos'] == 'NNS':
                                    # only care for NN and NNS for now, ignore pronouns NNP
                                    # the token index given by stanford pos starts with 1 in a sent
                                    word = token_pos['word']
                                    token_idx = token_pos['index'] - 1
                                    if not word in noun_to_sent_idx_mapping:
                                        noun_to_sent_idx_mapping[word] = list()
                                    noun_to_sent_idx_mapping[word].append([sent_idx, token_idx])
    
    # filter out the noun with the most frequent appearance from sorting
    sorted_noun_to_sent_idx_mapping = sorted(noun_to_sent_idx_mapping.items(), key=lambda x: len(x[1]), reverse=True)
    noun_to_sent_idx_mapping = dict()
    sent_idx_filter = list()
    sent_idx_to_noun_idx_mapping = dict()
    
    for i in range(10):
        highfreq_noun = sorted_noun_to_sent_idx_mapping[i][0]
        highfreq_position = sorted_noun_to_sent_idx_mapping[i][1]
        for (sent_idx, noun_idx) in highfreq_position:
            if sent_idx not in sent_idx_filter:
                sent_idx_filter.append(sent_idx)
                sent_idx_to_noun_idx_mapping[sent_idx] = list()
            sent_idx_to_noun_idx_mapping[sent_idx].append(noun_idx)
        noun_to_sent_idx_mapping[highfreq_noun] = highfreq_position
    
    with open('top10noun_with_idx.json', 'w', encoding='utf-8') as textfile:
        json.dump(noun_to_sent_idx_mapping, textfile)
    
    return noun_to_sent_idx_mapping.keys(), sent_idx_filter, sent_idx_to_noun_idx_mapping

In [22]:
#highfreq_noun, sent_idx_filter, sent_idx_to_noun_idx_mapping = FilterHighFreqNoun('./dataset_pos', 'train', idx_filter)

In [23]:
def LoadSentenceFile(sent_filename:str, idx_to_keep:list):
    print('\tloading sentences from file...')
    idx_to_sentences_mapping = dict(zip(idx_to_keep, [[] for i in range(len(idx_to_keep))]))
    with open(sent_filename, 'r', encoding='utf-8') as textfile:
        sentences = json.load(textfile)
        for sent_idx, sent in enumerate(sentences):
            if sent_idx in idx_to_keep:
                sent = sent.split()
                
                for i in range(len(sent)):
                    if sent[i] == '-RCB-':
                        sent[i] = ')'
                    elif sent[i] == '-LCB-':
                        sent[i] = '('
                        
                idx_to_sentences_mapping[sent_idx] = sent
            
    return idx_to_sentences_mapping

In [24]:
#idx_to_sentences_mapping = LoadSentenceFile('./dataset_filtered/dev.json', idx_filter)

In [25]:
# pad sentences if it's not enough length
def Padding(sent:list, max_len:int, embedding_dim:int):
    padded = np.zeros((max_len, embedding_dim))
    if len(sent) < max_len: # when the sentence is shorter then the longest sent in set
        padded[:len(sent)] = sent
    return padded

In [26]:
def RandomSampleTokenIdx(list_of_idx_to_mask:list):
    if len(list_of_idx_to_mask) == 1:
        return list_of_idx_to_mask
    return list(np.random.choice(list_of_idx_to_mask, 1))

In [27]:
def Masking(noun_idx:int, sent:list, embedding_dim:int):
    sent[noun_idx] = np.zeros(embedding_dim)
    return sent

In [28]:
# for sentence with more than one noun, repeat and interchange the noun to be masked 
def SentToEmbedding(idx_to_sentence_mapping:dict, sent_idx_to_noun_idx_mapping:dict, glove_model, embedding_dim:int, max_len:int, split_name:str):
    print('\tconverting sentences to embeddings...')
    sentence_embeddings = list() # sentence embedding [300d, 300d, 300d]
    label_embeddings = list() # label embedding 300d
    label_positions = list() # label position in the sentence
    for (sent_idx, sentence) in idx_to_sentence_mapping.items():
        if not len(sent_idx_to_noun_idx_mapping[sent_idx]) == 0: # if no nouns in the current sentence, skip
            sent = list()
            for token in sentence:
                token_embedding = glove_model.get_vector(token)
                sent.append(token_embedding)
            
            if split_name != 'train': # if not training set, random sample a noun from the sent to mask
                noun_indices = RandomSampleTokenIdx(sent_idx_to_noun_idx_mapping[sent_idx])
            else: # if training set, mask nouns based on indices, mask one noun per sent
                noun_indices = sent_idx_to_noun_idx_mapping[sent_idx]
            
            for noun_idx in noun_indices:
                label_embeddings.append(sent[noun_idx]) # save label embedding
                label_positions.append(noun_idx) # save label index in the sentence
                sent = Masking(noun_idx, sent, embedding_dim) # mask noun at given position
                sent = Padding(sent, max_len, embedding_dim) # add padding with 300d 0s to the end
                sentence_embeddings.append(sent)

    return sentence_embeddings, label_embeddings, label_positions

In [29]:
#embedding_dim = 300
#max_len = int(mean_total + std_total)
#sentence_embeddings, label_embeddings, label_positions = SentToEmbedding(idx_to_sentences_mapping, sent_idx_to_noun_idx_mapping glove_model, embedding_dim, max_len, 'dev')
#print(np.shape(sentence_embeddings))
#print(np.shape(label_embeddings))
#print(np.shape(label_positions))

In [32]:
# create a custom dataset for dataloader to use
class WikiDataset(Dataset):
    def __init__(self, dataset, labels, label_positions):
        self.dataset = dataset
        self.labels = labels
        self.label_positions = label_positions
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        datapoint = self.dataset[idx]
        label = self.labels[idx]
        label_position = self.label_positions[idx]
        return datapoint, label, label_position

In [33]:
#### change the input to dataloader to sentence themselves
#### move embedding retrieval to withint dataset class 

class WikiDatasetNew(Dataset):
    def __init__(self, dataset, labels, label_positions):
        self.dataset = dataset
        self.labels = labels
        self.label_positions = label_positions
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        datapoint = self.dataset[idx]
        label = self.labels[idx]
        label_position = self.label_positions[idx]
        return datapoint, label, label_position

In [34]:
#data = WikiDataset(sentence_embeddings, label_embeddings, label_positions)
#loader = DataLoader(dataset=data, shuffle=False, pin_memory=False, batch_size=512)
#it = iter(loader)
#x,y,nidx =  next(it)
#print(x.shape)
#print(x.dtype)
#print(y.shape)
#print(y.dtype)
#print(nidx.shape)
#print(nidx.dtype)

In [35]:
def MakeDataloader(filteridx_filename:str, sent_filename:str, pos_dir:str, split_name:str, glove_model, embedding_dim:int, max_len:int, batch_size:int, num_workers:int):
    print('making data loader for {} split'.format(split_name))
    idx_to_keep = LoadFilterIdxFile(filteridx_filename)
    sent_idx_to_noun_idx_mappping = LoadPosInfo(pos_dir, split_name, idx_to_keep)
    idx_to_sentence_mapping = LoadSentenceFile(sent_filename, idx_to_keep)
    sentence_embeddings, label_embeddings, label_positions = SentToEmbedding(idx_to_sentence_mapping, sent_idx_to_noun_idx_mapping, glove_model, embedding_dim, max_len, split_name)
    data = WikiDataset(sentence_embeddings, label_embeddings, label_positions)
    
    print('\tfinalizing making data loader')
    
    if split_name == 'test':
        if_shuffle = False
    else:
        if_shuffle = True
        
    # NOTE: for gpu usage, do not use num_workers, turn pin_memory to True
    loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=if_shuffle, num_workers=num_workers, pin_memory=False) 
    
    return loader

In [40]:
def MakeDataloaderNoun(filteridx_filename:str, sent_filename:str, pos_dir:str, split_name:str, glove_model, embedding_dim:int, max_len:int, batch_size:int, num_workers:int):
    print('making data loader for {} split'.format(split_name))
    idx_to_keep = LoadFilterIdxFile(filteridx_filename)
    nouns, sent_idx_filter, sent_idx_to_noun_idx_mapping = FilterHighFreqNoun(pos_dir, split_name, idx_to_keep)
    idx_to_sentence_mapping = LoadSentenceFile(sent_filename, sent_idx_filter)
    sentence_embeddings, label_embeddings, label_positions = SentToEmbedding(idx_to_sentence_mapping, sent_idx_to_noun_idx_mapping, glove_model, embedding_dim, max_len, split_name)
    data = WikiDataset(sentence_embeddings, label_embeddings, label_positions)
    
    print('\tfinalizing making data loader, num sentences in loader: {}'.format(len(label_positions)))
    
    if split_name == 'test':
        if_shuffle = False
    else:
        if_shuffle = True
        
    # NOTE: for gpu usage, do not use num_workers, turn pin_memory to True
    loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=if_shuffle, num_workers=num_workers, pin_memory=False) 
    
    return loader

In [41]:
# sample noun from train
split_name = 'train'
filter_path = './dataset_filtered/{}_idx_filtered.txt'.format(split_name)
file_path = './dataset_filtered/{}.json'.format(split_name)
pos_dir = './dataset_pos'
embedding_dim = 300
batch_size = 128
num_workers = 1
max_len = int(22.514047250226433 + 14.624483763629705)
nounloader = MakeDataloaderNoun(filter_path, file_path, pos_dir, split_name, glove_model, embedding_dim, max_len, batch_size, num_workers)
print(len(nounloader))

making data loader for train split
	checking highest freq noun...
	loading sentences from file...
	converting sentences to embeddings...
	finalizing making data loader, num sentences in loader: 28904
226


In [30]:
# test make dataloader with dev
split_name = 'dev'
filter_path = './dataset_filtered/{}_idx_filtered.txt'.format(split_name)
file_path = './dataset_filtered/{}.json'.format(split_name)
pos_dir = './dataset_pos'
embedding_dim = 300
batch_size = 512
num_workers = 1
max_len = int(mean_total + std_total)
loader = MakeDataloader(filter_path, file_path, pos_dir, split_name, glove_model, embedding_dim, max_len, batch_size, num_workers)
print(len(loader))

making data loader for dev split
	loading POS info...
	approx. number of sentences after noun transformation in dev set: 22752
	loading sentences from file...
	converting sentences to embeddings...
	finalizing making data loader
43


In [31]:
# ================= training =====================

In [87]:
import torch
import torch.optim as optim
import torch.nn as nn
from lstm_model import LSTM

torch.manual_seed(32) # use random seeding

<torch._C.Generator at 0x1fa0029a790>

In [88]:
embedding_dim = 300
hidden_dim = 128
num_epochs = 30
num_batches = len(loader)

model = LSTM(embedding_dim=embedding_dim, hidden_dim=hidden_dim)
model = model.double()
loss_function = nn.CosineEmbeddingLoss()
optimizer = optim.Adam(model.parameters(), lr=0.05) # add learning rate decay later
batch_loss = np.zeros(num_batches)
avg_epoch_loss = np.zeros(num_epochs)
best_model_loss = 99999

In [89]:
for epoch in range(num_epochs): # number of epochs
    print('training epoch: {}...'.format(epoch+1))
    for batch_idx, (data, target, target_idx) in enumerate(loader):
        
        # clear out gradients at each batch so pytorch does not accumulates it
        optimizer.zero_grad()
        
        # get current batch_size, remember that most batch has size 512 but some do not
        batch_size = target.shape[0]
        
        # forward pass
        prediction = model(data, target_idx)
        
        # compute loss and gradients, update parameters by optim.step()
        mask = torch.ones(batch_size)
        loss = loss_function(prediction, target, mask)
        loss.backward()
        optimizer.step()
        
        # save batch loss (avg batch loss of all datapoints in batch, automatically obtained from loss)
        batch_loss[batch_idx] = loss
        if (batch_idx + 1) % 50 == 0 or (batch_idx + 1) == num_batches:
            print('\ttrained {}% of epoch {}, current batch loss: {}'.format(round((batch_idx+1)/num_batches*100, 2), epoch+1, loss))
    
    # compute average epoch loss (avg loss of all batches in the epoch)
    avg_epoch_loss[epoch] = np.mean(batch_loss)
    batch_loss = np.zeros(num_batches) # reset the batch loss
    if best_model_loss > avg_epoch_loss[epoch]:
        best_model_loss = avg_epoch_loss[epoch]
        torch.save(model.state_dict(), 'wiki_model.best.pt')
        print('\tsaving best model at epoch {}'.format(epoch))
    
    print('\tfinished training epoch: {}, average loss: {}'.format(epoch+1, avg_epoch_loss[epoch]))

with open('loss_per_epoch.txt', 'w', encoding='utf-8') as textfile:
    for ls in avg_epoch_loss:
        textfile.write('{}\n'.format(ls))

training epoch: 1...
	trained 100.0% of epoch 1, current batch loss: 0.6002731569877698
	saving best model at epoch 0
	finished training epoch: 1, average loss: 0.6857085827696457


In [None]:
# =========== write testing here! ================

In [None]:
def Top10Hit(prediction, label=str, glove_model):
    top10hit = glove_model.most_similar_embedding(prediction, topn=10, ignore_similarity=True) # sorted list of tokens
    if label in top10hit:
        idxtop10 = top10hit.index(label) + 1
        return idxtop10 # if label is in top 10 hit, return the position in top10
    return False

In [85]:
# make test data loader
split_name = 'test'
filter_path = './dataset_filtered/{}_idx_filtered.txt'.format(split_name)
file_path = './dataset_filtered/{}.json'.format(split_name)
pos_dir = './dataset_pos'
embedding_dim = 300
batch_size = 1
num_workers = 2
max_len = int(mean_total + std_total)
testloader = MakeDataloader(filter_path, file_path, pos_dir, split_name, glove_model, embedding_dim, max_len, batch_size, num_workers)
num_testdata = len(testloader)
print(num_testdata)

making data loader for test split
	loading POS info...
	approx. number of sentences after noun transformation in test set: 24412
	loading sentences from file...
	converting sentences to embeddings...
	finalizing making data loader
23197


In [95]:
model_path = './wiki_model.best.pt'
model = LSTM(embedding_dim=embedding_dim, hidden_dim=hidden_dim)
model.load_state_dict(torch.load(model_path))
model = model.eval() # set model to eval mode

avg_similarity = 0 # compute the avg similarity between each label and predictions
num_top10hit = 0 # compute number of top10hit predictions
avg_top10hit_position = 0 # compute the avg positions in top10 hit, if hit
num_top1hit = 0 # compute number of top1hit predictions

In [None]:
######## change the target in testing loader to string target 
with torch.no_grad():
    for data_idx, (data, target, target_idx) in enumerate(testloader):
        prediction = model(data, target_idx)
        
        # compute avg similarity between prediction and target (the closer to 1 the better)
        similarity = glove_model.similarity(prediction, taget)
        avg_similarity += similarity
        
        # compute top 10 hit of the prediction 
        top10idx = Top10Hit(prediction, target, glove_model)
        if top10idx:
            num_top10hit += 1
            avg_top10hit_position += top10idx
        
            # compute top 1 hit of the prediction 
            if top10idx == 1:
                num_top1hit += 1

avg_similarity = avg_similarity / num_testdata
avg_top10hit_position = avg_top10hit_position / num_top10hit

In [None]:
print('average similarity of test set: {}'.format(avg_similarity))
print('accuracy of top 1 hit: {}/{}, {}%'.format(num_top1hit, num_testdata, round(num_top1hit/num_testdata*100, 2)))
print('accuracy of top 10 hit: {}/{}, {}%'.format(num_top10hit, num_testdata, round(num_top10hit/num_testdata*100, 2)))
print('average position of top 10 hit on a scale from 1 to 10: {}'.format(avg_top10hit_position))

In [None]:
# ================= random playground ===============

In [74]:
#### https://discuss.pytorch.org/t/example-of-many-to-one-lstm/1728
#### example LSTM input format (timestamp/sent_len, batch_size, embedding_size)
#### many to one example
#### lookup usage for batch_first parameter to have (batch_size, timestamp, embedding_size)
import torch
import torch.nn as nn
from torch.autograd import Variable

time_steps = 10
batch_size = 3
in_size = 5
hidden_size = 7

model = nn.LSTM(in_size, hidden_size, num_layers=1, batch_first=True, bidirectional=True)
linear1 = nn.Linear(hidden_size, in_size)
input_seq = Variable(torch.randn(batch_size, time_steps, in_size))
output_seq, hidden = model(input_seq)
output = output_seq[[0, 1, 2], [0, 0, 0]] # change index here
output = torch.mean(output.view(3, 2, 7), dim=1)
output = linear1(output)

loss = nn.CosineEmbeddingLoss()
target = Variable(torch.randn(batch_size, in_size))
err = loss(output, target, torch.ones(batch_size))
#err.backward()

In [75]:
print(input_seq.shape)
#print(input_seq)
print(output.shape)
print(output)
print(target.shape)
#print(target)
print(err)

torch.Size([3, 10, 5])
torch.Size([3, 5])
tensor([[ 0.2934, -0.1371, -0.1866,  0.2318,  0.3225],
        [ 0.1928, -0.1855, -0.1669,  0.2161,  0.3401],
        [ 0.2074, -0.1780, -0.1629,  0.2111,  0.3307]],
       grad_fn=<AddmmBackward>)
torch.Size([3, 5])
tensor(0.8353, grad_fn=<MeanBackward0>)


In [76]:
noun_idx = torch.randint(0, 10, (3,)) # position of the target word
batch_idx = torch.arange(output_seq.shape[0]) # sent idx in batch
print(noun_idx)
print(batch_idx)
print(output_seq.shape)
test = output_seq[batch_idx, noun_idx] # to get a particular position from time stamp out from the batche first method
print(test.shape)
print(test)
print(test.view(3, 2, 7))
print(torch.mean(test.view(3, 2, 7), dim=1))

tensor([3, 8, 5])
tensor([0, 1, 2])
torch.Size([3, 10, 14])
torch.Size([3, 14])
tensor([[ 0.0143,  0.0745,  0.0758,  0.0042,  0.0181, -0.0925, -0.1477,  0.1347,
         -0.1398, -0.2290, -0.1909,  0.2565, -0.0859,  0.0152],
        [-0.1170, -0.0436,  0.1290, -0.0768, -0.1193,  0.1028,  0.0426, -0.0431,
          0.0380, -0.0869, -0.2066,  0.2715, -0.2567,  0.0138],
        [-0.2574,  0.0803,  0.0571, -0.1203, -0.1661,  0.0448,  0.0889, -0.0884,
         -0.0417, -0.2822, -0.1843,  0.3771, -0.1820,  0.2089]],
       grad_fn=<IndexBackward>)
tensor([[[ 0.0143,  0.0745,  0.0758,  0.0042,  0.0181, -0.0925, -0.1477],
         [ 0.1347, -0.1398, -0.2290, -0.1909,  0.2565, -0.0859,  0.0152]],

        [[-0.1170, -0.0436,  0.1290, -0.0768, -0.1193,  0.1028,  0.0426],
         [-0.0431,  0.0380, -0.0869, -0.2066,  0.2715, -0.2567,  0.0138]],

        [[-0.2574,  0.0803,  0.0571, -0.1203, -0.1661,  0.0448,  0.0889],
         [-0.0884, -0.0417, -0.2822, -0.1843,  0.3771, -0.1820,  0.2089]]],
  

In [None]:
#### https://discuss.pytorch.org/t/how-to-retrieve-the-sample-indices-of-a-mini-batch/7948/12
#### mini batch example 
for batch_idx, (data, target, idx) in enumerate(loader):
    print('Batch idx {}, dataset index {}'.format(
        batch_idx, idx))

In [None]:
#### https://discuss.pytorch.org/t/correctly-feeding-lstm-with-minibatch-time-sequence-data/52101/2
#### batch_first example

In [None]:
#### always look at torch documentation https://pytorch.org/docs/stable/nn.html

In [None]:
# NOTE: bidirectional lstm will output 2x the hidden size
# reshape, get prediction from both forward and backward process, avg out the prediction???

In [83]:
import torch.nn.functional as F

test1 = Variable(torch.randn(batch_size, in_size))
print(test1)
test2 = F.normalize(test1, dim=1)
print(test2)
for t in test2:
    print(np.linalg.norm(t))

tensor([[ 0.3917,  0.7974, -0.2882,  1.3368, -0.5921],
        [ 1.1710,  0.9337, -1.0539, -0.4907,  0.7063],
        [ 0.9793,  0.0461, -1.1660,  2.3212, -1.0903]])
tensor([[ 0.2258,  0.4596, -0.1661,  0.7705, -0.3413],
        [ 0.5788,  0.4615, -0.5209, -0.2425,  0.3491],
        [ 0.3283,  0.0155, -0.3909,  0.7782, -0.3655]])
1.0
0.9999999
1.0
