In [1]:
import gensim
import json
import tqdm
import operator

### Load queries

In [2]:
# Build the query lookup: every line of the file is split on a tab into
# exactly two fields, and stored as {integer query id: query text}.
queries = {}
with open('../data/robust/stopped_queries_lower.txt') as query_file:
    for record in tqdm.tqdm(query_file):
        fields = record.strip().split('\t')
        topic_id, topic_text = fields
        queries[int(topic_id)] = topic_text

250it [00:00, 26100.21it/s]


## Load GloVe

In [3]:
from gensim.scripts.glove2word2vec import glove2word2vec

# Convert the raw 300-d GloVe text file into a word2vec-format text file
# written next to it, so that gensim can load it in the following cell.
glove_dir = '/mnt/scratch2/shnaseri/dataset/glove/glove.6B/'
glove2word2vec(
    glove_input_file=glove_dir + "glove.6B.300d.txt",
    word2vec_output_file=glove_dir + "gensim_glove_vectors_300d.txt",
)

(400000, 300)

In [4]:
# Read the converted word2vec-format text file (binary=False) as gensim KeyedVectors.
glove_model = gensim.models.KeyedVectors.load_word2vec_format(
    glove_dir + "gensim_glove_vectors_300d.txt",
    binary=False,
)

### Similarity between the query q and all the words of the collection

In [5]:
# For every query, ask the GloVe model for the 1000 words most similar to the
# query terms and store them as
# {'topicNumber': <query id as str>, 'terms': [{'word': ..., 'weight': ...}, ...]}.
# NOTE: the name `most_sim_terms_per_query` is rebound by the PRF cell further
# down, so persist this list first if it is needed.
most_sim_terms_per_query = []
for q in tqdm.tqdm(queries):
    try:
        query_terms = queries[q].lower().split(' ')
        sim_terms = glove_model.most_similar(positive=query_terms, topn=1000)
    except Exception as e:
        # Best effort: report the failing query together with the reason and
        # move on. Catching `Exception` (not a bare `except`) keeps Ctrl-C /
        # kernel interrupts working.
        print(e)
        print(q, queries[q])
        continue
    most_sim_terms_per_query.append({
        'topicNumber': str(q),
        'terms': [{'word': t[0], 'weight': t[1]} for t in sim_terms],
    })

100%|██████████| 250/250 [00:13<00:00, 18.93it/s]


### Similarity between the query q and the terms in the top-k PRF documents

In [6]:
def load_run(path):
    """Read a retrieval run file into a nested ``{query_id: {doc_id: score}}`` dict.

    Every non-blank line must contain six whitespace-separated columns:
    query id, an ignored column, document id, rank, score and run tag.
    Columns may be separated by spaces or tabs (any run of whitespace);
    blank lines are skipped. The rank and run-tag columns are not used.

    Args:
        path: location of the run file.

    Returns:
        dict mapping each integer query id to a dict of document id -> float
        score. If a document appears twice for a query, the last score wins.

    Raises:
        ValueError: if a non-blank line does not have exactly six columns, or
            the query id / score cannot be converted to int / float.
    """
    result = {}
    with open(path) as f:
        for line in tqdm.tqdm(f):
            fields = line.split()
            if not fields:
                # tolerate empty lines, e.g. a trailing newline at end of file
                continue
            q, _, d, _rank, score, _method = fields
            result.setdefault(int(q), {})[d] = float(score)
    return result

In [7]:
def get_topk_ret_doc_p_query(ret_result, topk):
    """Keep only the ``topk`` best-scoring documents of every query.

    Args:
        ret_result: mapping of query id -> {doc_id: score}.
        topk: number of leading documents to keep per query (used as a slice bound).

    Returns:
        dict mapping each query id to a list of ``(doc_id, score)`` tuples,
        ordered by descending score and truncated to ``topk`` entries.
    """
    by_score = operator.itemgetter(1)
    return {
        query_id: sorted(doc_scores.items(), key=by_score, reverse=True)[:topk]
        for query_id, doc_scores in ret_result.items()
    }

In [8]:
def get_sim_terms_json_format(terms):
    """Turn (word, weight) pairs into a list of JSON-ready dictionaries.

    Args:
        terms: iterable of indexable pairs; element 0 is the word and
            element 1 its weight.

    Returns:
        list of ``{'word': <word>, 'weight': <str(weight)>}`` dicts in the
        same order as ``terms``. The weight is stringified with ``str``.
    """
    return [{'word': pair[0], 'weight': str(pair[1])} for pair in terms]

In [9]:
# load the text of bm25 retrieved documents from a json file in the format {doc_id:doc_text}
bm25_ret_docs_file = '/path/to/bm25_document_text.json'
# Use a context manager so the file handle is closed as soon as parsing finishes.
with open(bm25_ret_docs_file) as docs_file:
    bm25_ret_docs_text = json.load(docs_file)

In [10]:
# load the bm25 run on robust collection in the trec format
bm25_run_file = '/path/to/bm25_run.run'
# load_run requires the run path; the result maps query id -> {doc_id: score}
bm25_run = load_run(bm25_run_file)

235963it [00:00, 428755.11it/s]


In [11]:
# Keep the 10 highest-scoring BM25 documents of each query; these serve as
# the pseudo-relevance-feedback (PRF) documents.
topk_bm25_run = get_topk_ret_doc_p_query(ret_result=bm25_run, topk=10)

In [12]:
# Collect, for every query, the set of distinct terms that occur in its
# top-k PRF documents (document text is split on single spaces).

topk_res_terms = {}
for q in bm25_run:
    prf_docs = topk_bm25_run[q]
    if not prf_docs:
        # a query without retrieved documents gets no entry at all
        continue
    # each element of prf_docs is a (doc_id, score) pair; union the per-document term sets
    topk_res_terms[q] = set().union(*(
        set(bm25_ret_docs_text[doc[0]].strip().split(' ')) for doc in prf_docs
    ))

In [13]:
# Find the similarity between query q and every term of its top-k PRF documents.
# Each successfully processed query contributes
# {'topicNumber': <query id as str>, 'terms': [{'word': ..., 'weight': <str>}, ...]}
# with the terms ordered by descending similarity. A query for which anything
# fails (e.g. no PRF terms, or an error raised by the model) is reported and skipped.
most_sim_terms_per_query = []
for q in queries:
    print(q, queries[q])
    try:
        # Tokenise the query once: it is identical for every candidate term.
        query_terms = queries[q].strip().lower().split(' ')
        scored_terms = []
        for t in topk_res_terms[q]:
            # Some terms in the PRF documents may not be in the glove vocabulary.
            # Membership is tested on the KeyedVectors object itself, which works
            # both with gensim 3.x (`.vocab`) and gensim 4.x (`.key_to_index`).
            if t not in glove_model:
                print(t, ' not in glove vocab')
                continue
            scored_terms.append([t, glove_model.n_similarity(query_terms, [t])])
        scored_terms.sort(key=lambda x: x[1], reverse=True)
        most_sim_terms_per_query.append({
            'topicNumber': str(q),
            'terms': get_sim_terms_json_format(scored_terms),
        })
        print('----------------------------------------------------------------')
    except Exception as e:
        print(e)
        print(q, queries[q])

301 international organized crime
bakatin  not in glove vocab
dekrety  not in glove vocab
counternarcotic  not in glove vocab
loeschnak  not in glove vocab
tanomento  not in glove vocab
preinvestigations  not in glove vocab
dm102  not in glove vocab
voel  not in glove vocab
dm250  not in glove vocab
nadein  not in glove vocab
werthebach  not in glove vocab
wolny  not in glove vocab
----------------------------------------------------------------
302 poliomyelitis post polio
anganwadis  not in glove vocab
nathani  not in glove vocab
diarrohoeal  not in glove vocab
unimmunized  not in glove vocab
polymyxin  not in glove vocab
shaktisinh  not in glove vocab
tunzale  not in glove vocab
athit  not in glove vocab
09494  not in glove vocab
kasumova  not in glove vocab
chhea  not in glove vocab
1yes  not in glove vocab
1no  not in glove vocab
debilities  not in glove vocab
bounkhouang  not in glove vocab
phatthaya  not in glove vocab
urairat  not in glove vocab
--------------------------------

s1ws  not in glove vocab
zanville  not in glove vocab
civilizers  not in glove vocab
terrordome  not in glove vocab
----------------------------------------------------------------
310 radio waves brain cancer
quackeries  not in glove vocab
sively  not in glove vocab
hoppszallern  not in glove vocab
hopitals  not in glove vocab
spectroscopists  not in glove vocab
myelography  not in glove vocab
nrpb  not in glove vocab
phosphocreatine  not in glove vocab
diasonics  not in glove vocab
diasonic  not in glove vocab
dimentia  not in glove vocab
megabrain  not in glove vocab
mopt  not in glove vocab
noninvasively  not in glove vocab
chemicalk  not in glove vocab
cartilege  not in glove vocab
stehbens  not in glove vocab
buyline  not in glove vocab
yuppiedom  not in glove vocab
noninva  not in glove vocab
jected  not in glove vocab
innerquest  not in glove vocab
experi  not in glove vocab
haimovich  not in glove vocab
----------------------------------------------------------------
311 indus

lf32  not in glove vocab
ekerdt  not in glove vocab
zapico  not in glove vocab
asturia  not in glove vocab
4bn  not in glove vocab
lossmaking  not in glove vocab
lf35  not in glove vocab
lf53  not in glove vocab
serps  not in glove vocab
macminn  not in glove vocab
br19  not in glove vocab
1nn  not in glove vocab
lf49  not in glove vocab
lf39  not in glove vocab
5bn  not in glove vocab
aleene  not in glove vocab
marmonte  not in glove vocab
dilnot  not in glove vocab
----------------------------------------------------------------
319 new fuel sources
846b  not in glove vocab
08625  not in glove vocab
80mw  not in glove vocab
846c  not in glove vocab
2911  not in glove vocab
nonutility  not in glove vocab
250mw  not in glove vocab
1project  not in glove vocab
naaqs  not in glove vocab
exceedance  not in glove vocab
200kw  not in glove vocab
1agency  not in glove vocab
1final  not in glove vocab
3312  not in glove vocab
1date  not in glove vocab
300mw  not in glove vocab
11mw  not in gl

6bn  not in glove vocab
610m  not in glove vocab
280m  not in glove vocab
273m  not in glove vocab
75bn  not in glove vocab
oustees  not in glove vocab
5bn  not in glove vocab
wagstyl  not in glove vocab
beanos  not in glove vocab
----------------------------------------------------------------
332 income tax evasion
dm4bn  not in glove vocab
790m  not in glove vocab
y5m  not in glove vocab
7bn  not in glove vocab
y7bn  not in glove vocab
76bn  not in glove vocab
y26m  not in glove vocab
dm73bn  not in glove vocab
dm17bn  not in glove vocab
dm7bn  not in glove vocab
dm11bn  not in glove vocab
dm24bn  not in glove vocab
74bn  not in glove vocab
y118m  not in glove vocab
dr300bn  not in glove vocab
dm9  not in glove vocab
dr1m  not in glove vocab
dr400bn  not in glove vocab
dm3bn  not in glove vocab
contral  not in glove vocab
144m  not in glove vocab
dr50bn  not in glove vocab
dm75bn  not in glove vocab
y50m  not in glove vocab
-----------------------------------------------------------

phunsom  not in glove vocab
bevering  not in glove vocab
7122  not in glove vocab
khing  not in glove vocab
flpma  not in glove vocab
drk  not in glove vocab
4191  not in glove vocab
8090  not in glove vocab
14626  not in glove vocab
85027  not in glove vocab
layu  not in glove vocab
----------------------------------------------------------------
341 airport security
babatope  not in glove vocab
zaidman  not in glove vocab
aremu  not in glove vocab
18384  not in glove vocab
bline  not in glove vocab
dhunkaal  not in glove vocab
anabodhe  not in glove vocab
sc183  not in glove vocab
shurm  not in glove vocab
guardforce  not in glove vocab
4910  not in glove vocab
defranchised  not in glove vocab
qalaad  not in glove vocab
----------------------------------------------------------------
342 diplomatic expulsion
ostgaard  not in glove vocab
mashat  not in glove vocab
movahed  not in glove vocab
dagsrevyen  not in glove vocab
pakhtusov  not in glove vocab
unware  not in glove vocab
rapite

musculo  not in glove vocab
ukader  not in glove vocab
biomagnification  not in glove vocab
iccec  not in glove vocab
17bn  not in glove vocab
bromosuc  not in glove vocab
dipyridyl  not in glove vocab
----------------------------------------------------------------
350 health computer terminals
83m  not in glove vocab
computeritis  not in glove vocab
equitec  not in glove vocab
touchlink  not in glove vocab
vzp  not in glove vocab
usdollars  not in glove vocab
thortec  not in glove vocab
0344  not in glove vocab
0793  not in glove vocab
vdt  not in glove vocab
455199  not in glove vocab
80bn  not in glove vocab
57m  not in glove vocab
shoars  not in glove vocab
vdts  not in glove vocab
raytech  not in glove vocab
wedgestone  not in glove vocab
862222  not in glove vocab
----------------------------------------------------------------
351 falkland petroleum exploration
ernment  not in glove vocab
----------------------------------------------------------------
352 british chunnel impac

12bn  not in glove vocab
crimeline  not in glove vocab
669m  not in glove vocab
hirawi  not in glove vocab
333m  not in glove vocab
tohave  not in glove vocab
5bn  not in glove vocab
6bn  not in glove vocab
rengan  not in glove vocab
----------------------------------------------------------------
368 vitro fertilization
citali  not in glove vocab
machelle  not in glove vocab
wisot  not in glove vocab
intrafallopian  not in glove vocab
----------------------------------------------------------------
369 anorexia nervosa bulimia
averidge  not in glove vocab
marrazzi  not in glove vocab
8bn  not in glove vocab
68521  not in glove vocab
153m  not in glove vocab
slimmers  not in glove vocab
89m  not in glove vocab
infelicity  not in glove vocab
y93  not in glove vocab
27bn  not in glove vocab
fursa  not in glove vocab
legitimizers  not in glove vocab
167bn  not in glove vocab
gulpy  not in glove vocab
8203  not in glove vocab
anagrammatically  not in glove vocab
shiningly  not in glove voc

fiaca  not in glove vocab
hampar  not in glove vocab
dorena  not in glove vocab
rearwin  not in glove vocab
newworks  not in glove vocab
talesnik  not in glove vocab
troesch  not in glove vocab
tuescher  not in glove vocab
osterlund  not in glove vocab
triefus  not in glove vocab
vanderhook  not in glove vocab
----------------------------------------------------------------
380 obesity medical treatment
vlcds  not in glove vocab
cholecystokinins  not in glove vocab
adifax  not in glove vocab
21487  not in glove vocab
8bn  not in glove vocab
nannying  not in glove vocab
xerophthalmia  not in glove vocab
21208  not in glove vocab
optifast  not in glove vocab
fastin  not in glove vocab
felliti  not in glove vocab
sibutamine  not in glove vocab
noradrenalin  not in glove vocab
fenfluramines  not in glove vocab
beta3  not in glove vocab
therafast  not in glove vocab
norred  not in glove vocab
neurotransmitting  not in glove vocab
samotin  not in glove vocab
ponderax  not in glove vocab
5bn 

6bn  not in glove vocab
164m  not in glove vocab
mwir  not in glove vocab
3013  not in glove vocab
29860  not in glove vocab
sdwa  not in glove vocab
subsoils  not in glove vocab
imwir  not in glove vocab
biotreated  not in glove vocab
hswa  not in glove vocab
spading  not in glove vocab
treatability  not in glove vocab
saleability  not in glove vocab
rulemakings  not in glove vocab
uncropped  not in glove vocab
95402  not in glove vocab
6926  not in glove vocab
7003  not in glove vocab
topsoils  not in glove vocab
1current  not in glove vocab
d043  not in glove vocab
mtru  not in glove vocab
kourick  not in glove vocab
nesterova  not in glove vocab
nonwastewaters  not in glove vocab
impracticability  not in glove vocab
rototill  not in glove vocab
inexpediency  not in glove vocab
1mixed  not in glove vocab
250kg  not in glove vocab
----------------------------------------------------------------
389 illegal technology transfer
conyugues  not in glove vocab
kentaur  not in glove vocab


supervene  not in glove vocab
sanhani  not in glove vocab
easeful  not in glove vocab
suhayl  not in glove vocab
ayb  not in glove vocab
unheeding  not in glove vocab
boegl  not in glove vocab
----------------------------------------------------------------
394 home schooling
yifulou  not in glove vocab
6511  not in glove vocab
persent  not in glove vocab
0068  not in glove vocab
neimenggu  not in glove vocab
drygas  not in glove vocab
4476  not in glove vocab
transcentury  not in glove vocab
niemenggu  not in glove vocab
xiumei  not in glove vocab
aspossible  not in glove vocab
----------------------------------------------------------------
395 tourism
ustta  not in glove vocab
5695  not in glove vocab
shilsh  not in glove vocab
6985  not in glove vocab
23689  not in glove vocab
ghardaqah  not in glove vocab
19982  not in glove vocab
sation  not in glove vocab
3510  not in glove vocab
cardran  not in glove vocab
schetter  not in glove vocab
20230  not in glove vocab
nontrade  not in 

8025  not in glove vocab
829500  not in glove vocab
yellowtails  not in glove vocab
59008  not in glove vocab
----------------------------------------------------------------
409 legal pan am 103
157m  not in glove vocab
shortcutting  not in glove vocab
dalkamoni  not in glove vocab
telexed  not in glove vocab
----------------------------------------------------------------
410 schengen agreement
outrive  not in glove vocab
ffr1bn  not in glove vocab
echevin  not in glove vocab
haulier  not in glove vocab
legill  not in glove vocab
archirafi  not in glove vocab
delorozoy  not in glove vocab
textline  not in glove vocab
lfr10  not in glove vocab
wogau  not in glove vocab
wijsenbeek  not in glove vocab
----------------------------------------------------------------
411 salvaging shipwreck treasure
herdendorf  not in glove vocab
martinucci  not in glove vocab
sotres  not in glove vocab
portichuelo  not in glove vocab
braer  not in glove vocab
marex  not in glove vocab
pointvicente  not i

kums  not in glove vocab
----------------------------------------------------------------
424 suicides
diyena  not in glove vocab
breth  not in glove vocab
mikelsone  not in glove vocab
everse  not in glove vocab
----------------------------------------------------------------
425 counterfeiting money
parfumes  not in glove vocab
xiongxiong  not in glove vocab
kerlau  not in glove vocab
nizniy  not in glove vocab
nikerkhayev  not in glove vocab
banknot  not in glove vocab
demoninations  not in glove vocab
spravochnik  not in glove vocab
350bn  not in glove vocab
skv  not in glove vocab
r70  not in glove vocab
ciculation  not in glove vocab
deutchmarks  not in glove vocab
cherniy  not in glove vocab
fantiki  not in glove vocab
----------------------------------------------------------------
426 law enforcement dogs
strebeck  not in glove vocab
histen  not in glove vocab
leahan  not in glove vocab
uncanine  not in glove vocab
068  not in glove vocab
batiquitos  not in glove vocab
guyse  

kliridhis  not in glove vocab
andoniou  not in glove vocab
----------------------------------------------------------------
434 estonia economy
skr500m  not in glove vocab
kadanik  not in glove vocab
tolstikov  not in glove vocab
agentuur  not in glove vocab
pollisinski  not in glove vocab
skr92m  not in glove vocab
rasman  not in glove vocab
alisinvesteeringute  not in glove vocab
kaupo  not in glove vocab
skr60m  not in glove vocab
fm3bn  not in glove vocab
375m  not in glove vocab
fm400m  not in glove vocab
aevaleht  not in glove vocab
skr850m  not in glove vocab
----------------------------------------------------------------
435 curbing population growth
11bn  not in glove vocab
5030  not in glove vocab
310m  not in glove vocab
macrocontrol  not in glove vocab
crunelle  not in glove vocab
macroregulatory  not in glove vocab
shaozhou  not in glove vocab
shirao  not in glove vocab
haikuan  not in glove vocab
doomsters  not in glove vocab
ningsun  not in glove vocab
20bn  not in glov

britischer  not in glove vocab
architekt  not in glove vocab
69m  not in glove vocab
visionware  not in glove vocab
chemring  not in glove vocab
compco  not in glove vocab
staatsgallerie  not in glove vocab
baggeridge  not in glove vocab
84m  not in glove vocab
mangt  not in glove vocab
lossmaking  not in glove vocab
hibit  not in glove vocab
sunsail  not in glove vocab
operat  not in glove vocab
hungarhotels  not in glove vocab
85p  not in glove vocab
randomat  not in glove vocab
unpompous  not in glove vocab
sekers  not in glove vocab
39p  not in glove vocab
bogod  not in glove vocab
----------------------------------------------------------------
448 ship losses
065  not in glove vocab
124m  not in glove vocab
04m  not in glove vocab
54m  not in glove vocab
seleyang  not in glove vocab
07m  not in glove vocab
porcess  not in glove vocab
34p  not in glove vocab
inpections  not in glove vocab
09m  not in glove vocab
29p  not in glove vocab
44p  not in glove vocab
muhaysin  not in glov

20505  not in glove vocab
thopha  not in glove vocab
----------------------------------------------------------------
611 kurds germany violence
prereleased  not in glove vocab
dauke  not in glove vocab
diyabakir  not in glove vocab
birzele  not in glove vocab
bernrath  not in glove vocab
komkar  not in glove vocab
antifa  not in glove vocab
nonapproved  not in glove vocab
----------------------------------------------------------------
612 tibet protesters
berphiaume  not in glove vocab
jiangzi  not in glove vocab
xikaze  not in glove vocab
gyalcan  not in glove vocab
syas  not in glove vocab
gyamtso  not in glove vocab
toinzhu  not in glove vocab
----------------------------------------------------------------
613 berlin wall disposal
feske  not in glove vocab
jelloian  not in glove vocab
promenaded  not in glove vocab
invalidenstrasse  not in glove vocab
rettenwender  not in glove vocab
streletz  not in glove vocab
vopo  not in glove vocab
bulkiest  not in glove vocab
vopos  not in 

bushwackin  not in glove vocab
kripashankar  not in glove vocab
6002  not in glove vocab
bensmiller  not in glove vocab
----------------------------------------------------------------
627 russian food crisis
starodubstev  not in glove vocab
novokuznets  not in glove vocab
igityan  not in glove vocab
burbulis  not in glove vocab
krilov  not in glove vocab
shumeyko  not in glove vocab
genady  not in glove vocab
saebyol  not in glove vocab
pravitelstvennyi  not in glove vocab
kuzbas  not in glove vocab
constitutent  not in glove vocab
aslanidi  not in glove vocab
rbs4  not in glove vocab
----------------------------------------------------------------
628 us invasion panama
typaldos  not in glove vocab
impulsions  not in glove vocab
ozores  not in glove vocab
----------------------------------------------------------------
629 abortion clinic attack
vagrancies  not in glove vocab
repro  not in glove vocab
orangethorpe  not in glove vocab
--------------------------------------------------

769851  not in glove vocab
longimembris  not in glove vocab
macularius  not in glove vocab
bl4  not in glove vocab
4310  not in glove vocab
4401  not in glove vocab
uncuddly  not in glove vocab
unknotting  not in glove vocab
744707  not in glove vocab
pohakuloa  not in glove vocab
786728  not in glove vocab
macrodactylum  not in glove vocab
20737  not in glove vocab
bl2  not in glove vocab
coolpassion  not in glove vocab
bontebok  not in glove vocab
20782  not in glove vocab
4916  not in glove vocab
787484  not in glove vocab
reimports  not in glove vocab
788912  not in glove vocab
134a  not in glove vocab
134b  not in glove vocab
lepidochelys  not in glove vocab
cagier  not in glove vocab
89108  not in glove vocab
croceum  not in glove vocab
704301  not in glove vocab
675990  not in glove vocab
4901  not in glove vocab
766567  not in glove vocab
biocontrols  not in glove vocab
13556  not in glove vocab
744554  not in glove vocab
smalstig  not in glove vocab
788471  not in glove vocab


247b  not in glove vocab
reigart  not in glove vocab
apiol  not in glove vocab
butylazo  not in glove vocab
aldicarb  not in glove vocab
ab23  not in glove vocab
lucel  not in glove vocab
diethyltin  not in glove vocab
1location  not in glove vocab
1year  not in glove vocab
8339  not in glove vocab
6560  not in glove vocab
hexachlorophene  not in glove vocab
nofa  not in glove vocab
cresylphosphate  not in glove vocab
za67  not in glove vocab
----------------------------------------------------------------
657 school prayer banned
urumiyeh  not in glove vocab
noncurriculum  not in glove vocab
monafeqin  not in glove vocab
patsey  not in glove vocab
khamene  not in glove vocab
farrakan  not in glove vocab
mergens  not in glove vocab
----------------------------------------------------------------
658 teenage pregnancy
7bn  not in glove vocab
8890  not in glove vocab
642m  not in glove vocab
8bn  not in glove vocab
51c  not in glove vocab
6bn  not in glove vocab
aetcs  not in glove vocab

zvyagilskiy  not in glove vocab
sorzano  not in glove vocab
gerlein  not in glove vocab
volksunion  not in glove vocab
precandidates  not in glove vocab
yemetz  not in glove vocab
ukrinform  not in glove vocab
pliushch  not in glove vocab
----------------------------------------------------------------
671 salvation army benefits
cger  not in glove vocab
sagic  not in glove vocab
continentale  not in glove vocab
6875  not in glove vocab
aslk  not in glove vocab
restof  not in glove vocab
reffin  not in glove vocab
mckorn  not in glove vocab
tilen  not in glove vocab
----------------------------------------------------------------
672 nra membership profile
gunshine  not in glove vocab
owningest  not in glove vocab
wessons  not in glove vocab
graceanne  not in glove vocab
gaudot  not in glove vocab
avtomat  not in glove vocab
supenski  not in glove vocab
morphonios  not in glove vocab
intratec  not in glove vocab
mandall  not in glove vocab
rugers  not in glove vocab
hiers  not in glove

1383c  not in glove vocab
1382c  not in glove vocab
ec3v  not in glove vocab
tecs  not in glove vocab
coloroll  not in glove vocab
1381a  not in glove vocab
1382j  not in glove vocab
1382b  not in glove vocab
3nl  not in glove vocab
----------------------------------------------------------------
685 oscar winner selection
angenieux  not in glove vocab
smotheringly  not in glove vocab
064  not in glove vocab
netalya  not in glove vocab
veraz  not in glove vocab
negoda  not in glove vocab
bennent  not in glove vocab
macminn  not in glove vocab
flanneled  not in glove vocab
chumo  not in glove vocab
aleene  not in glove vocab
----------------------------------------------------------------
686 argentina pegging dollar
niedvaras  not in glove vocab
marketmakers  not in glove vocab
16bn  not in glove vocab
bimonetary  not in glove vocab
31bn  not in glove vocab
flexibilising  not in glove vocab
42bn  not in glove vocab
8bn  not in glove vocab
61bn  not in glove vocab
6bn  not in glove voca

3174  not in glove vocab
0313  not in glove vocab
8306  not in glove vocab
5765  not in glove vocab
15744  not in glove vocab
2964  not in glove vocab
8071  not in glove vocab
26051  not in glove vocab
r4bn  not in glove vocab
3775  not in glove vocab
r3bn  not in glove vocab
r907  not in glove vocab
oromigna  not in glove vocab
6676  not in glove vocab
11852  not in glove vocab
3281  not in glove vocab
13922  not in glove vocab
velopment  not in glove vocab
3185  not in glove vocab
28595  not in glove vocab
5867  not in glove vocab
8258  not in glove vocab
13072  not in glove vocab
evelopment  not in glove vocab
7850  not in glove vocab
10251  not in glove vocab
7395  not in glove vocab
0187  not in glove vocab
3347  not in glove vocab
7702  not in glove vocab
8180  not in glove vocab
31872  not in glove vocab
12141  not in glove vocab
6345  not in glove vocab
3465  not in glove vocab
3773  not in glove vocab
r345  not in glove vocab
elopment  not in glove vocab
6525  not in glove voc

#### Save the result in JSON format

In [14]:
# Write the per-query expansion terms as a JSON array with one query object
# per line. Joining the serialised entries also handles an empty result list
# (the file then contains a valid empty array) instead of failing on index -1.
SAVE_DIR = '/mnt/scratch2/shnaseri/Experiments/context_query_exp/Robust/exp-terms/'
with open(SAVE_DIR + 'glove_exp_terms_of_topk_docs1.json', 'w') as output:
    serialized_entries = ",\n".join(json.dumps(entry) for entry in most_sim_terms_per_query)
    output.write('[' + serialized_entries + "\n" + ']')