# Examples for query expansion

In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

## K-gram matching

In [7]:
import nltk
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Build a word -> {trigram: count} index and turn it into a matrix T with
# one row per word and one column per character trigram.
# Each word is wrapped in the boundary markers "#s" (start) and "#e" (end)
# before trigrams are taken, so the marker letters themselves appear inside
# trigrams such as "#sp" and "spl".
words = ['play', 'pley', 'plai', 'game']
I = defaultdict(lambda: defaultdict(int))
for w in words:
    padded = "#s" + w + "#e"
    for gram in nltk.ngrams(padded, n=3):
        # gram is a tuple of three characters; glue it back into a string key.
        I[w]["".join(gram)] += 1
# pd.DataFrame(I) has words as columns; transpose so words become rows, then
# replace the NaNs (trigram absent from that word) with a count of 0.
T = pd.DataFrame(I).T.fillna(0)

In [6]:
# Display the word-by-trigram count matrix built in the previous cell.
T

Unnamed: 0,#sp,spl,pla,lay,ay#,y#e,ple,ley,ey#,lai,ai#,i#e,#sg,sga,gam,ame,me#,e#e
play,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pley,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
plai,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
game,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
# Pairwise cosine similarity between the rows of T, wrapped in a DataFrame
# that carries T's row labels on both axes and rounded to two decimals.
sigma = cosine_similarity(T, T)
labels = T.index
S = pd.DataFrame(sigma, index=labels, columns=labels).round(2)

In [11]:
# Display the rounded pairwise cosine-similarity table.
S

Unnamed: 0,play,pley,plai,game
play,1.0,0.5,0.5,0.0
pley,0.5,1.0,0.33,0.0
plai,0.5,0.33,1.0,0.0
game,0.0,0.0,0.0,1.0


## WordNet

In [12]:
from nltk.corpus import wordnet as wn

In [20]:
# Example query. NOTE(review): the lookups in the following cells pass the
# literals 'President' and 'Lincoln' directly rather than reading q.
q = 'President Lincoln'

In [21]:
# WordNet synsets for 'President', restricted to the noun part of speech.
n_syns = wn.synsets('President', pos=wn.NOUN)

In [22]:
# WordNet synsets for 'Lincoln', restricted to the noun part of speech.
synsets = wn.synsets('Lincoln', pos=wn.NOUN)

In [23]:
# List every 'Lincoln' noun sense as "<synset name> <definition>", one per line.
for s in synsets:
    print(f"{s.name()} {s.definition()}")

lincoln.n.01 16th President of the United States; saved the Union during the American Civil War and emancipated the slaves; was assassinated by Booth (1809-1865)
lincoln.n.02 capital of the state of Nebraska; located in southeastern Nebraska; site of the University of Nebraska
lincoln.n.03 long-wooled mutton sheep originally from Lincolnshire


In [24]:
# List every 'President' noun sense as "<synset name> <definition>", one per line.
for n in n_syns:
    print(f"{n.name()} {n.definition()}")

president.n.01 an executive officer of a firm or corporation
president_of_the_united_states.n.01 the person who holds the office of head of state of the United States government
president.n.03 the chief executive of a republic
president.n.04 the officer who presides at the meetings of an organization
president.n.05 the head administrative officer of a college or university
president_of_the_united_states.n.02 the office of the United States head of state
