In [4]:
import numpy as np
import operator
from utils.methods import length, unit, cosine, find_vecs, sort_disct, find_sim, find_insts
from utils.tree import Tree
import gensim

In [None]:
# Read Google's pre-trained Word2Vec vectors (binary word2vec format) into
# `model`; every example cell in this notebook looks words up through it.
word2vec_path = 'data/word2vec/GoogleNews-vectors-negative300.bin.gz'
model = gensim.models.KeyedVectors.load_word2vec_format(
    word2vec_path,
    binary=True,
)
print('... model loaded ...')

In [5]:
########################################################################
# 3.1.2 Categorization
# Example 1.

# Months: build a tree from four month names and list its instances.
support_vector_words_months = ['November', 'December', 'May', 'September']
h_months = Tree(model, support_vector_words_months, None)
h_months.find_insts()

# The remaining word groups are processed identically, so they share one loop.
# `support_vector` and `h_adult` are rebound on every pass and finish holding
# the last word list and its tree. The name `h_adult` is kept as in the
# original cell even though none of these groups is about adults.
for support_vector in (
    ['hand', 'foot'],
    ['man', 'queen'],
    ['car', 'speed', 'driver', 'wheel'],
):
    h_adult = Tree(model, support_vector, None)
    h_adult.find_insts()


support vectors:
November
December
May
September

instances of the source:
('October', 0.9762672386560065)
('September', 0.96938921721851)
('February', 0.9637033596820024)
('November', 0.9633622069561678)
('December', 0.9584515023448367)
('August', 0.9554799903357435)
('January', 0.9509219051885732)
('March', 0.9468773748143877)
('June', 0.9400733172495658)
('May', 0.9341389748230425)
('April', 0.9319802111775765)
('July', 0.9183570070605671)

support vectors:
hand
foot

instances of the source:
('hand', 0.7833214506427162)
('foot', 0.7833213661261891)
('finger', 0.4948128818839388)
('wrist', 0.48082118778060884)

support vectors:
man
queen

instances of the source:
('queen', 0.7637349490360181)
('man', 0.7637349028094987)
('woman', 0.7087423887679983)
('girl', 0.6168551117777956)

support vectors:
car
speed
driver
wheel

instances of the source:
('driver', 0.793975434377619)
('wheel', 0.7766899449466298)
('car', 0.7647040329710155)
('drivers', 0.6399795637636363)
('vehicle', 0.6388562

In [6]:
########################################################################
# 3.1.3 Meaning of semantic tree branches
# Example 2.

support_vector_adult = ['Spain', 'France', 'Russia', 'Germany', 'USA']
print(support_vector_adult)

h_adult = Tree(model, support_vector_adult, None)
h_adult.find_insts()

# Show the instances of every branch, indexed in the same order as the
# support words (index 0 is the first word, and so on).
for branch_index in range(len(support_vector_adult)):
    h_adult.branch_instances(branch_index)


['Spain', 'France', 'Russia', 'Germany', 'USA']
support vectors:
Spain
France
Russia
Germany
USA

instances of the source:
('Germany', 0.8207244343704916)
('France', 0.7906501469634922)
('Spain', 0.7694617964344214)
('Italy', 0.7069019526103362)
('Belgium', 0.6898427925536972)
('Russia', 0.6897856545660481)
('Europe', 0.6845493595446867)
('Austria', 0.6771322686068174)
('Switzerland', 0.6593649249589146)
('Czech_Republic', 0.6567072064651759)
('Netherlands', 0.6544353348100079)
('Romania', 0.6523835433455439)
('Hungary', 0.650733015275954)
('Poland', 0.6496222789304603)
('Argentina', 0.6487135047045006)
('Croatia', 0.6249655467424288)
('Portugal', 0.6197267377046075)
('Britain', 0.6190645436412245)
('USA', 0.6178472944775794)
('Brazil', 0.6176541162405118)
('Sweden', 0.6143366832749093)
('Ukraine', 0.6017091777667769)
('Finland', 0.6008335044182567)
('European', 0.5919446803345122)
('Denmark', 0.5882160960853273)
('Japan', 0.5854512179479348)
('Bulgaria', 0.5833336924872465)
('Turkey',

In [7]:
########################################################################
# 3.1.4 Grammatical categories
# Example 3

# Each word group (verb tense, degree of comparison, noun number) is handled
# the same way: build a tree, list the instances of its source, then list the
# instances of every branch in support-word order.
for support_vector in (
    ['walk', 'walked'],
    ['well', 'better', 'best'],
    ['dog', 'dogs'],
):
    # The name `h_months` is kept from the original cell although these
    # trees are not about months.
    h_months = Tree(model, support_vector, None)
    h_months.find_insts()
    for branch_index in range(len(support_vector)):
        h_months.branch_instances(branch_index)


support vectors:
walk
walked

instances of the source:
('walked', 0.9173832544772685)
('walk', 0.917383173678289)

instances of the branch: walk
walk

instances of the branch: walked
ran
walked
threw
drove
sat
fled
grabbed
crashed
flew
blew
Announces
slid
exploded
erupted
wounding
shook
©
marched
collided
shouted
pronounced_dead
stormed
article_contains
tore
waved

support vectors:
well
better
best

instances of the source:
('better', 0.820290809011908)
('best', 0.7942518850953415)
('well', 0.7493072171584936)
('good', 0.6923575467776534)
('poorly', 0.5248129240309759)
('differently', 0.5224479725968255)
('quicker', 0.4980097905891259)
('toughest', 0.4200598031254735)
('report_abuse_button', 0.15180945501359164)

instances of the branch: well
well

instances of the branch: better
better
worse
easier
stronger
faster
harder
tougher
safer
quicker
healthier

instances of the branch: best
best
worst
greatest
strongest
toughest
finest
hottest

support vectors:
dog
dogs

instances of the sour

In [8]:
########################################################################
# A.2.1 Antonymy
# Example 5

# Asymmetric antonymy: tree over the pair man / woman, then the plain
# instances of the 'man' branch.
print('man woman')
support_vector_adult = ['man', 'woman']
h_adult = Tree(model, support_vector_adult, None)
h_adult.find_insts()
h_adult.branch_instances(0)

# Lengths of the two raw word vectors, printed as "<word> <length>".
for word in support_vector_adult:
    print(word, length(model[word]))

# Symmetric antonymy: split the 'woman' branch into the part lying along the
# 'man' branch direction and the remainder.
print('mod branch woman')
man_branch = h_adult.branches[0]
woman_branch = h_adult.branches[1]
unit_mail = unit(man_branch)
# The two sign flips cancel: (-u . b) * (-u) == (u . b) * u, i.e. the component
# of the woman branch along u (a projection, assuming `unit` returns a
# unit-length vector -- TODO confirm in utils.methods).
along_man_coeff = np.dot(-unit_mail, woman_branch)
opposite_mail = along_man_coeff * -unit_mail
find_insts(model, opposite_mail)

# What is left of the woman branch once that component is removed.
print('upward branch woman')
find_insts(model, woman_branch - opposite_mail)

# Regular (unmodified) woman branch, for comparison.
print('regular branch woman')
h_adult.branch_instances(1)




man woman
support vectors:
man
woman

instances of the source:
('woman', 0.9397875662903468)
('man', 0.9397875350771672)

instances of the branch: man
man
guys
guy
Man
hero
Sports_Writer
winger
genius

mod branch woman
instances of the source:
('spokeswoman', 1.3230135600306563)
('pregnancy', 1.2458015066122183)
('herself', 1.1384455826818505)
('actress', 1.1368947430234801)
('Women', 1.095780629464288)
('Ms.', 1.0927896261158152)
('pregnant', 1.0876568870214482)
('Ms', 1.0431265764979043)
('mothers', 1.0251475764965092)
('she', 0.9910941598718414)
('breast_cancer', 0.988529243000423)
('gender', 0.9700311563896022)
('babies', 0.9097078050478751)
('woman', 0.9075840693912199)

upward branch woman
instances of the source:
('woman', 2.495722339544731)
('man', 2.1714892435286086)
('girl', 2.0729242629009677)
('Woman', 1.9518481599159394)
('boy', 1.9088902769996612)
('gunman', 1.9045658495286248)
('teenager', 1.8680841919996838)
('stabbed', 1.7433729040253876)
('stabbing', 1.690368145454633

actress
abortion
Ms
pregnant
Miss
daughters
Mrs
lady
babies
breast_cancer
mothers
nurse
grandmother
couples
gender
pregnancy
Woman
report_abuse_button
infant
raped
breast
widow

man 2.3106172
woman 2.655624


In [9]:
########################################################################
# A.2.2 Polysemy
# Example 6

# Baseline: the ten nearest words to the raw 'chair' vector.
print('words similar to chair')
find_sim(model, model['chair'], 10)

# Category tree built from words describing a managing role.
support_vector_manager = ['director', 'chairman', 'head', 'executive', 'president', 'speaker']
cat_manager = Tree(model, support_vector_manager, None)
cat_manager.find_insts()

# Remove the root of that category from 'chair' and query again, to see which
# neighbours remain once the "manager" sense is subtracted.
print('words similar to chair - manager')
# find_sim prints its results itself; the trailing ';' stops the notebook from
# echoing the returned iterator's repr (<list_reverseiterator ...>) as the
# cell's Out value.
find_sim(model, model['chair'] - cat_manager.root, 10);




words similar to chair
('chair', 3.0518358)
('chairs', 2.4021974)
('Chair', 2.2853484)
('chairperson', 1.9232249)
('chairwoman', 1.9139162)
('chairman', 1.8803706)
('Vice_Chair', 1.7232699)
('Co_Chair', 1.6692963)
('chairing', 1.6576846)
('Chairs', 1.6466459)

support vectors:
director
chairman
head
executive
president
speaker

instances of the source:
('chairman', 0.824242959710922)
('executive', 0.811630502155094)
('president', 0.8114869095626118)
('director', 0.8066601614459316)
('vice_president', 0.7496077721147711)
('chief_executive', 0.6886873734192565)
('Executive_Director', 0.6797398696994327)
('managing_director', 0.6657381076417622)
('chief', 0.6600173476653418)
('head', 0.6511749202501224)
('chair', 0.6466832897856418)
('Chairman', 0.6463830475232717)
('CEO', 0.6447943926014384)
('Vice_President', 0.598830861716218)
('Director', 0.5968860293473758)
('VP', 0.5906316697042124)
('Chair', 0.5903537539692406)
('secretary', 0.5835946329778882)
('founder', 0.5835017314809737)
('coo

<list_reverseiterator at 0x7f749d282f28>

In [10]:
########################################################################
# A.2.3 Hypernymy
# Example 7

# Hypernym "lack": the difference between the word vector 'months' and the
# root of the tree built from the twelve month names.
support_vector_months = ['February', 'October', 'December', 'June', 'August', 'July', 'November',
                         'September', 'April', 'March', 'January', 'May']
h_months = Tree(model, support_vector_months, None)
h_months.find_insts()

# The two labels below were previously swapped relative to the word actually
# queried (and misspelled); each label now names the vector passed to find_sim.
print('vectors similar to months')
find_sim(model, model['months'], 10)

print('vectors similar to month')
find_sim(model, model['month'], 10)


# Compare the length of the tree root with the length of the 'months' vector.
len_months_abstr = length(h_months.root)
print('len_months root: '+ str(len_months_abstr))
len_months = length(model['months'])
print('len_months: '+ str(len_months))
print('')

# Nearest words to the root of the months tree.
print('len_months sim vectors:')
find_sim(model, h_months.root, 10)

# The "lack" vector is what 'months' has beyond the months root; a tree is
# built from that single precomputed vector (passed as the third argument
# instead of None) under the label 'month_lack'.
lack = model['months'] - h_months.root
h_months_lack = Tree(model, ['month_lack'], [lack])
h_months_lack.find_insts()

print('lack months sim vectors:')
# Trailing ';' suppresses the <list_reverseiterator ...> repr that the
# notebook would otherwise echo as the cell's Out value.
find_sim(model, lack, 10);

support vectors:
February
October
December
June
August
July
November
September
April
March
January
May

instances of the source:
('October', 0.9783126195039022)
('February', 0.971259553630031)
('August', 0.9686985545183694)
('March', 0.9638293096998848)
('September', 0.9603501127078066)
('June', 0.9600558294060397)
('April', 0.9585525296205124)
('January', 0.9551238379726366)
('July', 0.9475423806442835)
('May', 0.9471191411241837)
('November', 0.9457843919721464)
('December', 0.942204245169785)

vectors similart to month
('months', 2.401113)
('weeks', 2.0805054)
('years', 1.8744059)
('days', 1.8518896)
('month', 1.6351727)
('decades', 1.4139879)
('year', 1.3904314)
('hours', 1.3581333)
('week', 1.3386409)
('Months', 1.3271725)

vectors similart to months
('month', 2.153279)
('week', 1.8012607)
('year', 1.6479722)
('months', 1.4663965)
('weeks', 1.4055755)
('August', 1.3277794)
('February', 1.321259)
('October', 1.3167071)
('January', 1.3116376)
('day', 1.2929561)

len_months root: 2.0

<list_reverseiterator at 0x7f75bc299588>