In [1]:
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from glob import glob


In [2]:
def getModel(modelFile):
    model = KeyedVectors.load_word2vec_format(modelFile)
    return model

# https://datascience-enthusiast.com/DL/Operations_on_word_vectors.html
def similar_cos(u, v):
    distance = 0.0

    # Compute the dot product between u and v (≈1 line)
    dot = np.dot(u, v)
    # Compute the L2 norm of u (≈1 line)
#     norm_u = np.sqrt(np.sum(u**2))
    norm_u = np.linalg.norm(u)
    
    # Compute the L2 norm of v (≈1 line)
#     norm_v = np.sqrt(np.sum(v**2))
    norm_v = np.linalg.norm(v)
    
    # Compute the cosine similarity defined by formula (1) (≈1 line)
    cosine_similarity = dot / (norm_u * norm_v)

    return cosine_similarity

def extract_relation(target, related):
    return target - related

def find_best_related_word(analogy_vector, model):
#     related = target + relation
    max_sim = 0.0
    best_word = None
    for word in model.vocab:
#         relation1 = relation.reshape(1,-1)
#         relation2 = extract_relation(model.word_vec(target), model.word_vec(word)).reshape(1,-1)
        sim = similar_cos(analogy_vector, model[word])
        if(sim > max_sim):
            best_word = word
    
    return best_word;
    

In [3]:
def processTriadAnalogy(word1,word2,word3, modelsList):
    result = []
    for modelFileRef in modelsList:
        model = getModel(modelFileRef)
        if(word1 in model and word2 in model and word3 in model):
            
            v_word1 = model.word_vec(word1)
            v_word2 = model.word_vec(word2)
            v_word3 = model.word_vec(word3)
    #         basisRelation = extract_relation(v_word1, v_word2)
            basisRelation = v_word1 - v_word2 + v_word3
            bestWordOnModel = find_best_related_word(basisRelation, model)
            result.append('Model: ' + modelFileRef + '/n Best Word : ' + bestWordOnModel)
        else:
            print('Fail:Word not found in model')
    return result
            

In [4]:
# result = processTriadAnalogy(word1,word2,word3,modelsList)

- Teste 1 - Biblioteca KeyedVector e seus metodos de similaridade

In [5]:
modelsList = glob('../models/cbow_*.txt')
model = [getModel(x) for x in modelsList]

print(modelsList)


['../models/cbow_s300.txt', '../models/cbow_s100.txt', '../models/cbow_s50.txt']


In [21]:
model[0].most_similar(positive=['barack', 'obama'])

[('eisenhower', 0.47269853949546814),
 ('lula', 0.460972398519516),
 ('bush', 0.4601280987262726),
 ('ratsiraka', 0.43053072690963745),
 ('clinton', 0.4282917082309723),
 ('saakashvili', 0.4188674986362457),
 ('uribe', 0.4161287844181061),
 ('draghi', 0.40998852252960205),
 ('netanyahu', 0.403103768825531),
 ('yushchenko', 0.3994860053062439)]

In [8]:
word1 = 'rei'
word2 = 'homem'
word3 = 'rainha'

for index, x in enumerate(model):
    nMostSimilar = x.most_similar(positive=[word1,word3], negative=[word2])

    print(modelsList[index], nMostSimilar)
    print('Positive:  ', word1,word3, '\nNegative:',word2)
    print()

../models/cbow_s300.txt [('princesa', 0.5880060791969299), ('infanta', 0.5544092655181885), ('rainha-mãe', 0.5047087669372559), ('ex-rainha', 0.5002995729446411), ('imperatriz', 0.4982605278491974), ('raínha', 0.498224675655365), ('rainha-consorte', 0.4923296570777893), ('duquesa', 0.4890612065792084), ('condessa', 0.4880494177341461), ('regente', 0.460879385471344)]
Positive:   rei rainha 
Negative: homem

../models/cbow_s100.txt [('raínha', 0.6748343706130981), ('princesa', 0.6687842607498169), ('rainha-consorte', 0.6617846488952637), ('rainha-mãe', 0.6472653746604919), ('duquesa', 0.6418556571006775), ('pártia', 0.6328529119491577), ('imperatriz', 0.628162682056427), ('primogénita', 0.6190635561943054), ('condessa', 0.6179429292678833), ('coroação', 0.6084483861923218)]
Positive:   rei rainha 
Negative: homem

../models/cbow_s50.txt [('duquesa', 0.7385571002960205), ('princesa', 0.724755048751831), ('grã-duquesa', 0.7228216528892517), ('imperatriz', 0.7180843949317932), ('rainha-con

In [None]:
word1 = 'rei'
word2 = 'homem'
word3 = 'mulher'

listm = [modelsList[0]]

result = processTriadAnalogy(word1, word2, word3, listm)

Troca de Palavras 

In [13]:
word1 = 'rei'
word2 = 'homem'
word3 = 'mulher'

model = getModel(modelsList[2])

list = model.most_similar(positive=[word1,word3], negative=[word2])

In [14]:
print(modelsList[2], list)
print('Positive:  ', word1,word3, '\nNegative:',word2)

../models/cbow_s100.txt [('esposa', 0.688875675201416), ('filha', 0.6741011142730713), ('governanta', 0.6724554896354675), ('sobrinha', 0.669994056224823), ('madrasta', 0.6670832633972168), ('concubina', 0.6659663915634155), ('dama-de-companhia', 0.6614803075790405), ('benção', 0.659320056438446), ('múmia', 0.6535747051239014), ('bênção', 0.6500141620635986)]
Positive:   rei mulher 
Negative: homem


## Testando os métodos indicados pelo Paper - FastText 300 e  Glove 300

In [5]:
models = []
fast = '../models/skip_s300fast.txt'
glove = '../models/glove_s300.txt'
models.append(getModel(fast))
models.append(getModel(glove))

In [6]:
model = models[0]
word1 = 'rei'
word2 = 'homem'
word3 = 'mulher'

list = model.most_similar(positive=[word1,word3], negative=[word2])

In [8]:
model = models[1]
list2 = model.most_similar(positive=[word1,word3], negative=[word2])b

In [9]:
print(list)
print(list2)

[('rainha', 0.7449502348899841), ('rainha-regente', 0.6796571016311646), ('princesa-regente', 0.6528101563453674), ('esposa', 0.6449019908905029), ('princesa', 0.6373282670974731), ('consorte', 0.6351529955863953), ('«rainha', 0.6276739835739136), ('filha', 0.6266849040985107), ('desposa', 0.6201632022857666), ('rainha-a', 0.6200650930404663)]
[('rainha', 0.7193283438682556), ('filha', 0.6310628652572632), ('esposa', 0.627386212348938), ('princesa', 0.6068346500396729), ('isabel', 0.5972704887390137), ('irmã', 0.5631763935089111), ('consorte', 0.5383861064910889), ('trono', 0.5359665155410767), ('príncipe', 0.5172233581542969), ('joana', 0.5126430988311768)]


In [16]:
models[0].most_similar(positive=['primeira-dama','obama'])

[('primeira-damas', 0.8656221628189087),
 ('barack', 0.8468867540359497),
 ('ex-primeira-dama', 0.7975351214408875),
 ('obamas', 0.762374997138977),
 ('clinton', 0.7279771566390991),
 ('obama.a', 0.7279264330863953),
 ('segunda-dama', 0.721454918384552),
 ('vice-primeira-dama', 0.7174794673919678),
 ('robama', 0.713536262512207),
 ('hillary', 0.7119442224502563)]

## Avaliando precisao dos modelos


In [20]:
def eval_analogy_accuracy(modelRef, testSet):
    model = getModel(modelRef)
    acc = model.accuracy(testSet)
    print(acc[0]['correct'])
    corr = len(acc[0]['correct'])
    incorr = len(acc[0]['incorrect'])
    
    return [corr, incorr]

In [21]:
modelsList = glob('../models/*fast.txt')
testSet = glob('../testsets/*Analogies*')
acc = {}
for model in modelsList :
    for test in testSet:        
        acc_ratings = eval_analogy_accuracy(model,test)
        acc[model+test] = acc_ratings
        accuracy = acc_ratings[0] / (acc_ratings[0] + acc_ratings[1])
        print(model + '@' + test + '-' + str(accuracy) + '%')

[('ATENAS', 'GRÉCIA', 'PEQUIM', 'CHINA'), ('ATENAS', 'GRÉCIA', 'BERLIM', 'ALEMANHA'), ('ATENAS', 'GRÉCIA', 'CAIRO', 'EGITO'), ('ATENAS', 'GRÉCIA', 'HAVANA', 'CUBA'), ('ATENAS', 'GRÉCIA', 'HELSÍNQUIA', 'FINLÂNDIA'), ('ATENAS', 'GRÉCIA', 'LONDRES', 'INGLATERRA'), ('ATENAS', 'GRÉCIA', 'MADRID', 'ESPANHA'), ('ATENAS', 'GRÉCIA', 'MOSCOVO', 'RÚSSIA'), ('ATENAS', 'GRÉCIA', 'OSLO', 'NORUEGA'), ('ATENAS', 'GRÉCIA', 'OTTAWA', 'CANADÁ'), ('ATENAS', 'GRÉCIA', 'PARIS', 'FRANÇA'), ('ATENAS', 'GRÉCIA', 'ROMA', 'ITÁLIA'), ('ATENAS', 'GRÉCIA', 'ESTOCOLMO', 'SUÉCIA'), ('ATENAS', 'GRÉCIA', 'TÓQUIO', 'JAPÃO'), ('PEQUIM', 'CHINA', 'BERLIM', 'ALEMANHA'), ('PEQUIM', 'CHINA', 'CAIRO', 'EGITO'), ('PEQUIM', 'CHINA', 'HAVANA', 'CUBA'), ('PEQUIM', 'CHINA', 'HELSÍNQUIA', 'FINLÂNDIA'), ('PEQUIM', 'CHINA', 'LONDRES', 'INGLATERRA'), ('PEQUIM', 'CHINA', 'MOSCOVO', 'RÚSSIA'), ('PEQUIM', 'CHINA', 'OSLO', 'NORUEGA'), ('PEQUIM', 'CHINA', 'OTTAWA', 'CANADÁ'), ('PEQUIM', 'CHINA', 'PARIS', 'FRANÇA'), ('PEQUIM', 'CHINA', 'ROM

[('ATENAS', 'GRÉCIA', 'BAGDÁ', 'IRAQUE'), ('ATENAS', 'GRÉCIA', 'PEQUIM', 'CHINA'), ('ATENAS', 'GRÉCIA', 'BERLIM', 'ALEMANHA'), ('ATENAS', 'GRÉCIA', 'CAIRO', 'EGITO'), ('ATENAS', 'GRÉCIA', 'HAVANA', 'CUBA'), ('ATENAS', 'GRÉCIA', 'CABUL', 'AFEGANISTÃO'), ('ATENAS', 'GRÉCIA', 'LONDRES', 'INGLATERRA'), ('ATENAS', 'GRÉCIA', 'MADRID', 'ESPANHA'), ('ATENAS', 'GRÉCIA', 'MOSCOU', 'RÚSSIA'), ('ATENAS', 'GRÉCIA', 'OSLO', 'NORUEGA'), ('ATENAS', 'GRÉCIA', 'OTTAWA', 'CANADÁ'), ('ATENAS', 'GRÉCIA', 'PARIS', 'FRANÇA'), ('ATENAS', 'GRÉCIA', 'ROMA', 'ITÁLIA'), ('ATENAS', 'GRÉCIA', 'ESTOCOLMO', 'SUÉCIA'), ('ATENAS', 'GRÉCIA', 'TEERÃ', 'IRÃ'), ('ATENAS', 'GRÉCIA', 'TÓQUIO', 'JAPÃO'), ('BAGDÁ', 'IRAQUE', 'PEQUIM', 'CHINA'), ('BAGDÁ', 'IRAQUE', 'BERLIM', 'ALEMANHA'), ('BAGDÁ', 'IRAQUE', 'HAVANA', 'CUBA'), ('BAGDÁ', 'IRAQUE', 'CABUL', 'AFEGANISTÃO'), ('BAGDÁ', 'IRAQUE', 'MOSCOU', 'RÚSSIA'), ('BAGDÁ', 'IRAQUE', 'OSLO', 'NORUEGA'), ('BAGDÁ', 'IRAQUE', 'OTTAWA', 'CANADÁ'), ('BAGDÁ', 'IRAQUE', 'PARIS', 'FRANÇA')

In [3]:
modelsList = glob('../models/*fast.txt')
model  =  getModel(modelsList[0])


In [28]:
model.most_similar(positive=['atenas','pequim'],negative=['grécia'])

[('seul', 0.6353664398193359),
 ('pequim-taipé', 0.6326602101325989),
 ('xangai', 0.6200852394104004),
 ('pequim-xangai', 0.6111860275268555),
 ('pequim-pyongyang', 0.6070454120635986),
 ('pequim-china', 0.6038728952407837),
 ('taipé', 0.5943089723587036),
 ('xiaoshuang', 0.5923561453819275),
 ('pequim.o', 0.590029239654541),
 ('xangai-china', 0.5886864066123962)]

In [29]:
model.most_similar(positive=['atenas','china'],negative=['grécia'])

[('pequim', 0.8225739002227783),
 ('xangai-china', 0.7064758539199829),
 ('tianjing', 0.699294924736023),
 ('shenzhen-china', 0.689010500907898),
 ('zhongzhou', 0.6873874664306641),
 ('zhendong', 0.6834413409233093),
 ('chinesa', 0.6772347688674927),
 ('zhongjing', 0.6769419312477112),
 ('xangai', 0.6768250465393066),
 ('zhongyang', 0.6758601665496826)]

In [30]:
model.most_similar(positive=['grécia','china'],negative=['atenas'])

[('coreia', 0.6270121335983276),
 ('taiwan', 0.621976375579834),
 ('tailândia', 0.6031404137611389),
 ('china.', 0.5803037881851196),
 ('coréia', 0.558039665222168),
 ('japão', 0.5508037209510803),
 ('china-taiwan', 0.550620436668396),
 ('índia', 0.5471624135971069),
 ('malásia', 0.5458478927612305),
 ('rússia', 0.5385729670524597)]

In [31]:
model.most_similar(positive=['grécia','pequim'],negative=['atenas'])

[('china', 0.7808929681777954),
 ('taiwan', 0.658018171787262),
 ('pequim-pyongyang', 0.6476016044616699),
 ('coreia', 0.6296229958534241),
 ('pequim-china', 0.609553337097168),
 ('china-taiwan', 0.6077865362167358),
 ('indonésia', 0.6074162721633911),
 ('pequim-taipé', 0.6063328385353088),
 ('tibete', 0.6040211915969849),
 ('china.', 0.5988819599151611)]

## Exploração de Relações

### Pais-Presidente

In [3]:
def analogy(pos, neg):
    return model.most_similar(pos,neg)

In [4]:
modelsList = glob('../models/*fast.txt')
model  =  getModel(modelsList[0])

In [11]:
analogy(['alemanha','pedro'],['merkel'])

[('almerico', 0.5518990755081177),
 ('casimiro', 0.5496413707733154),
 ('espanha', 0.5445067882537842),
 ('d.álvaro', 0.5442548394203186),
 ('espanha—', 0.5391455888748169),
 ('defrederico', 0.5370157957077026),
 ('carlos', 0.536718487739563),
 ('frederico', 0.5309009552001953),
 ('landerico', 0.5286349654197693),
 ('joão', 0.5255224704742432)]

In [17]:
analogy(['alemanha','espanha'],['merkel'])

[('espanha—', 0.7498310804367065),
 ('espanha00', 0.6783173084259033),
 ('espanhae', 0.6701262593269348),
 ('espanhaa', 0.6699966192245483),
 ('espanha-holanda', 0.6659545302391052),
 ('espanha-suécia', 0.6606740355491638),
 ('espanha.', 0.6600045561790466),
 ('espanha-', 0.6597695350646973),
 ('itália', 0.6564512252807617),
 ('espanha-dinamarca', 0.6559625864028931)]

In [20]:
analogy(['merkel','espanha'],['alemanha'])

[('rajoy', 0.7286856174468994),
 ('aznar', 0.6592538356781006),
 ('psoe', 0.620030403137207),
 ('zapatero', 0.6065688133239746),
 ('aznarez', 0.5837963223457336),
 ('aznares', 0.5837029814720154),
 ('valls', 0.5767079591751099),
 ('sanchèz', 0.5551716089248657),
 ('tsipras', 0.5524406433105469),
 ('governo.pedro', 0.5391397476196289)]

In [21]:
analogy(['merkel','espanha'],['rajoy'])

[('alemanha', 0.764701783657074),
 ('—alemanha', 0.6655969619750977),
 ('alemanhã', 0.6514356136322021),
 ('alemanhao', 0.6505365371704102),
 ('flalemanha', 0.6500216722488403),
 ('alemanha.', 0.6379072666168213),
 ('áustria', 0.6368333101272583),
 ('alemanhae', 0.635574460029602),
 ('daalemanha', 0.6325020790100098),
 ('frança', 0.6310713291168213)]

In [22]:
analogy(['merkel','espanha'],['pedro'])

[('alemanha', 0.6142593622207642),
 ('merkels', 0.6087908744812012),
 ('—alemanha', 0.5496107339859009),
 ('alemanhã', 0.5418405532836914),
 ('daalemanha', 0.5393727421760559),
 ('aalemanha', 0.5302841663360596),
 ('grã-bretanha', 0.5299718379974365),
 ('merkell', 0.5251693725585938),
 ('rfa', 0.5224390029907227),
 ('alemanha.', 0.5212670564651489)]

In [16]:
analogy(['eua','merkel'],['obama'])

[('alemanha', 0.5969653725624084),
 ('merkels', 0.5662009119987488),
 ('unidos-alemanha', 0.5261813402175903),
 ('norte-alemanha', 0.524412989616394),
 ('unidos', 0.5220237970352173),
 ('—alemanha', 0.5149574279785156),
 ('eua-alemanha', 0.5114097595214844),
 ('merkell', 0.5098249912261963),
 ('alemanhã', 0.5077592134475708),
 ('rfa', 0.5076024532318115)]

Podemos observar algumas "sujeiras" na base, como palavras do vocabulário iguais [alemanha, -alemanha, alemanha., aalemanha, etc] que possivelmente poderiam acresentar o conhecimento porém são distinguidas.

In [23]:
analogy(['eua','rajoy'],['obama'])

[('espanha', 0.5240928530693054),
 ('psoe', 0.5091129541397095),
 ('ciudadanos', 0.48313695192337036),
 ('psoe-ciudadanos', 0.47759440541267395),
 ('euskadiko', 0.4676378667354584),
 ('pnv', 0.46438828110694885),
 ('psg-eg', 0.46226945519447327),
 ('catalunha', 0.46061208844184875),
 ('psc-psoe', 0.45780810713768005),
 ('ciu', 0.44975438714027405)]

In [25]:
analogy(['eua','conte'],['obama'])

[('aconte', 0.4805164933204651),
 ('reconte', 0.4697737991809845),
 ('contare', 0.46969348192214966),
 ('contar', 0.4668620824813843),
 ('laconte', 0.4613804817199707),
 ('cont_y', 0.44842708110809326),
 ('contando', 0.4448748230934143),
 ('contart', 0.44243723154067993),
 ('contate-me', 0.44098061323165894),
 ('dinco', 0.43836963176727295)]

In [28]:
analogy(['eua','berlusconi'],['obama'])

[('berlusconni', 0.6438901424407959),
 ('berlusconis', 0.6181439757347107),
 ('anti-berlusconi', 0.5776601433753967),
 ('fininvest', 0.5587810277938843),
 ('itália', 0.5479344725608826),
 ('fininveste', 0.526879072189331),
 ('berlusconiano', 0.5187187194824219),
 ('bossi', 0.5073366165161133),
 ('berlusconiana', 0.5062928795814514),
 ('publitália', 0.4963296353816986)]

In [31]:
analogy(['eua','monti'],['obama'])

[('donnini', 0.4999428391456604),
 ('brugnetti', 0.48805248737335205),
 ('pettini', 0.482662558555603),
 ('rigamonti', 0.48097920417785645),
 ('gargani', 0.47859567403793335),
 ('morettini', 0.47735702991485596),
 ('guidoni', 0.4716886878013611),
 ('roccettini', 0.4710380434989929),
 ('tognetti', 0.4643009305000305),
 ('castagnetti', 0.46322035789489746)]

In [33]:
analogy(['eua','enrico'],['obama'])

[('henrico', 0.5625894665718079),
 ('valerio', 0.5209310054779053),
 ('vitagliano', 0.5094318985939026),
 ('defrancesco', 0.5074387788772583),
 ('battistini', 0.49750638008117676),
 ('panrico', 0.497505247592926),
 ('riccardo', 0.4942808151245117),
 ('fabrizio', 0.4913637936115265),
 ('itália', 0.4912784695625305),
 ('piercarlo', 0.4887745976448059)]

In [34]:
analogy(['eua','matteo'],['obama'])

[('itália', 0.5320799946784973),
 ('matteoti', 0.5318585634231567),
 ('itáliano', 0.5174192786216736),
 ('matteotti', 0.5150493383407593),
 ('matteoli', 0.5115050077438354),
 ('pierini', 0.5032851099967957),
 ('bartoloni', 0.5032413005828857),
 ('francesco', 0.5031288862228394),
 ('vittorini', 0.5030046105384827),
 ('itáliao', 0.5030040144920349)]

In [35]:
analogy(['eua','renzi'],['obama'])

[('itália', 0.5503702163696289),
 ('firenzi', 0.5223995447158813),
 ('unidos', 0.4993197023868561),
 ('matteo', 0.49648967385292053),
 ('renzis', 0.4830038249492645),
 ('fenzi', 0.47789183259010315),
 ('pierini', 0.47173431515693665),
 ('fratangelo', 0.4644283652305603),
 ('matteotti', 0.4622066617012024),
 ('berlusconi', 0.459841251373291)]

In [39]:
analogy(['china','macarrão'],['yakisoba'])

[('xangai-china', 0.5721220970153809),
 ('chineses', 0.5484108924865723),
 ('chinês', 0.5439406633377075),
 ('eu-china', 0.5283564329147339),
 ('jianchang', 0.5267597436904907),
 ('chinesa', 0.5229658484458923),
 ('chines', 0.5208600759506226),
 ('china.', 0.5207402110099792),
 ('cuba-china', 0.5201283097267151),
 ('chinesas', 0.519325852394104)]

In [5]:
analogy(['merkel','eua'],['alemanha'])

[('obama', 0.6659965515136719),
 ('barack', 0.655023455619812),
 ('clinton.a', 0.594444751739502),
 ('clinton', 0.5942842364311218),
 ('clinton-assad', 0.5905671119689941),
 ('clinton.o', 0.5777859687805176),
 ('kerry', 0.5762988328933716),
 ('biden', 0.5712915658950806),
 ('obamas', 0.5705500841140747),
 ('albright', 0.5607388019561768)]

In [6]:
analogy(['merkel','italia'],['alemanha'])

[('renzi', 0.5919277667999268),
 ('italiani', 0.5549381971359253),
 ('ditalia', 0.5436686277389526),
 ('italiamo', 0.5381110310554504),
 ('italia0', 0.536308765411377),
 ('berlusconi', 0.5361664891242981),
 ('d´italia', 0.5331833362579346),
 ('fitalia', 0.525377631187439),
 ('camerone', 0.5245084762573242),
 ('lavoratti', 0.5155940055847168)]

In [7]:
analogy(['merkel','brasil'],['alemanha'])

[('brasil)em', 0.5761305689811707),
 ('\x93dilma', 0.5755376815795898),
 ('brasil)\x94,', 0.5744104385375977),
 ('brasilês', 0.5669643878936768),
 ('presidenta', 0.5624068975448608),
 ('brasil\x94.', 0.5589420199394226),
 ('brasilmais', 0.5587708950042725),
 ('brasil\x94,', 0.5558153390884399),
 ('brasilnão', 0.5549610257148743),
 ('brasil.é', 0.5547523498535156)]

In [8]:
analogy(['merkel','holanda'],['alemanha'])

[('dijssebloem', 0.614707887172699),
 ('dijsselbloem', 0.6047036051750183),
 ('merkell', 0.596217155456543),
 ('juncker', 0.5896487236022949),
 ('merkels', 0.58531653881073),
 ('hollande', 0.5834004878997803),
 ('merkel.o', 0.5777435898780823),
 ('holande', 0.5736743211746216),
 ('junckers', 0.571640133857727),
 ('rutte', 0.5627857446670532)]

In [7]:
model.most_similar(positive=['estados','unidos'])

[('eua', 0.8173750638961792),
 ('unidos,e', 0.7958360910415649),
 ('unidos.', 0.7799003720283508),
 ('unidos-', 0.7789234519004822),
 ('unidos.é', 0.7481932640075684),
 ('estadosunidos', 0.7360846400260925),
 ('unidos.fica', 0.7329683303833008),
 ('estados-unidos', 0.7277584075927734),
 ('unidos.ela', 0.7264972925186157),
 ('e.u.a', 0.7021260857582092)]

In [8]:
model.most_similar(positive=['holanda'])

[('holanda.', 0.8068545460700989),
 ('holandas', 0.7937363982200623),
 ('roterdã-holanda', 0.7753633856773376),
 ('frança/holanda', 0.7715115547180176),
 ('holanda-bélgica', 0.7678250670433044),
 ('holand', 0.7434194087982178),
 ('holanda.o', 0.7411854863166809),
 ('bélgica', 0.7325699925422668),
 ('espanha-holanda', 0.7311862111091614),
 ('holanda-alemanha', 0.7242059707641602)]

In [10]:
model.most_similar(positive=['holanda','país'])

[('frança/holanda', 0.6957878470420837),
 ('holandas', 0.6895419359207153),
 ('bélgica', 0.6713705658912659),
 ('espanha', 0.6704224944114685),
 ('portugal', 0.6673567295074463),
 ('inglaterra-holanda', 0.6671522259712219),
 ('espanha-holanda', 0.6555101871490479),
 ('frança', 0.6544818878173828),
 ('holanda-bélgica', 0.6528982520103455),
 ('abélgica', 0.647580623626709)]

In [7]:
analogy(['neve','calor'],['frio'])

[('chuva', 0.5868128538131714),
 ('granizo', 0.5454442501068115),
 ('evaporação', 0.5385028123855591),
 ('aguaneve', 0.5326476097106934),
 ('chuva.a', 0.5262962579727173),
 ('nevascas', 0.5163105726242065),
 ('granizos', 0.5094814300537109),
 ('fotoevaporação', 0.5086624026298523),
 ('chuva.', 0.5079367160797119),
 ('térmica', 0.5078336000442505)]

In [8]:
analogy(['cachoro','felino'],['canino'])

[('cachorrinho', 0.635928750038147),
 ('cachora', 0.6300210952758789),
 ('cachorrito', 0.6155412197113037),
 ('cachorrona', 0.6097216606140137),
 ('cachorrinhos', 0.60662442445755),
 ('pássaro', 0.591745913028717),
 ('cachorrinha', 0.5879471302032471),
 ('cachorro', 0.5865308046340942),
 ('robô-girafa', 0.5783734321594238),
 ('bunduda', 0.5777561664581299)]

In [11]:
analogy(['gato','canino'],['cachorro'])

[('caninos', 0.6211661100387573),
 ('felino', 0.608090877532959),
 ('gatopardo', 0.5799950957298279),
 ('canini', 0.5735528469085693),
 ('canin', 0.5718479156494141),
 ('canina', 0.5691990256309509),
 ('anofelino', 0.5584392547607422),
 ('gato-dourado', 0.5544600486755371),
 ('gato-cão', 0.5531497597694397),
 ('gato-leopardo', 0.5505276322364807)]

# Referencia

https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf