In [85]:
!pip install -U spacy



In [86]:
!pip install pyinflect



In [87]:
import json

import numpy as np
import pandas as pd

import spacy
import en_core_web_sm
import pyinflect

import gensim.downloader as api

In [88]:
# малая модель spacy
nlp = en_core_web_sm.load()

# малая модель glove wiki
# внимание - очень долго скачивает, если она еще не установлена
model = api.load("glove-wiki-gigaword-100")

In [89]:
# изменение степени прилагательного с помощью pyinflect
for token in nlp('Little Red Riding Hood'):
    if token.pos_=='ADJ':
        print(token.text, token._.inflect('JJS'))
        print(token.text, token._.inflect('JJR'))
        print(token.text, token._.inflect('JJ'))

Little Littlest
Little Littler
Little Little


In [90]:
# сходные слова, синонимы
model.similar_by_word('lived')

[('living', 0.7721222639083862),
 ('resided', 0.7559422850608826),
 ('emigrated', 0.6997494101524353),
 ('family', 0.6992907524108887),
 ('lives', 0.6960945129394531),
 ('died', 0.6883507370948792),
 ('survived', 0.6847712993621826),
 ('whom', 0.6718282103538513),
 ('once', 0.6713927984237671),
 ('couple', 0.6712842583656311)]

In [91]:
# антонимы – добавляем пару позитив-негатив с противоположными значениями
model.most_similar(positive=['lived','bad'], negative=['good'])

[('died', 0.6619753241539001),
 ('resided', 0.638906717300415),
 ('fled', 0.6153265833854675),
 ('emigrated', 0.6144971251487732),
 ('survived', 0.6038041710853577),
 ('disappeared', 0.6005083918571472),
 ('living', 0.590585470199585),
 ('abandoned', 0.5663079023361206),
 ('lives', 0.5635175704956055),
 ('immigrated', 0.5618083477020264)]

In [92]:
# фильтрация стоп слов с помощью спейси
word = 'lived'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы lived


['died',
 'resided',
 'fled',
 'emigrated',
 'survived',
 'disappeared',
 'living',
 'abandoned',
 'lives',
 'immigrated']

In [93]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('lived', ant))

died 0.68835086
resided 0.7559424
fled 0.61928064
emigrated 0.69974947
survived 0.68477136
disappeared 0.6009228
living 0.77212226
abandoned 0.60050607
lives 0.6960944
immigrated 0.6520551


In [94]:
# как посчитать вручную similarity
# можно взять из gensim вектор слова и посчитать косинусное расстояние
lived_vec = model['lived']
living_vec = model['living']
cosine_similarity = (lived_vec @ living_vec)/(np.linalg.norm(lived_vec)*np.linalg.norm(living_vec))
cosine_similarity

0.77212226

In [95]:
# сходные слова, синонимы
model.similar_by_word('prettiest')

[('loveliest', 0.7358776926994324),
 ('ugliest', 0.7176482677459717),
 ('sweetest', 0.6964283585548401),
 ('nicest', 0.6708787083625793),
 ('cutest', 0.66517174243927),
 ('weirdest', 0.6551819443702698),
 ('oddest', 0.6532436609268188),
 ('strangest', 0.6526945233345032),
 ('classiest', 0.6503055691719055),
 ('craziest', 0.6463013291358948)]

In [96]:
# антонимы – добавляем пару позитив-негатив с противоположными значениями
model.most_similar(positive=['excessively','bad'], negative=['good'])

[('unnecessarily', 0.6572654843330383),
 ('unduly', 0.6301813721656799),
 ('appallingly', 0.622342586517334),
 ('overly', 0.6145561337471008),
 ('excessive', 0.6109001636505127),
 ('unconscionably', 0.5999709367752075),
 ('atrociously', 0.5830274224281311),
 ('unreasonably', 0.5811731219291687),
 ('dangerously', 0.5733420848846436),
 ('burdened', 0.5672959089279175)]

In [97]:
# сходные слова, синонимы
model.similar_by_word('excessively')

[('overly', 0.7340081930160522),
 ('unreasonably', 0.6992622017860413),
 ('unduly', 0.6895744800567627),
 ('unnecessarily', 0.6870430707931519),
 ('ridiculously', 0.6779029369354248),
 ('unconscionably', 0.6673431992530823),
 ('inordinately', 0.6629543900489807),
 ('extraordinarily', 0.65824955701828),
 ('absurdly', 0.6548056602478027),
 ('appallingly', 0.653618335723877)]

In [98]:
# сходные слова, синонимы
model.similar_by_word('woman')

[('girl', 0.8472671508789062),
 ('man', 0.832349419593811),
 ('mother', 0.827568769454956),
 ('boy', 0.7720510363578796),
 ('she', 0.7632068395614624),
 ('child', 0.7601762413978577),
 ('wife', 0.7505022883415222),
 ('her', 0.7445706129074097),
 ('herself', 0.7426273822784424),
 ('daughter', 0.726445734500885)]

In [99]:

for token in nlp('It suited the girl so extremely well that everybody called her Little Red Riding Hood'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


suited suit
suited suits
suited suited
called call
called calls
called called


In [100]:

for token in nlp('One day her mother, having made some cakes, said to her, "Go, my dear, and see how your grandmother is doing, for I hear she has been very ill.'):
    if token.pos_=='NOUN':
        print(token.text, token._.inflect('NN'))
        print(token.text, token._.inflect('NNS'))


day day
day days
mother mother
mother mothers
cakes cake
cakes cakes
dear dear
dear dears
grandmother grandmother
grandmother grandmothers


In [101]:
# на случайные близкие слова и анти-слова
sent = 'Take her a cake, and this little pot of butter.'
new_sent_1, new_sent_2 = sent, sent
i=5
for token in nlp(sent):
    if token.pos_ in ['VERB']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_sent_1 = new_sent_1.replace(token.text, new_word_1)
        new_sent_2 = new_sent_2.replace(token.text, new_word_2)

print(sent)
print(new_sent_1)
print(new_sent_2)

Take her a cake, and this little pot of butter.
Give her a cake, and this little pot of butter.
Go her a cake, and this little pot of butter.


In [102]:
# фильтрация стоп слов с помощью спейси
word = 'butter'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы butter


['margarine',
 'melted',
 'melt',
 'cheese',
 'flour',
 'peanut',
 'syrup',
 'chopped',
 'sour',
 'stir']

In [103]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('butter', ant))

margarine 0.8079419
melted 0.7524646
melt 0.6629708
cheese 0.77912
flour 0.75192404
peanut 0.7813067
syrup 0.7206409
chopped 0.6637064
sour 0.58706975
stir 0.7044655


In [104]:
# фильтрация стоп слов с помощью спейси
word = 'immediately'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы immediately


['forced',
 'soon',
 'shortly',
 'caused',
 'promptly',
 'temporarily',
 'quickly',
 'avoid',
 'incident',
 'apparently']

In [105]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('immediately', ant))

forced 0.6699287
soon 0.8437643
shortly 0.7703622
caused 0.55561686
promptly 0.77182955
temporarily 0.6881569
quickly 0.8234099
avoid 0.6605484
incident 0.64048964
apparently 0.7204884


In [106]:
for token in nlp('As she was going through the wood, she met with a wolf, who had a very great mind to eat her up, but he dared not, because of some woodcutters working nearby in the forest.'):
    if token.pos_=='NOUN':
        print(token.text, token._.inflect('NN'))
        print(token.text, token._.inflect('NNS'))


wood wood
wood woods
wolf wolf
wolf wolves
mind mind
mind minds
woodcutters woodcutter
woodcutters woodcutters
forest forest
forest forests


In [107]:
for token in nlp('As she was going through the wood, she met with a wolf, who had a very great mind to eat her up, but he dared not, because of some woodcutters working nearby in the forest.'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


going go
going goes
going went
met meet
met meets
met met
had have
had has
had had
eat eat
eat eats
eat ate
dared dare
dared dares
dared dared
working work
working works
working wrought


In [108]:
for token in nlp('As she was going through the wood, she met with a wolf, who had a very great mind to eat her up, but he dared not, because of some woodcutters working nearby in the forest.'):
    if token.pos_=='ADJ':
        print(token.text, token._.inflect('JJS'))
        print(token.text, token._.inflect('JJR'))
        print(token.text, token._.inflect('JJ'))

great greatest
great greater
great great


In [109]:
for token in nlp('He asked her where she was going'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


asked ask
asked asks
asked asked
going go
going goes
going went


In [110]:
# сходные слова, синонимы
model.similar_by_word('dangerous')

[('difficult', 0.7275011539459229),
 ('potentially', 0.7120583653450012),
 ('serious', 0.6858029365539551),
 ('destructive', 0.6796632409095764),
 ('danger', 0.6740216612815857),
 ('unpredictable', 0.6732596158981323),
 ('violent', 0.6720824837684631),
 ('deadly', 0.6691234111785889),
 ('extremely', 0.6673955917358398),
 ('vulnerable', 0.6656883955001831)]

In [111]:
# фильтрация стоп слов с помощью спейси
word = 'carry'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы carry


['carried',
 'carrying',
 'avoid',
 'stop',
 'causing',
 'prevent',
 'handle',
 'caused',
 'massive',
 'forced']

In [112]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('carry', ant))

carried 0.77079064
carrying 0.7303496
avoid 0.5921266
stop 0.6480684
causing 0.45596516
prevent 0.61330664
handle 0.6858238
caused 0.42871833
massive 0.53908134
forced 0.5081295


In [113]:
for token in nlp('"Does she live far off?" said the wolf'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


live live
live lives
live lived
said say
said says
said said


In [114]:
for token in nlp('"Oh I say," answered Little Red Riding Hood; "it is beyond that mill you see there, at the first house in the village."'):
    if token.pos_=='ADJ':
        print(token.text, token._.inflect('JJS'))
        print(token.text, token._.inflect('JJR'))
        print(token.text, token._.inflect('JJ'))

first firstest
first firster
first first


In [115]:
# фильтрация стоп слов с помощью спейси
word = 'way'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы way


['turn', 'going', 'wrong', 'things']

In [116]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('way', ant))

turn 0.82955754
going 0.8569827
wrong 0.71910506
things 0.8247758


In [117]:
# сходные слова, синонимы
model.similar_by_word('there')

[('no', 0.8889176249504089),
 ('only', 0.8587634563446045),
 ('some', 0.8572323322296143),
 ('but', 0.854871928691864),
 ('so', 0.8500323295593262),
 ('this', 0.8480582237243652),
 ('they', 0.8431913256645203),
 ('though', 0.8426817655563354),
 ('now', 0.8422872424125671),
 ('what', 0.8381678462028503)]

In [118]:
# сходные слова, синонимы
model.similar_by_word('path')

[('paths', 0.7559518814086914),
 ('toward', 0.6760907173156738),
 ('direction', 0.6682589650154114),
 ('way', 0.6522889137268066),
 ('road', 0.6374226212501526),
 ('towards', 0.634418249130249),
 ('clear', 0.633200466632843),
 ('journey', 0.6222858428955078),
 ('approach', 0.6212446093559265),
 ('trail', 0.6101217269897461)]

In [119]:
# сходные слова, синонимы
model.similar_by_word('roundabout')

[('thoroughfare', 0.613814115524292),
 ('intersection', 0.6035764813423157),
 ('boulevard', 0.5774850845336914),
 ('broadway', 0.5733689665794373),
 ('junction', 0.5629505515098572),
 ('esplanade', 0.5551373958587646),
 ('gate', 0.546811044216156),
 ('terminates', 0.5445003509521484),
 ('avenue', 0.5416954159736633),
 ('crossroads', 0.5413249135017395)]

In [120]:
# фильтрация стоп слов с помощью спейси
word = 'roundabout'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы roundabout


['intersection',
 'hairpin',
 'thoroughfare',
 'intersecting',
 '-',
 'pass',
 'terminates',
 'freeway',
 'broadway',
 'i-5',
 'expressway']

In [121]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('roundabout', ant))

intersection 0.6035765
hairpin 0.5092227
thoroughfare 0.61381406
intersecting 0.49770108
- 0.014921978
pass 0.14181137
terminates 0.54450023
freeway 0.4979262
broadway 0.57336897
i-5 0.4723395
expressway 0.47346207


In [122]:
# сходные слова, синонимы
model.similar_by_word('butterflies')

[('moths', 0.7957603931427002),
 ('insects', 0.7567256689071655),
 ('dragonflies', 0.7413623332977295),
 ('orchids', 0.7138190865516663),
 ('caterpillars', 0.6863438487052917),
 ('beetles', 0.6748114228248596),
 ('birds', 0.6679182648658752),
 ('bees', 0.6228375434875488),
 ('flies', 0.6177154779434204),
 ('hummingbirds', 0.6128782629966736)]

In [123]:
# сходные слова, синонимы
model.similar_by_word('house')

[('office', 0.7581615447998047),
 ('senate', 0.7204986810684204),
 ('room', 0.7149738669395447),
 ('houses', 0.6888046264648438),
 ('capitol', 0.6851760149002075),
 ('building', 0.684728741645813),
 ('home', 0.672031044960022),
 ('clinton', 0.6707026958465576),
 ('congressional', 0.669257640838623),
 ('mansion', 0.665092408657074)]

In [124]:
for token in nlp('He knocked at the door: tap, tap.'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


knocked knock
knocked knockes
knocked knocked


In [125]:
# сходные слова, синонимы
model.similar_by_word('there')

[('no', 0.8889176249504089),
 ('only', 0.8587634563446045),
 ('some', 0.8572323322296143),
 ('but', 0.854871928691864),
 ('so', 0.8500323295593262),
 ('this', 0.8480582237243652),
 ('they', 0.8431913256645203),
 ('though', 0.8426817655563354),
 ('now', 0.8422872424125671),
 ('what', 0.8381678462028503)]

In [126]:
# сходные слова, синонимы
model.similar_by_word('counterfeiting')

[('laundering', 0.7594428062438965),
 ('counterfeit', 0.6539453864097595),
 ('piracy', 0.6462855339050293),
 ('smuggling', 0.617916464805603),
 ('trafficking', 0.6048997044563293),
 ('forgery', 0.6027125120162964),
 ('peddling', 0.5857939124107361),
 ('illicit', 0.5817586779594421),
 ('evasion', 0.5638552308082581),
 ('extortion', 0.5566224455833435)]

In [127]:
# сходные слова, синонимы
model.similar_by_word('butter')

[('margarine', 0.8079419136047363),
 ('cream', 0.7990189790725708),
 ('peanut', 0.7813066840171814),
 ('cheese', 0.7791200876235962),
 ('chocolate', 0.7580447196960449),
 ('melted', 0.7524645328521729),
 ('flour', 0.7519240379333496),
 ('sauce', 0.7376376986503601),
 ('vanilla', 0.7348540425300598),
 ('baking', 0.7293545603752136)]

In [128]:
# антонимы – добавляем пару позитив-негатив с противоположными значениями
model.most_similar(positive=['good','bad'], negative=['good'])

[('worse', 0.7929712533950806),
 ('things', 0.7653602957725525),
 ('too', 0.7630148530006409),
 ('thing', 0.7609667778015137),
 ('lot', 0.7443646788597107),
 ('kind', 0.7408681511878967),
 ('because', 0.739879846572876),
 ('really', 0.7376540899276733),
 ("n't", 0.7336540818214417),
 ('little', 0.7281355857849121)]

In [129]:
# сходные слова, синонимы
model.similar_by_word('good')

[('better', 0.893191397190094),
 ('sure', 0.8314563035964966),
 ('really', 0.8297762274742126),
 ('kind', 0.8288268446922302),
 ('very', 0.8260800242424011),
 ('we', 0.8234355449676514),
 ('way', 0.8215398192405701),
 ('think', 0.8205099105834961),
 ('thing', 0.8171301484107971),
 ("'re", 0.8141680955886841)]

In [130]:
# сходные слова, синонимы
model.similar_by_word('bobbin')

[('moire', 0.5209357738494873),
 ('doody', 0.5124744176864624),
 ('lace', 0.5009957551956177),
 ('belling', 0.4997113049030304),
 ('weft', 0.4916817843914032),
 ('rollin', 0.48743078112602234),
 ('poochie', 0.4815025329589844),
 ('bobbins', 0.4781877398490906),
 ('bramo', 0.4764481782913208),
 ('floss', 0.4735659062862396)]

In [131]:
# сходные слова, синонимы
model.similar_by_word('pulled')

[('pulling', 0.7936134934425354),
 ('off', 0.7768003344535828),
 ('back', 0.7689379453659058),
 ('picked', 0.7561824321746826),
 ('pull', 0.7560582160949707),
 ('away', 0.7401852011680603),
 ('pushed', 0.7299758195877075),
 ('knocked', 0.7287973761558533),
 ('grabbed', 0.7275064587593079),
 ('rolled', 0.7274501323699951)]

In [132]:
# сходные слова, синонимы
model.similar_by_word('moment')

[('moments', 0.8475146889686584),
 ('thing', 0.8067348599433899),
 ('mind', 0.7597180008888245),
 ('sort', 0.7511561512947083),
 ('kind', 0.7459704875946045),
 ('something', 0.7444869875907898),
 ('happened', 0.7356367707252502),
 ('remember', 0.7342726588249207),
 ('what', 0.7298220992088318),
 ('happens', 0.7289209961891174)]

In [133]:
# сходные слова, синонимы
model.similar_by_word('door')

[('doors', 0.8562019467353821),
 ('window', 0.8181455135345459),
 ('room', 0.7539054751396179),
 ('inside', 0.7108698487281799),
 ('floor', 0.7052059769630432),
 ('sitting', 0.6876002550125122),
 ('front', 0.680144727230072),
 ('waiting', 0.6790406703948975),
 ('hand', 0.6632920503616333),
 ('locked', 0.6594761610031128)]

In [134]:
# сходные слова, синонимы
model.similar_by_word('expecting')

[('expect', 0.8273691534996033),
 ('anticipating', 0.7498417496681213),
 ('expectations', 0.7260172367095947),
 ('expects', 0.7201017141342163),
 ('optimistic', 0.7089706659317017),
 ('anticipate', 0.6928597688674927),
 ('predicting', 0.6916440725326538),
 ('seeing', 0.6915954947471619),
 ('anticipated', 0.682197093963623),
 ('surprised', 0.6792380809783936)]

In [135]:
# сходные слова, синонимы
model.similar_by_word('hearing')

[('hearings', 0.7677700519561768),
 ('court', 0.7499337196350098),
 ('proceedings', 0.7430045008659363),
 ('jury', 0.7308874726295471),
 ('testimony', 0.7229984402656555),
 ('trial', 0.722489058971405),
 ('hear', 0.7202497720718384),
 ('judge', 0.7131830453872681),
 ('case', 0.7046642899513245),
 ('heard', 0.6963950395584106)]

In [136]:
# фильтрация стоп слов с помощью спейси
word = 'believing'
antonyms = model.most_similar(positive=[word,'bad'], negative=['good'])
# get words from tuples
antonyms = [ _[0] for _ in antonyms]
# filter stop words
antonyms = [_.text for _ in nlp(' '.join(antonyms)) if not _.is_stop]
print('Потенциальные антонимы', word)
antonyms

Потенциальные антонимы believing


['blaming',
 'blame',
 'afraid',
 'scared',
 'unaware',
 'fear',
 'fearful',
 'knowing',
 'worried',
 'suspecting']

In [137]:
# проверка similarity двух слов с помощью gensim
for ant in antonyms:
    print(ant, model.similarity('believing', ant))

blaming 0.515885
blame 0.5642224
afraid 0.6741363
scared 0.5688521
unaware 0.6319026
fear 0.61684257
fearful 0.5719971
knowing 0.75471795
worried 0.62038374
suspecting 0.58531433


In [138]:
# сходные слова, синонимы
model.similar_by_word('grandchild')

[('granddaughter', 0.6973433494567871),
 ('grandparent', 0.6881345510482788),
 ('grandchildren', 0.688111424446106),
 ('niece', 0.679613471031189),
 ('granddaughters', 0.6711245179176331),
 ('grandparents', 0.6521943807601929),
 ('nieces', 0.6283237934112549),
 ('grandsons', 0.6256321668624878),
 ('firstborn', 0.6148455142974854),
 ('great-grandchild', 0.6085379719734192)]

In [139]:
# сходные слова, синонимы
model.similar_by_word('softening')

[('weakening', 0.7141451835632324),
 ('hardening', 0.7134581208229065),
 ('easing', 0.6805393099784851),
 ('moderating', 0.6737066507339478),
 ('firming', 0.6558446884155273),
 ('soften', 0.6458730101585388),
 ('slowing', 0.638678789138794),
 ('softness', 0.6294131875038147),
 ('slowdown', 0.6200348138809204),
 ('dampening', 0.6113205552101135)]

In [140]:
# сходные слова, синонимы
model.similar_by_word('latch')

[('latches', 0.7570903301239014),
 ('locking', 0.5653749704360962),
 ('latching', 0.5628824830055237),
 ('deadbolt', 0.5432531833648682),
 ('handrails', 0.540752649307251),
 ('choke', 0.536155641078949),
 ('throttle', 0.5289207100868225),
 ('vibrate', 0.5224570631980896),
 ('fasten', 0.5202547907829285),
 ('latched', 0.5198674201965332)]

In [141]:
# на случайные близкие слова и анти-слова
noun = "Little Red Riding Hood pulled the bobbin, and the door opened."
new_noun_1, new_noun_2 = noun, noun
i=5
for token in nlp(noun):
    if token.pos_ in ['NOUN', 'VERB', 'ADV', 'ADJ']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_noun_1 = new_noun_1.replace(token.text, new_word_1)
        new_noun_2 = new_noun_2.replace(token.text, new_word_2)

print(noun)
print(new_noun_1)
print(new_noun_2)

Little Red Riding Hood pulled the bobbin, and the door opened.
Too Red Riding Hood back the weft, and the window closed.
Bit Red Riding Hood pull the bramo, and the locked closed.


In [142]:
for token in nlp('Little Red Riding Hood pulled the bobbin, and the door opened.'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


pulled pull
pulled pulls
pulled pulled
opened open
opened opens
opened opened


In [143]:
# сходные слова, синонимы
model.similar_by_word('seeing')

[('looking', 0.8002535104751587),
 ('watching', 0.7961147427558899),
 ('saw', 0.7877046465873718),
 ('everyone', 0.7772706747055054),
 ('see', 0.7740123271942139),
 ("'re", 0.7711707949638367),
 ('talking', 0.7640077471733093),
 ('getting', 0.7607728242874146),
 ('seen', 0.7606708407402039),
 ('why', 0.7542959451675415)]

In [144]:
# сходные слова, синонимы
model.similar_by_word('bedclothes')

[('bedcovers', 0.664415180683136),
 ('bedrolls', 0.6588523387908936),
 ('schoolbags', 0.6473395228385925),
 ('inkwells', 0.6444849371910095),
 ('nightclothes', 0.6198785901069641),
 ('woollens', 0.614671528339386),
 ('bunkmates', 0.6104326248168945),
 ('luggages', 0.6023505330085754),
 ('barcalounger', 0.600226879119873),
 ('sideboards', 0.5999171137809753)]

In [145]:
# на случайные близкие слова и анти-слова
noun = "Little Red Riding Hood took off her clothes and got into bed."
new_noun_1, new_noun_2 = noun, noun
i=5
for token in nlp(noun):
    if token.pos_ in ['NOUN', 'VERB', 'ADV', 'ADJ']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_noun_1 = new_noun_1.replace(token.text, new_word_1)
        new_noun_2 = new_noun_2.replace(token.text, new_word_2)

print(noun)
print(new_noun_1)
print(new_noun_2)

Little Red Riding Hood took off her clothes and got into bed.
Just Red Riding Hood came off her shoes and 've into mattress.
Too Red Riding Hood followed off her shoes and getting into asleep.


In [146]:
# сходные слова, синонимы
model.similar_by_word('bed')

[('beds', 0.7630816102027893),
 ('sleeping', 0.7616755366325378),
 ('room', 0.7250885963439941),
 ('bedroom', 0.6915375590324402),
 ('mattress', 0.6799733638763428),
 ('sitting', 0.670059084892273),
 ('bathroom', 0.6695295572280884),
 ('sleep', 0.6524335741996765),
 ('couch', 0.6479793787002563),
 ('slept', 0.6461016535758972)]

In [147]:
# сходные слова, синонимы
model.similar_by_word('amazed')

[('astonished', 0.9051270484924316),
 ('astounded', 0.8492513298988342),
 ('thrilled', 0.8213180303573608),
 ('appalled', 0.7945465445518494),
 ('delighted', 0.7768217921257019),
 ('awed', 0.7736601829528809),
 ('horrified', 0.7710898518562317),
 ('intrigued', 0.7709209322929382),
 ('mystified', 0.7645464539527893),
 ('elated', 0.7514320611953735)]

In [148]:
# сходные слова, синонимы
model.similar_by_word('arms')

[('weapons', 0.7807133197784424),
 ('hands', 0.6645297408103943),
 ('hand', 0.6610875725746155),
 ('weapon', 0.624072790145874),
 ('armed', 0.6143966317176819),
 ('arm', 0.6058312058448792),
 ('holding', 0.6043851971626282),
 ('guns', 0.6008465886116028),
 ('iraq', 0.5929712057113647),
 ('u.n.', 0.5912413597106934)]

In [149]:
# на случайные близкие слова и анти-слова
noun = "All the better to hug you with, my dear."
new_noun_1, new_noun_2 = noun, noun
i=5
for token in nlp(noun):
    if token.pos_ in ['NOUN', 'VERB', 'ADV', 'ADJ']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_noun_1 = new_noun_1.replace(token.text, new_word_1)
        new_noun_2 = new_noun_2.replace(token.text, new_word_2)

print(noun)
print(new_noun_1)
print(new_noun_2)

All the better to hug you with, my dear.
All the make to hugged you with, my hello.
All the getting to goodbye you with, my hello.


In [150]:
for token in nlp('All the better to hug you with, my dear.'):
    if token.pos_=='ADJ':
        print(token.text, token._.inflect('JJS'))
        print(token.text, token._.inflect('JJR'))
        print(token.text, token._.inflect('JJ'))
        print(token.text, token._.inflect('NNS'))

better best
better better
better well
better wells


In [151]:
# сходные слова, синонимы
model.similar_by_word('legs')

[('knees', 0.7576848864555359),
 ('neck', 0.7544339299201965),
 ('limbs', 0.7435048818588257),
 ('torso', 0.718532145023346),
 ('leg', 0.7134101986885071),
 ('abdomen', 0.7132607102394104),
 ('fingers', 0.7129844427108765),
 ('necks', 0.7127721309661865),
 ('toes', 0.7070165276527405),
 ('ankles', 0.7006694674491882)]

In [152]:
for token in nlp('All the better to run with, my child.'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


run run
run runs
run ran


In [153]:
# сходные слова, синонимы
model.similar_by_word('run')

[('running', 0.8024195432662964),
 ('runs', 0.7739369869232178),
 ('ran', 0.7490553259849548),
 ('out', 0.7392130494117737),
 ('go', 0.728611171245575),
 ('third', 0.7266499400138855),
 ('allowed', 0.7226610779762268),
 ('first', 0.7184870839118958),
 ('second', 0.7178599834442139),
 ('start', 0.7167410850524902)]

In [154]:
# сходные слова, синонимы
model.similar_by_word('ears')

[('ear', 0.8169093728065491),
 ('eyes', 0.7445225715637207),
 ('noses', 0.7245702743530273),
 ('lips', 0.7063229084014893),
 ('fingers', 0.7009091377258301),
 ('mouths', 0.6857833862304688),
 ('nose', 0.6725651621818542),
 ('legs', 0.664069652557373),
 ('tongue', 0.634791910648346),
 ('tongues', 0.6197501420974731)]

In [155]:
# на случайные близкие слова и анти-слова
noun = "All the better to hug you with, my dear."
new_noun_1, new_noun_2 = noun, noun
i=5
for token in nlp(noun):
    if token.pos_ in ['NOUN', 'VERB', 'ADV', 'ADJ']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_noun_1 = new_noun_1.replace(token.text, new_word_1)
        new_noun_2 = new_noun_2.replace(token.text, new_word_2)

print(noun)
print(new_noun_1)
print(new_noun_2)

All the better to hug you with, my dear.
All the get to kiss you with, my hello.
All the even to kiss you with, my daddy.


In [156]:
# сходные слова, синонимы
model.similar_by_word('child')

[('children', 0.8553194999694824),
 ('mother', 0.7771131992340088),
 ('parents', 0.7735786437988281),
 ('girl', 0.7634811997413635),
 ('woman', 0.7601762413978577),
 ('boy', 0.7529667019844055),
 ('sex', 0.7461725473403931),
 ('pregnant', 0.7461138963699341),
 ('infant', 0.7323657870292664),
 ('daughter', 0.7290228605270386)]

In [157]:
# на случайные близкие слова и анти-слова
noun = "Grandmother, what big eyes you have!"
new_noun_1, new_noun_2 = noun, noun
i=5
for token in nlp(noun):
    if token.pos_ in ['NOUN', 'VERB', 'ADV', 'ADJ']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_noun_1 = new_noun_1.replace(token.text, new_word_1)
        new_noun_2 = new_noun_2.replace(token.text, new_word_2)

print(noun)
print(new_noun_1)
print(new_noun_2)

Grandmother, what big eyes you have!
Grandmother, what huge smile you been!
Grandmother, what huge lips you they!


In [158]:
for token in nlp('Grandmother, what big eyes you have!'):
    if token.pos_=='VERB':
        print(token.text, token._.inflect('VB'))
        print(token.text, token._.inflect('VBZ'))
        print(token.text, token._.inflect('VBD'))


have have
have has
have had


In [159]:
# сходные слова, синонимы
model.similar_by_word('with')

[('over', 0.8443337082862854),
 ('both', 0.8336119055747986),
 ('and', 0.8332647681236267),
 ('two', 0.8263282775878906),
 ('well', 0.8233078122138977),
 ('while', 0.8223927021026611),
 ('by', 0.8067958354949951),
 ('as', 0.8017653226852417),
 ('.', 0.7975766062736511),
 ('one', 0.7949095368385315)]

In [160]:
# сходные слова, синонимы
model.similar_by_word('teeth')

[('bones', 0.7548375725746155),
 ('tooth', 0.7203940749168396),
 ('claws', 0.7042812705039978),
 ('jaw', 0.6967788934707642),
 ('jaws', 0.6908931136131287),
 ('legs', 0.6576513648033142),
 ('bone', 0.6443898677825928),
 ('tongue', 0.6401115655899048),
 ('skull', 0.6396577954292297),
 ('skin', 0.6392791271209717)]

In [161]:
# на случайные близкие слова и анти-слова
noun = "All the better to eat you up with."
new_noun_1, new_noun_2 = noun, noun
i=5
for token in nlp(noun):
    if token.pos_ in ['NOUN', 'VERB', 'ADV', 'ADJ']:
        m, n = np.random.randint(0, i, 2)

        new_word_1 = model.most_similar(token.text.lower(), topn=i)[m][0]
        new_word_2 = model.most_similar(positive = [token.text.lower(), 'bad'],
                                        negative = ['good'],
                                        topn=i)[n][0]

        new_word_1 = new_word_1.title() if token.text.istitle() else new_word_1
        new_word_2 = new_word_2.title() if token.text.istitle() else new_word_2

        new_noun_1 = new_noun_1.replace(token.text, new_word_1)
        new_noun_2 = new_noun_2.replace(token.text, new_word_2)

print(noun)
print(new_noun_1)
print(new_noun_2)

All the better to eat you up with.
All the make to eaten you up with.
All the even to consume you up with.


In [162]:
# сходные слова, синонимы
model.similar_by_word('wicked')

[('evil', 0.6611456274986267),
 ('mischievous', 0.6498479247093201),
 ('cruel', 0.6207956075668335),
 ('wacky', 0.5978850722312927),
 ('crazy', 0.5977882146835327),
 ('silly', 0.5966265797615051),
 ('sneaky', 0.5902952551841736),
 ('vicious', 0.5887827277183533),
 ('dumb', 0.5886535048484802),
 ('clever', 0.5797110795974731)]

In [163]:
# сходные слова, синонимы
model.similar_by_word('attractive')

[('desirable', 0.730512261390686),
 ('unattractive', 0.6811944246292114),
 ('alluring', 0.6517535448074341),
 ('appealing', 0.6353302001953125),
 ('relatively', 0.6313267946243286),
 ('cheaper', 0.6293203830718994),
 ('very', 0.6291751861572266),
 ('priced', 0.6290417909622192),
 ('suitable', 0.6281996965408325),
 ('expensive', 0.6248573660850525)]

In [164]:
# сходные слова, синонимы
model.similar_by_word('dinner')

[('breakfast', 0.8415963053703308),
 ('dinners', 0.8142550587654114),
 ('lunch', 0.8033258318901062),
 ('luncheon', 0.7748416662216187),
 ('guests', 0.7565879821777344),
 ('banquet', 0.7558953166007996),
 ('brunch', 0.7384738326072693),
 ('meal', 0.714841902256012),
 ('meals', 0.6837010979652405),
 ('buffet', 0.682805597782135)]

In [165]:
# сходные слова, синонимы
model.similar_by_word('various')

[('numerous', 0.8572206497192383),
 ('several', 0.8507537841796875),
 ('other', 0.8382675647735596),
 ('variety', 0.8348256349563599),
 ('different', 0.8308271765708923),
 ('these', 0.8201767206192017),
 ('such', 0.8062143325805664),
 ('include', 0.7874671220779419),
 ('many', 0.7858366966247559),
 ('including', 0.7760022282600403)]

In [166]:
# сходные слова, синонимы
model.similar_by_word('charming')

[('lovely', 0.805105447769165),
 ('beautiful', 0.7719314098358154),
 ('delightful', 0.7702623009681702),
 ('gorgeous', 0.7595885396003723),
 ('quirky', 0.7593000531196594),
 ('handsome', 0.7494663000106812),
 ('seductive', 0.740882158279419),
 ('amusing', 0.7366154193878174),
 ('likable', 0.7323250770568848),
 ('elegant', 0.7291433811187744)]

In [167]:
# сходные слова, синонимы
model.similar_by_word('streets')

[('street', 0.7399185299873352),
 ('downtown', 0.7268213033676147),
 ('sidewalks', 0.7258997559547424),
 ('crowded', 0.71346116065979),
 ('neighborhoods', 0.6979953050613403),
 ('across', 0.6944648027420044),
 ('lined', 0.6841650605201721),
 ('crowds', 0.6825905442237854),
 ('roads', 0.6752520799636841),
 ('outside', 0.668502151966095)]

In [168]:
# сходные слова, синонимы
model.similar_by_word('gentle')

[('playful', 0.6820974946022034),
 ('charming', 0.64178866147995),
 ('lovely', 0.6381846070289612),
 ('breezy', 0.6363114714622498),
 ('cool', 0.6339117884635925),
 ('pleasant', 0.6335440874099731),
 ('quiet', 0.6328959465026855),
 ('easygoing', 0.6242066025733948),
 ('thoughtful', 0.623829185962677),
 ('wry', 0.6205047965049744)]