## spaCy Vectors

In [40]:
import spacy

In [41]:
# Load the 'en_core_web_lg' pipeline; the cells below read word vectors through it.
nlp = spacy.load('en_core_web_lg')

In [42]:
# A parsed text exposes a single vector for the whole phrase; inspect its dimensions.
nlp(u'lion kills a deer').vector.shape

(300,)

In [43]:
# Parse four one-word tokens so their pairwise similarities can be compared.
tokens = nlp(u'css love hate html')

In [44]:
# Print the similarity of every ordered pair of tokens (including each token with itself).
for left in tokens:
    for right in tokens:
        score = left.similarity(right)
        print(left.text, right.text, score)

css css 1.0
css love -0.03444082289934158
css hate 0.008504980243742466
css html 0.9298768639564514
love css -0.03444082289934158
love love 1.0
love hate 0.5708349943161011
love html -0.02205815725028515
hate css 0.008504980243742466
hate love 0.5708349943161011
hate hate 1.0
hate html 0.04499774053692818
html css 0.9298768639564514
html love -0.02205815725028515
html hate 0.04499774053692818
html html 1.0


In [64]:
# Inspect the dimensions of the vocabulary's vector table.
nlp.vocab.vectors.shape

(514157, 300)

In [46]:
# Parse two everyday words plus a made-up string ('asdlkj') to compare their vector attributes.
tokens = nlp(u'dog cat asdlkj')

In [47]:
# For each token, print its text, whether it has a vector, the vector norm, and the OOV flag.
for tok in tokens:
    fields = (tok.text, tok.has_vector, tok.vector_norm, tok.is_oov)
    print(*fields)

dog True 75.254234 False
cat True 63.188496 False
asdlkj False 0.0 True


In [48]:
from scipy import spatial

In [49]:
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors.

    SciPy's ``spatial.distance.cosine`` returns the cosine *distance*
    (1 - similarity), so it is subtracted from 1 to recover the similarity.

    Args:
        vec1: First vector (any 1-D array-like accepted by SciPy).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine of the angle between the vectors: 1 for the same
        direction, 0 for orthogonal vectors, -1 for opposite directions.
    """
    return 1 - spatial.distance.cosine(vec1, vec2)

In [59]:
# Look up the vocabulary vector for each word used in the analogy.
king, man, woman, queen = (
    nlp.vocab[name].vector for name in ('king', 'man', 'woman', 'queen')
)

In [60]:
# king - man + woman ===> new_vector should be similar to queen
new_vector = king - man + woman

In [66]:
# Score every string in the vocabulary against new_vector, keeping only
# entries whose has_vector, is_lower and is_alpha flags are all set.
computed_similarities = []

for entry in nlp.vocab.strings:
    lexeme = nlp.vocab[entry]
    if not (lexeme.has_vector and lexeme.is_lower and lexeme.is_alpha):
        continue
    score = cosine_similarity(new_vector, lexeme.vector)
    computed_similarities.append((lexeme.text, score))

In [67]:
# Order the (word, similarity) pairs from highest to lowest similarity.
computed_similarities = sorted(
    computed_similarities,
    key=lambda pair: pair[1],
    reverse=True,
)

In [68]:
# Show the first ten (word, similarity) pairs.
computed_similarities[:10]

[('king', 0.8489541411399841),
 ('kings', 0.7189058065414429),
 ('princes', 0.709700882434845),
 ('consort', 0.707391083240509),
 ('princeling', 0.7026306986808777),
 ('monarch', 0.6899287104606628),
 ('princelings', 0.687301516532898),
 ('princesses', 0.6814026832580566),
 ('prince', 0.6562830209732056),
 ('kingship', 0.649807333946228)]

## NLTK Sentiment (VADER) Analysis