# Section 06 - Semantics and Sentiment Analysis

## Semantics and Word Vectors with spaCy

In [1]:
import spacy

In [2]:
# Load the `en_core_web_lg` pipeline once; every later cell reuses `nlp`.
model_name = 'en_core_web_lg'
nlp = spacy.load(model_name)

In [11]:
# Shape of the vector the pipeline assigns to a single-word document.
# (The `u` string prefix is redundant in Python 3, so it is dropped.)
nlp('phone').vector.shape

(300,)

In [30]:
# Parse three words; the next cell compares them pairwise.
# (The `u` string prefix is redundant in Python 3, so it is dropped.)
tokens = nlp('girl boy green')

In [31]:
# Print the similarity for every ordered pair of tokens,
# including each token paired with itself.
for left in tokens:
    for right in tokens:
        score = left.similarity(right)
        print(left.text, right.text, score)

girl girl 1.0
girl boy 0.8148324
girl green 0.28234044
boy girl 0.8148324
boy boy 1.0
boy green 0.2526257
green girl 0.28234044
green boy 0.2526257
green green 1.0


In [32]:
# Size (`len`) of the pipeline's vector table.
vector_table = nlp.vocab.vectors
len(vector_table)

684831

In [33]:
a = len(nlp.vocab.vectors)*300

In [34]:
a

205449300

In [45]:
# Rebind `tokens` to a new three-word document whose per-token vector
# attributes are printed in the next cell.
# (The `u` string prefix is redundant in Python 3, so it is dropped.)
tokens = nlp("dog cat edward")

In [46]:
# For each token, print its text, whether it has a vector, the vector's
# norm, and whether it is flagged as out-of-vocabulary.
for tok in tokens:
    fields = (tok.text, tok.has_vector, tok.vector_norm, tok.is_oov)
    print(*fields)

dog True 7.0336733 False
cat True 6.6808186 False
edward True 6.590808 False


In [47]:
from scipy import spatial

In [53]:
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Computed as ``1 - spatial.distance.cosine(vec1, vec2)``, i.e. one minus
    SciPy's cosine distance.

    Args:
        vec1: First vector (any 1-D array-like accepted by SciPy).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine similarity as a float.
    """
    return 1 - spatial.distance.cosine(vec1, vec2)

In [57]:
king = nlp.vocab['king'].vector
royal = nlp.vocab['man'].vector
woman = nlp.vocab['woman'].vector

In [58]:
new_vector = king - royal + woman

In [52]:
# Sanity check: inspect the shape of the combined vector.
new_vector.shape

(300,)

In [59]:
# Walk the vocabulary and, for every entry that has a vector and is both
# lowercase and alphabetic, record a (lexeme, similarity) pair scored
# against `new_vector`.
computed_similarities = [
    (lexeme, cosine_similarity(new_vector, lexeme.vector))
    for lexeme in nlp.vocab
    if lexeme.has_vector and lexeme.is_lower and lexeme.is_alpha
]
    

In [60]:
# Order the (lexeme, similarity) pairs from most to least similar.
computed_similarities = sorted(
    computed_similarities,
    key=lambda pair: pair[1],
    reverse=True,
)

In [65]:
# Show the text of the entries at positions 50 through 58 of the ranking.
ranked_slice = computed_similarities[50:59]
print([pair[0].text for pair in ranked_slice])

['herself', 'crowned', 'mattress', 'wives', 'victoria', 'women', 'egyptian', 'lords', 'lesbian']
