# Importing spaCy

In [1]:
import spacy

In [2]:
nlp = spacy.load('en_core_web_lg')

# Loading some text into spaCy

In [3]:
text = 'I recently had a meal at the new Italian restaurant across the street. The food was delicious! ' \
       'They have an impressive selection of authentic pizzas made with real Italian buffalo Mozzarella cheese. ' \
       'Of course, their range of pasta dishes should not be overlooked. The portion of perfectly cooked ' \
       'spaghetti in a creamy garlic and mushroom sauce that I had was probably one of the best dishes I\'ve ' \
       'tasted.'
text

"I recently had a meal at the new Italian restaurant across the street. The food was delicious! They have an impressive selection of authentic pizzas made with real Italian buffalo Mozzarella cheese. Of course, their range of pasta dishes should not be overlooked. The portion of perfectly cooked spaghetti in a creamy garlic and mushroom sauce that I had was probably one of the best dishes I've tasted."

In [4]:
text_doc = nlp(text)

# Splitting into sentences

In [5]:
sentence_list = list(text_doc.sents)

In [6]:
len(sentence_list)

5

In [7]:
sentence_list[4]

The portion of perfectly cooked spaghetti in a creamy garlic and mushroom sauce that I had was probably one of the best dishes I've tasted.

In [8]:
len(sentence_list[4])

27

In [10]:
sentence_list[4][25]

tasted

In [11]:
sentence_list[4][25].lemma_

'taste'

# Part of speech tagging

In [12]:
sentence = sentence_list[0]
sentence

I recently had a meal at the new Italian restaurant across the street.

In [13]:
token = sentence[0]
token, token.pos_

(I, 'PRON')

In [14]:
part_of_speech_list = [(token, token.pos_) for token in sentence]

In [15]:
part_of_speech_list

[(I, 'PRON'),
 (recently, 'ADV'),
 (had, 'VERB'),
 (a, 'DET'),
 (meal, 'NOUN'),
 (at, 'ADP'),
 (the, 'DET'),
 (new, 'ADJ'),
 (Italian, 'ADJ'),
 (restaurant, 'NOUN'),
 (across, 'ADP'),
 (the, 'DET'),
 (street, 'NOUN'),
 (., 'PUNCT')]

# Sentence dependencies

In [16]:
sentence

I recently had a meal at the new Italian restaurant across the street.

In [17]:
token = sentence[2]
token, list(token.children)

(had, [I, recently, meal, .])

In [18]:
token, list(token.ancestors)

(had, [])

In [19]:
dependencies_list = [(token, list(token.ancestors)) for token in sentence]

In [20]:
dependencies_list

[(I, [had]),
 (recently, [had]),
 (had, []),
 (a, [meal, had]),
 (meal, [had]),
 (at, [meal, had]),
 (the, [restaurant, at, meal, had]),
 (new, [restaurant, at, meal, had]),
 (Italian, [restaurant, at, meal, had]),
 (restaurant, [at, meal, had]),
 (across, [restaurant, at, meal, had]),
 (the, [street, across, restaurant, at, meal, had]),
 (street, [across, restaurant, at, meal, had]),
 (., [had])]

In [21]:
from spacy import displacy

In [22]:
displacy.serve(sentence)


    Serving on port 5000...
    Using the 'dep' visualizer



127.0.0.1 - - [08/Dec/2018 13:38:58] "GET / HTTP/1.1" 200 9863
127.0.0.1 - - [08/Dec/2018 13:38:58] "GET /favicon.ico HTTP/1.1" 200 9863



    Shutting down server on port 5000.



# Word vectors

In [23]:
words = ['street', 'pizza', 'spaghetti']

In [24]:
words_docs = [nlp(word) for word in words]

In [25]:
street = words_docs[0]
pizza = words_docs[1]
spaghetti = words_docs[2]

In [26]:
street, pizza, spaghetti

(street, pizza, spaghetti)

In [27]:
pizza.vector

array([ 0.0068727, -0.21634  ,  0.27831  , -0.26192  ,  0.22884  ,
        0.89332  ,  0.4131   ,  0.27377  ,  0.22652  ,  1.5041   ,
       -0.58059  ,  0.56083  , -0.18432  ,  0.27738  , -0.10709  ,
       -0.13519  ,  0.023817 ,  1.1765   , -0.12659  ,  0.043173 ,
        0.23242  , -0.63213  ,  0.40228  , -0.20605  ,  0.46381  ,
       -0.12991  , -0.68031  , -0.010371 ,  0.50033  , -0.32266  ,
        0.24053  ,  0.40178  ,  0.12051  , -0.13791  ,  0.40821  ,
        0.54735  , -0.25946  ,  0.020254 ,  0.21249  ,  0.91965  ,
       -0.21202  ,  0.66568  ,  0.25879  , -0.36124  , -0.10977  ,
        0.87492  , -0.089425 ,  0.39184  , -0.32589  , -0.22331  ,
       -0.17504  ,  0.074762 ,  0.45271  ,  0.085476 , -0.079526 ,
       -0.23986  , -0.010322 ,  0.089974 ,  0.29794  ,  0.26672  ,
       -0.044288 , -0.082716 ,  0.20801  ,  0.38404  ,  0.15281  ,
       -1.1292   , -0.094527 ,  0.16901  , -0.018155 ,  0.31023  ,
       -0.095716 ,  0.32587  , -0.2225   , -0.040376 , -0.5220

In [28]:
pizza.similarity(spaghetti)

0.5517106029170449

In [29]:
pizza.similarity(street)

0.3441291452936163

In [30]:
fake_word = 'qwertyuio'
nlp(fake_word).vector

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.