In [22]:
import spacy
from tabulate import tabulate
from spacy import displacy

In [3]:
nlp = spacy.load('en_core_web_lg')

In [7]:
text = ("When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him         seriously. “I can tell you very senior CEOs of major American         car companies would shake my hand and turn away because I wasn’t         worth talking to,” said Thrun, now the co-founder and CEO of        online higher education startup Udacity, in an interview with  Recode earlier this week.")
doc = nlp(text)

In [8]:
for entity in doc.ents:
    print(entity.text, entity.label_)

Sebastian Thrun PERSON
Google ORG
2007 DATE
American NORP
Thrun PERSON
Udacity PERSON
Recode PERSON
earlier this week DATE


In [21]:
print(tabulate([[token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
          token.shape_, token.is_alpha, token.is_stop]for token in doc], 
               headers=['text', 'lemma_', 'pos_', 'tag_', 'dep_', 'shape_', 'is_alpha', 'is_stop']))

text       lemma_     pos_    tag_    dep_      shape_    is_alpha    is_stop
---------  ---------  ------  ------  --------  --------  ----------  ---------
When       when       ADV     WRB     advmod    Xxxx      True        False
Sebastian  sebastian  PROPN   NNP     compound  Xxxxx     True        False
Thrun      thrun      PROPN   NNP     nsubj     Xxxxx     True        False
started    start      VERB    VBD     advcl     xxxx      True        False
working    work       VERB    VBG     xcomp     xxxx      True        False
on         on         ADP     IN      prep      xx        True        False
self       self       NOUN    NN      npadvmod  xxxx      True        False
-          -          PUNCT   HYPH    punct     -         False       False
driving    drive      VERB    VBG     amod      xxxx      True        False
cars       car        NOUN    NNS     pobj      xxxx      True        False
at         at         ADP     IN      prep      xx        True        False
Google

In [23]:
displacy.serve(doc, style='dep')


    Serving on port 5000...
    Using the 'dep' visualizer



127.0.0.1 - - [04/Sep/2018 23:06:12] "GET / HTTP/1.1" 200 56634
127.0.0.1 - - [04/Sep/2018 23:06:13] "GET /favicon.ico HTTP/1.1" 200 56634



    Shutting down server on port 5000.



In [24]:
print(tabulate([[ent.text, ent.start_char, ent.end_char, ent.label_]for ent in doc.ents], 
               headers=['text', 'start_char', 'end_char', 'label_']))

text                 start_char    end_char  label_
-----------------  ------------  ----------  --------
Sebastian Thrun               5          20  PERSON
Google                       61          67  ORG
2007                         71          75  DATE
American                    181         189  NORP
Thrun                       295         300  PERSON
Udacity                     371         378  PERSON
Recode                      402         408  PERSON
earlier this week           409         426  DATE


In [27]:
displacy.render(doc, style='ent', jupyter=True)

In [28]:
displacy.render(doc, style='dep', jupyter=True)

In [29]:
print(tabulate([[token1.text, token2.text, token1.similarity(token2)] for token1 in doc for token2 in doc], 
               headers=['token1', 'token2', 'similarity']))

token1     token2       similarity
---------  ---------  ------------
When       When        1
When       Sebastian  -0.00702033
When       Thrun       0
When       started     0.598991
When       working     0.573928
When       on          0.425128
When       self        0.400002
When       -           0.0865491
When       driving     0.48581
When       cars        0.330908
When       at          0.363292
When       Google      0.276873
When       in          0.487571
When       2007        0.13787
When       ,           0.349346
When       few         0.600293
When       people      0.585984
When       outside     0.553441
When       of          0.455727
When       the         0.619606
When       company     0.412777
When       took        0.63257
When       him         0.683443
When                   0
When       seriously   0.555802
When       .           0.437521
When       “           0.30398
When       I           0.515064
When       can         0.614315
When       tell        0

.          .           1


In [30]:
tokens = nlp(u'dog cat banana afskfsd')

for token in tokens:
    print(token.text, token.has_vector, token.vector_norm, token.is_oov)

dog True 7.0336733 False
cat True 6.6808186 False
banana True 6.700014 False
afskfsd False 0.0 True


In [34]:
spacy.info()


    Info about spaCy

    spaCy version      2.0.12         
    Location           I:\ML\Anaconda3\envs\dlwin36\lib\site-packages\spacy
    Platform           Windows-10-10.0.17134-SP0
    Python version     3.6.6          
    Models                            



{'spaCy version': '2.0.12',
 'Location': 'I:\\ML\\Anaconda3\\envs\\dlwin36\\lib\\site-packages\\spacy',
 'Platform': 'Windows-10-10.0.17134-SP0',
 'Python version': '3.6.6',
 'Models': ''}

In [None]:
#Models
#https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.0.0/en_vectors_web_lg-2.0.0.tar.gz
#https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.0.0/en_core_web_lg-2.0.0.tar.gz