## spaCy

In [1]:
import spacy
# Load the `en_core_web_sm` pipeline once; later cells reuse `nlp`.
nlp = spacy.load("en_core_web_sm")
# Parse a sample sentence; `doc` is the object inspected in the cells below.
doc = nlp("This is a sentence.")

In [3]:
doc.cats

{}

In [7]:
doc.ents

()

In [12]:
doc.count_by(1)

{1: 4, 0: 1}

In [15]:
doc.has_vector

True

In [16]:
doc.get_lca_matrix()

array([[0, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 2, 3, 1],
       [1, 1, 3, 3, 1],
       [1, 1, 1, 1, 4]], dtype=int32)

In [17]:
doc.is_nered

True

In [18]:
doc.is_parsed

True

In [19]:
doc.is_sentenced

True

In [20]:
doc.is_tagged

True

In [21]:
doc.lang

14626626061804382878

In [22]:
doc.lang_

'en'

In [25]:
doc.mem.size

3600

In [26]:
# doc.merge()

<function Doc.merge>

In [30]:
list(doc.noun_chunks)

[a sentence]

In [33]:
doc.to_json()

{'text': 'This is a sentence.',
 'ents': [],
 'sents': [{'start': 0, 'end': 19}],
 'tokens': [{'id': 0,
   'start': 0,
   'end': 4,
   'pos': 'DET',
   'tag': 'DT',
   'dep': 'nsubj',
   'head': 1},
  {'id': 1,
   'start': 5,
   'end': 7,
   'pos': 'VERB',
   'tag': 'VBZ',
   'dep': 'ROOT',
   'head': 1},
  {'id': 2,
   'start': 8,
   'end': 9,
   'pos': 'DET',
   'tag': 'DT',
   'dep': 'det',
   'head': 3},
  {'id': 3,
   'start': 10,
   'end': 18,
   'pos': 'NOUN',
   'tag': 'NN',
   'dep': 'attr',
   'head': 1},
  {'id': 4,
   'start': 18,
   'end': 19,
   'pos': 'PUNCT',
   'tag': '.',
   'dep': 'punct',
   'head': 1}]}

In [35]:
# doc.retokenize()

In [36]:
doc.sentiment

0.0

In [38]:
list(doc.sents)

[This is a sentence.]

In [40]:
doc.similarity()

TypeError: similarity() takes exactly one argument (0 given)

In [46]:
doc2 = nlp("I like apple")
doc3 = nlp("I really like apple")
doc4 = nlp("I am happy")

In [47]:
doc2.similarity(doc3)

  "__main__", mod_spec)


0.8210044626576248

In [48]:
doc2.similarity(doc4)

  "__main__", mod_spec)


0.5580645864544086

In [50]:
doc.tensor.shape

(5, 96)

In [None]:
doc.tensor.shape

In [51]:
doc.text

'This is a sentence.'

In [52]:
doc.text_with_ws

'This is a sentence.'

In [53]:
doc.to_array()

TypeError: to_array() takes exactly one argument (0 given)

In [54]:
from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA
doc.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])

array([[ 1995909169258310477,                   90,                    0,
                           1],
       [ 3411606890003347522,                  100,                    0,
                           1],
       [11901859001352538922,                   90,                    0,
                           1],
       [18108853898452662235,                   92,                    0,
                           1],
       [12646065887601541794,                   97,                    0,
                           0]], dtype=uint64)

In [55]:
doc.user_data

{}

In [57]:
doc.vector.shape

(96,)

In [58]:
doc.vocab

<spacy.vocab.Vocab at 0x7fc96991bd68>

In [59]:
# The attribute name was missing after the dot (a SyntaxError as written);
# the recorded output is a StringStore, which is what `Vocab.strings` holds.
doc.vocab.strings

<spacy.strings.StringStore at 0x7fc964071378>

In [62]:
doc.vector_norm

15.612124236591061

In [63]:
doc.vector

array([-2.4739090e-01, -2.2242405e+00, -1.2546921e+00,  1.5891397e+00,
       -3.5299918e-01, -8.9434383e-04,  1.0014489e+00,  2.9044402e-01,
        1.4433568e+00,  6.4503215e-02, -3.6572689e-01,  1.1406496e+00,
       -2.5103126e+00, -3.4203529e-03, -4.3100849e-01, -6.6699076e-01,
       -5.5039547e-02,  2.1303937e+00,  2.7755997e+00, -1.6229352e-01,
       -1.2664936e+00,  4.4629017e-01,  2.2127328e+00,  2.4990892e+00,
       -2.6725236e-01,  1.0778682e+00,  5.6172508e-01, -2.7671616e+00,
        1.3703600e+00,  1.3144598e+00,  1.6512916e+00, -4.2029399e-01,
       -1.8267158e+00, -2.8986907e+00,  5.1972121e-01,  3.7608218e-01,
       -9.3003944e-02,  1.4119371e+00, -4.1463251e+00,  1.6257260e+00,
       -1.9995983e-01,  1.3485092e+00, -1.1281586e+00,  4.1449952e+00,
       -1.4170350e+00,  5.1590139e-01,  2.0668216e+00,  8.3228505e-01,
       -1.4188968e-01, -3.0260724e-01, -2.0721081e-01, -5.9967995e-01,
       -2.5865459e+00, -1.2352524e-01, -3.2313965e-02, -4.5055438e-02,
      

In [76]:
import numpy as np

def nlpfy(x):
    """Run string input through the notebook's ``nlp`` pipeline.

    Parameters
    ----------
    x : str or any other object
        A ``str`` is passed to ``nlp``; anything else is left untouched.

    Returns
    -------
    The result of ``nlp(x)`` when ``x`` is a ``str``, otherwise ``x`` itself.
    """
    return nlp(x) if isinstance(x, str) else x

def dotProduct(x, y, show=True):
    """Compute the dot product of the ``.vector`` attributes of two inputs.

    Each argument is first passed through ``nlpfy``, so it may be either a
    raw string or an object that already exposes ``.text`` and ``.vector``.

    Parameters
    ----------
    x, y : str or object with ``.text`` and ``.vector``
        The two operands.
    show : bool, default True
        When truthy, print the ``.text`` of both operands before computing.

    Returns
    -------
    The value of ``np.dot`` applied to the two vectors.
    """
    operands = (nlpfy(x), nlpfy(y))

    if show:
        for operand in operands:
            print(operand.text)

    first, second = operands
    return np.dot(first.vector, second.vector)

In [77]:
dotProduct(doc, "hello world")

This is a sentence.
hello world


-5.797365

In [79]:
dotProduct("Apple", "Orange")

Apple
Orange


411.619

In [80]:
dotProduct("I love Apples", "Orange are not bad as well")

I love Apples
Orange are not bad as well


30.72254