# A tour of awesome features of spaCy (part 1/2)

## Getting started

In [1]:
# One-time setup: uncomment and run these two lines to install spaCy and to
# download the large English model that is loaded as 'en_core_web_lg' below.
# NOTE(review): later cells call `nlp.add_pipe(component, name=...)`, which
# looks like the spaCy 2.x API — presumably a 2.x version pin is needed; confirm.
#!pip install spacy
#!python -m spacy download en_core_web_lg

In [1]:
import spacy

# Load the large English pipeline and run it once over a two-sentence example;
# the resulting `doc` is what the following cells inspect.
nlp = spacy.load('en_core_web_lg')
example_text = "Jon Snow isn't the best dragon rider. But we will let it pass because he only learned last week."
doc = nlp(example_text)

## Preprocessing

In [2]:
# Walk over a two-token slice of the document and report each token's index
# within the full document together with its text.
dragon_rider = doc[6:8]
for token in dragon_rider:
    position, word = token.i, token.text
    print('The {}th token is {}'.format(position, word))

The 6th token is dragon
The 7th token is rider


In [3]:
# Text covered by the two-token slice.
dragon_rider = doc[6:8]
print(dragon_rider.text)

dragon rider


In [4]:
# Sentence that contains the two-token slice.
dragon_rider = doc[6:8]
containing_sentence = dragon_rider.sent
print(containing_sentence)

Jon Snow isn't the best dragon rider.


In [5]:
# For every sentence, print the lemmas of the tokens that are not stop words.
for sent in doc.sents:
    content_tokens = filter(lambda tok: not tok.is_stop, sent)
    print([tok.lemma_ for tok in content_tokens])

['Jon', 'Snow', 'good', 'dragon', 'rider', '.']
['let', 'pass', 'learn', 'week', '.']


In [6]:
# Does the loaded model have a vector for the token at index 6 ("dragon")?
dragon = doc[6]
dragon.has_vector

True

In [7]:
# The same check on a multi-token slice ("dragon rider").
dragon_rider = doc[6:8]
dragon_rider.has_vector

True

In [8]:
# Similarity between the slice "dragon rider" and the single token at index 12.
dragon_rider = doc[6:8]
other_token = doc[12]
dragon_rider.similarity(other_token)

0.29920253

In [9]:
# A small document of four words whose tokens are compared with each other.
doc_sim = nlp('apple orange chair rumpelstiltskin')
first_word, second_word = doc_sim[0], doc_sim[1]
first_word.similarity(second_word)

0.5618917

In [10]:
# Compare the first word of `doc_sim` with its third word.
first_word, third_word = doc_sim[0], doc_sim[2]
first_word.similarity(third_word)

0.17142111

In [11]:
# Compare the first word of `doc_sim` with its fourth word.
first_word, fourth_word = doc_sim[0], doc_sim[3]
first_word.similarity(fourth_word)

-0.114447676

## Linguistic features

In [12]:
# Text, lemma, coarse part of speech, fine-grained tag and dependency label
# of the token at index 5.
token = doc[5]
features = (token.text, token.lemma_, token.pos_, token.tag_, token.dep_)
print(*features)

best good ADJ JJS amod


In [13]:
# Human-readable description of the part-of-speech label.
pos_label = "ADJ"
spacy.explain(pos_label)

'adjective'

In [14]:
# Human-readable description of the fine-grained tag.
tag_label = "JJS"
spacy.explain(tag_label)

'adjective, superlative'

In [15]:
# Human-readable description of the dependency label.
dep_label = "amod"
spacy.explain(dep_label)

'adjectival modifier'

In [16]:
from spacy import displacy

# Styling passed to the dependency visualisation.
options = {
    "compact": True,
    "bg": "orange",
    "color": "black",
    "font": "Source Sans Pro",
}
# Only the first sentence is drawn; use render instead of serve in jupyter.
first_sentence = next(doc.sents)
displacy.render(first_sentence, style="dep", options=options)

In [17]:
# Print the text of each recognised entity, then render the whole document
# with the entity visualiser.
entities = doc.ents
for ent in entities:
    print(ent.text)

displacy.render(doc, style="ent")

Jon Snow
last week


In [18]:
# Restrict the entity visualisation to DATE entities and colour them red.
colors = {"DATE": "red"}
options = dict(ents=["DATE"], colors=colors)
displacy.render(doc, style="ent", options=options)

## spaCy universe

In [19]:
from spacy_langdetect import LanguageDetector

# Register the detector only once: adding a second component under the same
# name is rejected by the pipeline, so an unguarded add_pipe call makes this
# cell fail when it is re-run in the same kernel.
if not nlp.has_pipe("lang_detect"):
    nlp.add_pipe(LanguageDetector(), name="lang_detect", last=True)
doc_es = nlp('Habitación blanquísima del interior de la casa.')

# First line: language stored by the detector on the document extension.
# Second line: the document's `lang_` attribute, printed for comparison.
print(doc_es._.language['language'])
print(doc_es.lang_)

es
en


In [20]:
import neuralcoref

# Drop any "neuralcoref" component left in the pipeline by an earlier run
# (of this cell or of a cell that re-adds it with other settings), so that
# re-executing this cell does not fail on a duplicate component name and
# always ends up with the default NeuralCoref configuration.
if nlp.has_pipe("neuralcoref"):
    nlp.remove_pipe("neuralcoref")
neuralcoref.add_to_pipe(nlp)
doc = nlp("Jon Snow isn't the best dragon rider. But we will let it pass because he only learned last week.")
# The document text with coreferring mentions replaced, as computed by neuralcoref.
doc._.coref_resolved

"Jon Snow isn't the best dragon rider. But we will let the best dragon rider pass because Jon Snow only learned last week."

In [21]:
# Clusters of mentions found in the document, each shown under its main mention.
clusters = doc._.coref_clusters
clusters

[Jon Snow: [Jon Snow, he], the best dragon rider: [the best dragon rider, it]]

In [22]:
# scores for 'we' (the one-token span at index 10)
we_mention = doc[10:11]
doc._.coref_scores[we_mention]

{we: 0.2482292652130127,
 Jon Snow: -3.0058393478393555,
 the best dragon rider: -2.1889660358428955}

In [23]:
# scores for 'it' (the one-token span at index 13)
it_mention = doc[13:14]
doc._.coref_scores[it_mention]

{it: -0.1293228268623352,
 Jon Snow: -2.065023422241211,
 the best dragon rider: 2.3545587062835693,
 we: -3.415001392364502}

In [24]:
# scores for 'he' (the one-token span at index 16)
he_mention = doc[16:17]
doc._.coref_scores[he_mention]

{he: 0.27577924728393555,
 Jon Snow: 4.285954475402832,
 the best dragon rider: -1.2236950397491455,
 we: -4.129101753234863,
 it: -2.2169408798217773}

In [25]:
# Replace the existing coreference component with one built with
# greedyness=0.45, then re-process the same text to compare the resolution.
nlp.remove_pipe("neuralcoref")
coref_settings = {"greedyness": 0.45}
coref = neuralcoref.NeuralCoref(nlp.vocab, **coref_settings)
nlp.add_pipe(coref, name='neuralcoref')
example_text = "Jon Snow isn't the best dragon rider. But we will let it pass because he only learned last week."
doc = nlp(example_text)
doc._.coref_resolved

"Jon Snow isn't the best dragon rider. But we will let it pass because Jon Snow only learned last week."