### Installing the spaCy package

In [1]:
#! pip install --user spacy

### Importing required libraries:

In [2]:
import spacy
# Load the "en_core_web_sm" pipeline; the cells below call this `nlp` object to parse text.
nlp = spacy.load("en_core_web_sm")
# Alternative model kept for reference (presumably the larger "md" English pipeline -- confirm before switching):
#nlp = spacy.load("en_core_web_md")


### Token Analysis

In [3]:
# Parse the sample sentence, then report for every token its text, its
# sentence-start flag, its part-of-speech tag and its lemma.
text = "Mr. Smith made a deal on a beach of Switzerland near WHO."
doc = nlp(text)

print("token analysis\n")
for token in doc:
    # Two output lines per token; sep="\n" reproduces the embedded line break.
    print(
        f"text: {token.text}, is start: {token.is_sent_start}",
        f" POS: {token.pos_}, LEMMA: {token.lemma_}",
        sep="\n",
    )
  

token analysis

text: Mr., is start: True
 POS: PROPN, LEMMA: Mr.
text: Smith, is start: False
 POS: PROPN, LEMMA: Smith
text: made, is start: False
 POS: VERB, LEMMA: make
text: a, is start: False
 POS: DET, LEMMA: a
text: deal, is start: False
 POS: NOUN, LEMMA: deal
text: on, is start: False
 POS: ADP, LEMMA: on
text: a, is start: False
 POS: DET, LEMMA: a
text: beach, is start: False
 POS: NOUN, LEMMA: beach
text: of, is start: False
 POS: ADP, LEMMA: of
text: Switzerland, is start: False
 POS: PROPN, LEMMA: Switzerland
text: near, is start: False
 POS: ADP, LEMMA: near
text: WHO, is start: False
 POS: PRON, LEMMA: who
text: ., is start: False
 POS: PUNCT, LEMMA: .


In [4]:
# For every token of the parsed sentence, show its stop-word flag and whether
# it is flagged as alphabetic.
for token in doc:
    print(
        f"{token.text}, is a stopword? {token.is_stop}",
        f" alphab. chars: {token.is_alpha}",
        sep="\n",
    )
    
    

Mr., is a stopword? False
 alphab. chars: False
Smith, is a stopword? False
 alphab. chars: True
made, is a stopword? True
 alphab. chars: True
a, is a stopword? True
 alphab. chars: True
deal, is a stopword? False
 alphab. chars: True
on, is a stopword? True
 alphab. chars: True
a, is a stopword? True
 alphab. chars: True
beach, is a stopword? False
 alphab. chars: True
of, is a stopword? True
 alphab. chars: True
Switzerland, is a stopword? False
 alphab. chars: True
near, is a stopword? False
 alphab. chars: True
WHO, is a stopword? True
 alphab. chars: True
., is a stopword? False
 alphab. chars: False


### Noun Chunks

spaCy automatically finds the noun chunks in the sentence.

In [5]:
def noun_chunks(doc):
    """Print every noun chunk of ``doc`` followed by the total chunk count.

    For each chunk, the chunk text, the text of its root token, the root's
    dependency label and the text of the root's head token are printed.

    Args:
        doc: A parsed document (here, the result of ``nlp(text)``) exposing a
            ``noun_chunks`` iterable whose items provide ``text`` and
            ``root`` (with ``text``, ``dep_`` and ``head.text``).

    Returns:
        None. All results are written to stdout.
    """
    print("chunk analysis\n")
    # Materialise the chunks once. The previous version kept a manual counter
    # in the loop and then discarded it by re-iterating doc.noun_chunks just
    # to take its length.
    chunks = list(doc.noun_chunks)
    for chunk in chunks:
        root = chunk.root
        print("chunk text: "+chunk.text+"\n root text: "+root.text+", root dep: "+ root.dep_+", root head: "+root.head.text+"\n\n")
    print("total number of chunks: "+str(len(chunks)))
    


In [6]:
# Echo the example sentence, parse it, and run the noun-chunk analysis.
text = "Mr. Smith made a deal on a beach of Switzerland near WHO."
print(text)
print("\n")

doc = nlp(text)

# Call the noun_chunks() helper defined in the previous cell rather than
# keeping a pasted copy of its body here; the printed report is the same.
noun_chunks(doc)
    

Mr. Smith made a deal on a beach of Switzerland near WHO.


chunk analysis

chunk text: Mr. Smith
 root text: Smith, root dep: nsubj, root head: made


chunk text: a deal
 root text: deal, root dep: dobj, root head: made


chunk text: a beach
 root text: beach, root dep: pobj, root head: on


chunk text: Switzerland
 root text: Switzerland, root dep: pobj, root head: of


chunk text: WHO
 root text: WHO, root dep: pobj, root head: near


total number of chunks: 5


### NER with Spacy

In [7]:
# Re-parse the example sentence and list each recognised named entity with
# its character offsets and label, followed by the entity count.
doc = nlp("Mr. Smith made a deal on a beach of Switzerland near WHO.")

for ent in doc.ents:
    print(f"{ent.text} {ent.start_char} {ent.end_char} {ent.label_}")

num_ents = len(doc.ents)
print(f"Tot. num. of recognized named entities: {num_ents}\n")

Smith 4 9 PERSON
Switzerland 36 47 GPE
Tot. num. of recognized named entities: 2



spaCy fails to recognize the organization WHO as a named entity; the token analysis above shows that it was tagged as the pronoun "who" instead.