In [1]:
import spacy 
# Load the small English pipeline once; every later cell reuses this `nlp` object.
# NOTE(review): small ("sm") spaCy packages reportedly ship without static word
# vectors, which would make the .similarity() calls further down unreliable --
# confirm, and consider en_core_web_md if meaningful similarity scores are needed.
nlp = spacy.load('en_core_web_sm')

Tokenization 

In [2]:
# Sample text that is run through the spaCy pipeline.
text = "Apple is buying a U.K startup for $1.3 billion. Steve Jobs likes apple pie"
doc = nlp(text)

# One line per token: surface text, coarse part-of-speech tag and dependency
# label, padded into columns.
for token in doc:
    row = f"{token.text:{12}} {token.pos_:{5}} {token.dep_:{2}}"
    print(row)

Apple        PROPN nsubj
is           AUX   aux
buying       VERB  ROOT
a            DET   det
U.K          PROPN compound
startup      NOUN  dobj
for          ADP   prep
$            SYM   quantmod
1.3          NUM   compound
billion      NUM   pobj
.            PUNCT punct
Steve        PROPN compound
Jobs         PROPN nsubj
likes        VERB  ROOT
apple        NOUN  compound
pie          NOUN  dobj


Part-of-speech tagging and Dependency Parsing

print()
token.text
token.pos_
token.dep_

In [3]:
from spacy import displacy

# Draw the dependency parse of `doc` inline in the notebook.
render_options = {'distance': 100}
displacy.render(doc, style='dep', jupyter=True, options=render_options)

Named Entity Recognition

In [4]:
# List each named entity found in `doc` next to its label.
for ent in doc.ents:
    entity_line = f"{ent.text:{12}} {ent.label_}"
    print(entity_line)

Apple        ORG
U.K          ORG
$1.3 billion MONEY
Steve Jobs   PERSON


Semantic Similarity

In [5]:
# Second sample sentence, processed with the same pipeline as `doc`.
text2 = "Anybody with a computer and some passion for programming knows a macbook is so much cooler than windows"
doc2 = nlp(text2)
# Score how similar the two documents are.
# NOTE(review): the stray "similarity = doc.similarity(doc2)" line in this cell's
# output looks like the tail of a spaCy warning -- presumably about the loaded
# model having no word vectors; confirm before trusting the score.
similarity = doc.similarity(doc2)
# Print the score rounded to two decimal places.
print(f"{similarity:.2f}")

0.42


  similarity = doc.similarity(doc2)


In [6]:
# One line per token of the second document: text, part-of-speech tag and
# dependency label, separated by single spaces.
for token in doc2:
    token_line = f"{token.text} {token.pos_} {token.dep_}"
    print(token_line)

Anybody PRON nsubj
with ADP prep
a DET det
computer NOUN pobj
and CCONJ cc
some DET det
passion NOUN nsubj
for ADP prep
programming NOUN pobj
knows VERB ROOT
a DET det
macbook NOUN nsubj
is AUX ccomp
so ADV advmod
much ADV advmod
cooler ADJ acomp
than ADP prep
windows NOUN pobj


In [8]:
# List each named entity found in the second document with its label.
for ent in doc2.ents:
    entity_line = f"{ent.text} {ent.label_}"
    print(entity_line)

Film recommendation programme

In [11]:
# Candidate films, each mapped to a one-line description.
films = { 
    "Film 1" : "A sci-fi adventure with satirical undertones",
    "Film 2": "A comedy about identity theft",
    "Film 3": "A documentary following the end of the world as we know it"
}

# Free-text description of what the viewer is in the mood for.
query = "A laugh"

# Similarity score between the query and each film description (the values of
# `films`), keyed by film title.
scores = {}

# Run the spaCy pipeline on the query once, outside the loop.
y = nlp(query)
# Score every film description against the query.
for film, description in films.items(): 
    x = nlp(description)
    similarity = y.similarity(x)
    # Store the raw float. Wrapping it in int() truncates every score strictly
    # between -1 and 1 to 0, so all films tie and max() below always returns
    # the first key regardless of the query.
    scores[film] = similarity

# Recommend the film with the highest similarity score.
recommended_film = max(scores, key=scores.get)
print(recommended_film)



Film 1


  similarity = y.similarity(x)


Further Topics 
* Dependency Parsing
* Custom NER models
* Integrating spaCy with ML frameworks for comprehensive NLP solutions