# Natural language processing with spaCy

In [None]:
import spacy

# Load the English NLP model.
# The similarity cell further down recommends a medium or large model such as this one.
nlp = spacy.load("en_core_web_md")

# Sample text for processing; the later cells that read "doc" all analyse this sentence.
text = "Apple is looking at buying U.K. startup for $1 billion"

# The nlp object processes the text and returns "doc", which the following cells iterate over.
doc = nlp(text)

In [None]:
# Tokenization
# Processing the text through the nlp object already split it into tokens; iterating over "doc" yields them.
print("Tokenization:")
token_texts = [tok.text for tok in doc]
for piece in token_texts:
    print(piece)

In [None]:
# POS Tagging
# Every token in "doc" carries several attributes; .pos_ holds its part-of-speech tag.
print("\nPart-of-Speech Tagging:")
for tok in doc:
    word, tag = tok.text, tok.pos_
    print(word, tag)

In [None]:
# Lemmatization
# A lemma is the base form a word is reduced to; spaCy exposes it on each token as .lemma_.
print("\nLemmatization:")
word_lemma_pairs = [(tok.text, tok.lemma_) for tok in doc]
for word, lemma in word_lemma_pairs:
    print(word, lemma)

In [None]:
# Named Entity Recognition (NER)
# The named entities spaCy recognized in the text are available through the .ents property of "doc".
print("\nNamed Entity Recognition:")
entities = doc.ents
for entity in entities:
    print(entity.text, entity.label_)

In [None]:
# Sentence Similarity
# spaCy can compare two objects and predict how similar they are.
# Note: similarity predictions are more accurate with a larger model,
# such as "en_core_web_md" or "en_core_web_lg".
print("\nSentence Similarity:")
sentence_pair = (
    "I like salty fries and hamburgers.",
    "Fast food tastes very good.",
)
# Process the two sentences in order, then score the first against the second.
doc1, doc2 = (nlp(sentence) for sentence in sentence_pair)
similarity = doc1.similarity(doc2)
print("Similarity between sentences:", similarity)

In [None]:
# Sentence Similarity: two phrasings of the same statement.
first_sentence = "John gave Mary a gift."
second_sentence = "John gave a gift to Mary."

print("\nSentence Similarity:")
doc1 = nlp(first_sentence)
doc2 = nlp(second_sentence)
similarity = doc1.similarity(doc2)
print("Similarity between sentences:", similarity)

In [None]:
# Sentence Similarity: two sentences about unrelated topics.
print("\nSentence Similarity:")
doc1, doc2 = map(
    nlp,
    (
        "Yesterday was a really nice day.",
        "Switzerland is located in the middle of Europe.",
    ),
)
similarity = doc1.similarity(doc2)
print("Similarity between sentences:", similarity)

In [None]:
# Dependency Parsing
# Each token has a dependency label (.dep_) and a head token (.head); printed together
# they show how the tokens relate to one another.
print("\nDependency Parsing:")
for tok in doc:
    head = tok.head
    print(tok.text, tok.dep_, head.text)

In [None]:
# Visualizing with displaCy
# spaCy offers a visualizer called displaCy. It can be used to visualize the dependency parse or named entities.
# This cell imports it and draws the dependency parse of "doc".
from spacy import displacy

print("\nVisualizing Dependency Parse:")
# style="dep" selects the dependency-parse view.
# NOTE(review): jupyter=True presumably makes displaCy render inline in the notebook - confirm against the displaCy docs.
displacy.render(doc, style="dep", jupyter=True)

In [None]:
# Visualizing named entities with displaCy.
# Relies on "displacy" having been imported by the preceding dependency-parse cell.
print("\nVisualizing Named Entities:")
# style="ent" selects the named-entity view of the same "doc".
# NOTE(review): jupyter=True presumably makes displaCy render inline in the notebook - confirm against the displaCy docs.
displacy.render(doc, style="ent", jupyter=True)

<b>There is much more that can be done with spaCy. See the documentation at https://spacy.io/ for details.</b>