<a href="https://colab.research.google.com/github/adnan855570/Natural_language_Processing/blob/main/Nlp_Pipeline_using_Spacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import spacy

In [None]:
nlp = spacy.blank("en")

doc = nlp("Captain ameria is part of Avengers")

for token in doc:
    print(token)

Captain
ameria
is
part
of
Avengers


In [None]:
nlp.pipe_names

[]

In [None]:
nlp = spacy.load("en_core_web_sm")

In [None]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [None]:
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x78fd158dad40>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x78fd158db0a0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x78fda8e84350>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x78fd15607c40>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x78fd155f3f80>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x78fda8e86650>)]

In [None]:
doc = nlp("Captain america is part of Avengers")

for token in doc:
    print(token, "|" , token.pos_, "|", token.lemma_)

Captain | PROPN | Captain
america | PROPN | america
is | AUX | be
part | NOUN | part
of | ADP | of
Avengers | PROPN | Avengers


# Name Entity Recognition

In [None]:
doc = nlp("Tesla is going to aquire twitter for $4 billion")

for ent in doc.ents:
    print(ent.text, "|", ent.label_, "|", spacy.explain(ent.label_))


Tesla | ORG | Companies, agencies, institutions, etc.
$4 billion | MONEY | Monetary values, including unit


## For rendering or displaying it nicely

In [None]:
from spacy import displacy

displacy.render(doc , style="ent")

# Spacy Language Processing Pipelines: Exercises

In [4]:
#importing necessary libraries
import spacy
from collections import Counter

nlp = spacy.load("en_core_web_sm")  #creating an object and loading the pre-trained model for "English"

Excersie: 1
Get all the proper nouns from a given text in a list and also count how many of them.
# Proper Noun means a noun that names a particular person, place, or thing.

In [5]:
text = '''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
'''

# https://spacy.io/usage/linguistic-features

#creating the nlp object
doc = nlp(text)

# Extract nouns from the processed text
nouns = [token.text for token in doc if token.pos_ == "NOUN"]

# Print the extracted nouns
print("Nouns:", nouns)

# Extract nouns and count their occurrences
noun_counter = Counter(token.text for token in doc if token.pos_ == "NOUN")

# Print the noun count
print("Noun Count:", noun_counter)

Nouns: ['Ravi', 'friends', 'school', 'days', 'world', 'tour', 'cities', 'friend', 'part', 'world', 'tour', 'journey', 'months', 'cities', 'world', 'moments']
Noun Count: Counter({'world': 3, 'tour': 2, 'cities': 2, 'Ravi': 1, 'friends': 1, 'school': 1, 'days': 1, 'friend': 1, 'part': 1, 'journey': 1, 'months': 1, 'moments': 1})


Excersie: 2
Get all companies names from a given text and also the count of them.
# Hint: Use the spacy ner functionality

In [6]:
text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in
India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''


doc = nlp(text)

# Extract company names
company_names = [entity.text for entity in doc.ents if entity.label_ == "ORG"]

# Print the extracted company names
print("Company Names:", company_names)

Company Names: ['Tesla', 'Walmart', 'Amazon', 'Microsoft', 'Google', 'Infosys', 'Reliance', 'HDFC Bank', 'Hindustan Unilever', 'Bharti']
