In [1]:
import spacy

In [2]:
# Import the spaCy library, which is used for natural language processing.
nlp = spacy.blank('en')
doc = nlp(" Employee receives 100$ tip. Then he said thank you.")

In [3]:
# Process the text with the blank model, creating a 'doc' object that contains tokens.
for token in doc:
  print(token)

 
Employee
receives
100
$
tip
.
Then
he
said
thank
you
.


In [4]:
# pipe is empty or blanck
nlp.pipe_names

[]

In [7]:
nlp= spacy.load("en_core_web_sm")


In [8]:
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x71d7e0339480>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x71d7e03397e0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x71d7e3f538b0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x71d7e058cc80>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x71d7e0350ac0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x71d7e3f537d0>)]

In [9]:
# List the names of the pipeline components in the loaded model. This will include 'tagger', 'parser', and 'ner'.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [10]:
text = "Employee receives 100$ tip. Then he said thank you."

doc = nlp(text)

"""
# Iterate over each token in the 'doc' and print the token,
its part of speech (POS) tag, and its lemma.
# 'token.pos_' provides the POS tag, and 'token.lemma_'
provides the lemma (base form) of the token.
"""
for token in doc:
  print(token, " | ", token.pos_, " | ", token.lemma_)

Employee  |  NOUN  |  employee
receives  |  VERB  |  receive
100  |  NUM  |  100
$  |  NUM  |  $
tip  |  NOUN  |  tip
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
thank  |  VERB  |  thank
you  |  PRON  |  you
.  |  PUNCT  |  .


## How to use French language using spacy

In [11]:
import spacy

# Load the French small model
nlp = spacy.load("fr_core_news_sm")

In [12]:
# Process the text with the loaded model
doc = nlp("Employer reçoit 100$ de pourboire. Et il dit merci.")

# Iterate over each token in the doc and print the token, its POS tag, and its lemma
for token in doc:
    print(token, " | ", token.pos_, " | ", token.lemma_)

Employer  |  PROPN  |  Employer
reçoit  |  VERB  |  recevoir
100  |  NUM  |  100
$  |  NOUN  |  dollar
de  |  ADP  |  de
pourboire  |  NOUN  |  pourboire
.  |  PUNCT  |  .
Et  |  CCONJ  |  et
il  |  PRON  |  il
dit  |  VERB  |  dire
merci  |  NOUN  |  merci
.  |  PUNCT  |  .


## Entities

In [13]:
# Process the text
doc = nlp("Apple is looking at buying U.K. startup for $1 billion. John Doe, the CEO, confirmed this on Monday.")

# Iterate over the named entities in the doc
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))

Apple  |  ORG  |  Companies, agencies, institutions, etc.
buying U.K. startup for  |  MISC  |  Miscellaneous entities, e.g. events, nationalities, products or works of art
John Doe  |  PER  |  Named person or family.
confirmed this on Monday  |  PER  |  Named person or family.


## Named Entity Recognition, or NER for short

It is a subtask of NLP that focuses on identifying and classifying entities within textual data.

In [14]:
# Ensure that the NER component is in the pipeline
if 'ner' in nlp.pipe_names:
    print("NER component is present in the pipeline.")

# Iterate over the named entities in the doc
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", spacy.explain(ent.label_))


NER component is present in the pipeline.
Apple  |  ORG  |  Companies, agencies, institutions, etc.
buying U.K. startup for  |  MISC  |  Miscellaneous entities, e.g. events, nationalities, products or works of art
John Doe  |  PER  |  Named person or family.
confirmed this on Monday  |  PER  |  Named person or family.
