In [None]:
import spacy

In [None]:
# Load the pipeline named "en_core_web_sm"; every later cell that calls `nlp(...)` uses this object.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Sample text (four lines separated by newlines) that is fed to the pipeline below.
paragraph = '''SpaCy is an open-source natural language processing library written in Python.
It is designed to be fast, efficient, and easy to use, making it a popular choice for developers working on NLP tasks.
SpaCy provides a wide range of functionalities for various NLP tasks, such as tokenization, part-of-speech tagging, named entity recognition, dependency parsing, and more.
Here's a basic example of an NLP program using SpaCy to perform tokenization, part-of-speech tagging, and named entity recognition (NER) on a given paragraph:'''

In [None]:
# Run the pipeline over the sample text; the following cells all read from this `doc`.
doc = nlp(paragraph)

In [None]:
# List the text of every token in the parsed document, one per line.
print("Text of each token:")
token_texts = [tok.text for tok in doc]
for piece in token_texts:
    print(piece)

Text of each token:
SpaCy
is
an
open
-
source
natural
language
processing
library
written
in
Python
.


It
is
designed
to
be
fast
,
efficient
,
and
easy
to
use
,
making
it
a
popular
choice
for
developers
working
on
NLP
tasks
.


SpaCy
provides
a
wide
range
of
functionalities
for
various
NLP
tasks
,
such
as
tokenization
,
part
-
of
-
speech
tagging
,
named
entity
recognition
,
dependency
parsing
,
and
more
.


Here
's
a
basic
example
of
an
NLP
program
using
SpaCy
to
perform
tokenization
,
part
-
of
-
speech
tagging
,
and
named
entity
recognition
(
NER
)
on
a
given
paragraph
:


In [None]:
# Collect the text of each sentence span, then print them with 1-based numbering.
sentences = []
for span in doc.sents:
    sentences.append(span.text)

print("sentence tokenization")
for number, sentence_text in enumerate(sentences, start=1):
    print(f"Sentence {number}: {sentence_text}")

sentence tokenization
Sentence 1: SpaCy is an open-source natural language processing library written in Python. 

Sentence 2: It is designed to be fast, efficient, and easy to use, making it a popular choice for developers working on NLP tasks.

Sentence 3: SpaCy provides a wide range of functionalities for various NLP tasks, such as tokenization, part-of-speech tagging, named entity recognition, dependency parsing, and more.

Sentence 4: Here's a basic example of an NLP program using SpaCy to perform tokenization, part-of-speech tagging, and named entity recognition (NER) on a given paragraph:


In [None]:
# Pair each token's text with its `pos_` tag and print one pair per line.
print("\nWord Tokenization and part-of-Speech Tagging:")
tagged_tokens = [(tok.text, tok.pos_) for tok in doc]
for tok_text, pos_tag in tagged_tokens:
    print(f"Token: {tok_text}, POS Tag: {pos_tag}")


Word Tokenization and part-of-Speech Tagging:
Token: SpaCy, POS Tag: PROPN
Token: is, POS Tag: AUX
Token: an, POS Tag: DET
Token: open, POS Tag: ADJ
Token: -, POS Tag: PUNCT
Token: source, POS Tag: NOUN
Token: natural, POS Tag: ADJ
Token: language, POS Tag: NOUN
Token: processing, POS Tag: NOUN
Token: library, POS Tag: NOUN
Token: written, POS Tag: VERB
Token: in, POS Tag: ADP
Token: Python, POS Tag: PROPN
Token: ., POS Tag: PUNCT
Token: 
, POS Tag: SPACE
Token: It, POS Tag: PRON
Token: is, POS Tag: AUX
Token: designed, POS Tag: VERB
Token: to, POS Tag: PART
Token: be, POS Tag: AUX
Token: fast, POS Tag: ADJ
Token: ,, POS Tag: PUNCT
Token: efficient, POS Tag: ADJ
Token: ,, POS Tag: PUNCT
Token: and, POS Tag: CCONJ
Token: easy, POS Tag: ADJ
Token: to, POS Tag: PART
Token: use, POS Tag: VERB
Token: ,, POS Tag: PUNCT
Token: making, POS Tag: VERB
Token: it, POS Tag: PRON
Token: a, POS Tag: DET
Token: popular, POS Tag: ADJ
Token: choice, POS Tag: NOUN
Token: for, POS Tag: ADP
Token: develop

In [None]:
# Print every entity span found in the document together with its label.
print("\nNamed Entity Recognition (NER):")
entity_rows = [(span.text, span.label_) for span in doc.ents]
for entity_text, entity_label in entity_rows:
    print(f"Entity: {entity_text}, Label: {entity_label}")


Named Entity Recognition (NER):
Entity: Python, Label: GPE
Entity: NLP, Label: ORG
Entity: SpaCy, Label: PERSON
Entity: NLP, Label: ORG
Entity: NLP, Label: ORG
Entity: SpaCy, Label: PERSON
Entity: NER, Label: ORG


In [None]:
# Import the English stop-word set by its module path instead of reaching it
# through attribute access on the top-level `spacy` package. The attribute
# chain `spacy.lang.en.stop_words` only resolves once something else has
# already imported that submodule (in this notebook, presumably the earlier
# `spacy.load` of the English model), so the explicit import keeps this cell
# working no matter which cells ran before it.
from spacy.lang.en.stop_words import STOP_WORDS

# NOTE(review): the cells visible in this notebook filter on `token.is_stop`
# and never read `stop_words`; it is kept for anything that may rely on it.
stop_words = STOP_WORDS

In [None]:
# Keep the text of every token that is not flagged as a stop word.
filtered_words = []
for tok in doc:
    if tok.is_stop:
        continue
    filtered_words.append(tok.text)

print("Words after removing stop words:")
print(filtered_words)

Words after removing stop words:
['SpaCy', 'open', '-', 'source', 'natural', 'language', 'processing', 'library', 'written', 'Python', '.', '\n', 'designed', 'fast', ',', 'efficient', ',', 'easy', 'use', ',', 'making', 'popular', 'choice', 'developers', 'working', 'NLP', 'tasks', '.', '\n', 'SpaCy', 'provides', 'wide', 'range', 'functionalities', 'NLP', 'tasks', ',', 'tokenization', ',', '-', '-', 'speech', 'tagging', ',', 'named', 'entity', 'recognition', ',', 'dependency', 'parsing', ',', '.', '\n', 'basic', 'example', 'NLP', 'program', 'SpaCy', 'perform', 'tokenization', ',', '-', '-', 'speech', 'tagging', ',', 'named', 'entity', 'recognition', '(', 'NER', ')', 'given', 'paragraph', ':']


In [None]:
# For each sentence, print a list of "text:dependency-label" strings for its
# tokens, leaving out tokens flagged as stop words.
print("\nParsing:")
for sentence_span in doc.sents:
    parsed_sentence = []
    for tok in sentence_span:
        if not tok.is_stop:
            parsed_sentence.append(f"{tok.text}:{tok.dep_}")
    print(parsed_sentence)


Parsing:
['SpaCy:nsubj', 'open:amod', '-:punct', 'source:nmod', 'natural:amod', 'language:compound', 'processing:compound', 'library:attr', 'written:acl', 'Python:pobj', '.:punct', '\n:dep']
['designed:ROOT', 'fast:advmod', ',:punct', 'efficient:acomp', ',:punct', 'easy:conj', 'use:xcomp', ',:punct', 'making:advcl', 'popular:amod', 'choice:ccomp', 'developers:pobj', 'working:acl', 'NLP:compound', 'tasks:pobj', '.:punct', '\n:dep']
['SpaCy:nsubj', 'provides:ROOT', 'wide:amod', 'range:dobj', 'functionalities:pobj', 'NLP:compound', 'tasks:pobj', ',:punct', 'tokenization:pobj', ',:punct', '-:punct', '-:punct', 'speech:pobj', 'tagging:conj', ',:punct', 'named:acl', 'entity:compound', 'recognition:oprd', ',:punct', 'dependency:compound', 'parsing:conj', ',:punct', '.:punct', '\n:dep']
['basic:amod', 'example:nsubj', 'NLP:compound', 'program:pobj', 'SpaCy:dobj', 'perform:xcomp', 'tokenization:dobj', ',:punct', '-:punct', '-:punct', 'speech:pobj', 'tagging:nsubj', ',:punct', 'named:conj', 'en

In [None]:
from spacy import displacy

# Draw the dependency parse inline in the notebook output.
# `displacy.serve` starts its own web server and blocks the kernel until it is
# interrupted, so nothing after it runs on "Run All". Inside a notebook the
# visualization is produced with `displacy.render`; `jupyter=True` makes it
# display the markup in the cell output.
displacy.render(doc, style="dep", jupyter=True)




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [None]:
import spacy

# Reload the pipeline so this cell can run on its own, given the `import spacy` above.
nlp = spacy.load("en_core_web_sm")
# NOTE(review): `stop_words` is not read by the code visible in this cell, which
# filters on `token.is_stop` instead — confirm whether it is still needed.
stop_words = spacy.lang.en.stop_words.STOP_WORDS

# Sample text for the stop-word removal loop below.
paragraph = """SpaCy is an open-source natural language processing library written in Python.
It is designed to be fast, efficient, and easy to use, making it a popular choice for developers working on NLP tasks.
SpaCy provides a wide range of functionalities for various NLP tasks, such as tokenization, part-of-speech tagging, named entity recognition, dependency parsing, and more.
Here's a basic example of an NLP program using SpaCy to perform tokenization, part-of-speech tagging, and named entity recognition (NER) on a given paragraph:"""

# Parse the sample text; the loop below rebinds `doc` on every pass.
doc = nlp(paragraph)

def remove_stop_words(text):
    """Return the non-stop-word tokens of ``text`` joined by single spaces.

    ``text`` is iterated token by token; every token must expose a ``text``
    string and an ``is_stop`` flag (in this notebook it is called with a
    parsed document). Tokens whose ``is_stop`` is truthy are dropped, the
    rest are kept in their original order. An empty input yields ``''``.
    """
    kept_texts = []
    for tok in text:
        if tok.is_stop:
            continue
        kept_texts.append(tok.text)
    return ' '.join(kept_texts)

# Repeatedly strip stop words from `doc` and re-parse the result until a pass
# finds no stop words at all. The zero check happens only after the removal
# step, so the pass that finds nothing still re-parses and prints the text
# once more before the loop ends.
iterations = 1
stop_word_count = None  # sentinel: no pass has counted anything yet
while stop_word_count != 0:
    print("\nText before removing stop words (Iteration", iterations, "):")
    print(doc.text)

    # Gather the stop words once; the list drives both the printout and the count.
    found_stop_words = [tok.text for tok in doc if tok.is_stop]

    print("Stop words before removing:")
    for word in found_stop_words:
        print(word)

    stop_word_count = len(found_stop_words)
    print("\n\nStop Word Count",stop_word_count)

    # Rebuild the text without stop words and run it through the pipeline again,
    # since the re-joined text is tokenized afresh on the next pass.
    doc = nlp(remove_stop_words(doc))

    print("\nFiltered Text after removing stop words (Iteration", iterations, "):")
    print(doc.text)

    iterations += 1



Text before removing stop words (Iteration 1 ):
SpaCy is an open-source natural language processing library written in Python. 
It is designed to be fast, efficient, and easy to use, making it a popular choice for developers working on NLP tasks.
SpaCy provides a wide range of functionalities for various NLP tasks, such as tokenization, part-of-speech tagging, named entity recognition, dependency parsing, and more.
Here's a basic example of an NLP program using SpaCy to perform tokenization, part-of-speech tagging, and named entity recognition (NER) on a given paragraph:
Stop words before removing:
is
an
in
It
is
to
be
and
to
it
a
for
on
a
of
for
various
such
as
part
of
and
more
Here
's
a
of
an
using
to
part
of
and
on
a


Stop Word Count 35

Filtered Text after removing stop words (Iteration 1 ):
SpaCy open - source natural language processing library written Python . 
 designed fast , efficient , easy use , making popular choice developers working NLP tasks . 
 SpaCy provides wide ra