In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "Taylor Swift performed in Los Angeles on March 3rd, 2023."
doc = nlp(text)

for ent in doc.ents:
    print(ent.text, ent.label_)

Taylor Swift PERSON
Los Angeles GPE
March 3rd, 2023 DATE


In [2]:
def extract_persons(text):
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            print(ent.text)

# Test
extract_persons("Serena Williams had dinner with Tom Hanks in Paris.")

Serena Williams
Tom Hanks


In [4]:
text = "She was running and had run 5 kilometers by 7am."
doc = nlp(text)

for token in doc:
    print(f"{token.text:<10} → {token.lemma_}")

She        → she
was        → be
running    → run
and        → and
had        → have
run        → run
5          → 5
kilometers → kilometer
by         → by
7          → 7
am         → am
.          → .


In [5]:
def remove_stopwords(text):
    doc = nlp(text)
    return [token.text for token in doc if not token.is_stop]

# Test
print(remove_stopwords("This is an example sentence with some stop words."))

['example', 'sentence', 'stop', 'words', '.']


In [6]:
from spacy.lang.en.stop_words import STOP_WORDS

nlp.vocab["powerful"].is_stop = True
text = "SpaCy is awesome and powerful."
doc = nlp(text)

for token in doc:
    if token.is_stop:
        print(f"{token.text} is a stop word")

is is a stop word
and is a stop word
powerful is a stop word


In [7]:
from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)
patterns = [nlp("artificial intelligence")]
matcher.add("AI_PHRASE", patterns)

doc = nlp("Artificial Intelligence is the future. I study artificial intelligence.")
matches = matcher(doc)

for match_id, start, end in matches:
    print(doc[start:end].text)


artificial intelligence


In [8]:
def pos_explanation(text):
    doc = nlp(text)
    for token in doc:
        print(f"{token.text:<10} {token.pos_:<10} {spacy.explain(token.pos_)}")

# Test
pos_explanation("The cat sat on the mat.")


The        DET        determiner
cat        NOUN       noun
sat        VERB       verb
on         ADP        adposition
the        DET        determiner
mat        NOUN       noun
.          PUNCT      punctuation


In [9]:
from spacy.pipeline import Sentencizer

def custom_sentence_split(text):
    nlp.remove_pipe("parser")
    if "sentencizer" not in nlp.pipe_names:
        nlp.add_pipe("sentencizer")

    doc = nlp(text.replace("^", "."))
    for sent in doc.sents:
        print(f"Sentence: {sent.text.strip()}")

# Test
custom_sentence_split("SpaCy is great^It helps with NLP tasks^Really useful.")


Sentence: SpaCy is great.
Sentence: It helps with NLP tasks.
Sentence: Really useful.


In [12]:
from spacy import displacy

def pos_with_visualization():
    sentence = input("Enter a sentence: ")
    doc = nlp(sentence)
    
    for token in doc:
        print(f"{token.text:<10} {token.pos_:<10} {spacy.explain(token.pos_)}")
    
    displacy.render(doc, style="dep", jupyter=False)  # open in browser

# Call function
pos_with_visualization()

SpaCy      PROPN      proper noun
is         AUX        auxiliary
great^It   PRON       pronoun
helps      VERB       verb
with       ADP        adposition
NLP        PROPN      proper noun
tasks^Really ADV        adverb
useful     ADJ        adjective
.          PUNCT      punctuation
