In [5]:
# 1. Named Entities Extraction 
# Print all named entities along with their labels.
import spacy
nlp = spacy.load("en_core_web_sm")

doc1 = nlp("Taylor Swift performed in Los Angeles on March 3rd, 2023.")

for entity in doc1.ents:
    print(entity, entity.label_)

Taylor Swift PERSON
Los Angeles GPE
March 3rd, 2023 DATE


In [12]:
# 2. Entity Classification
# Write a function that receives a sentence and prints only the entities of type PERSON.

doc2 = nlp("Serena Williams had dinner with Tom Hanks in Paris.")

for ent in doc2.ents:
    if ent.label_ == "PERSON":
        print(ent.text)

Serena Williams
Tom Hanks


In [13]:
# 3. Lemmatization
# Given the sentence: Print each word with its lemma.
import spacy

nlp = spacy.load("en_core_web_sm")

words = ["She", "was", "running", "and", "had", "run", "5", "kilometers", "by", "7", "am."]

for word in words:
    doc = nlp(word)
    lemma = doc[0].lemma_
    print(f"{word} → {lemma}")   

She → she
was → be
running → run
and → and
had → have
run → run
5 → 5
kilometers → kilometer
by → by
7 → 7
am. → am


In [14]:
# 4. Stop Word Removal
# Write a function that receives a sentence and returns a list of words that are not stop words.

from spacy.lang.en.stop_words import STOP_WORDS

doc3 = nlp("This is an example sentence with some stop words.")

for token in doc3:
    if token.text not in STOP_WORDS and not token.is_stop and not token.is_punct:
        print(token.text, end='| ')
print()        
# אפשרות נוספת שיותר מתאימה לתשובה המבוקשת

not_stop_words = [token.text for token in doc3 if STOP_WORDS and not token.is_stop and not token.is_punct]
print(not_stop_words)

example| sentence| stop| words| 
['example', 'sentence', 'stop', 'words']


In [21]:
# 5. Custom Stop Word
# Mark the word "powerful" as a stop word, then check if SpaCy treats it as such. Use the sentence below to check its behavior:

STOP_WORDS.add("powerful")
nlp.vocab["powerful"].is_stop = True
print(nlp.vocab['powerful'].is_stop)  

doc4 = nlp("SpaCy is awesome and powerful.")

for token in doc4:
    print(f"{token.text} --> (stop word?) {token.is_stop}")

# is (already stop word)
# and (already stop word)
# powerful (after we add it)

True
SpaCy --> (stop word?) False
is --> (stop word?) True
awesome --> (stop word?) False
and --> (stop word?) True
powerful --> (stop word?) True
. --> (stop word?) False


In [40]:
# 6. Phrase Matcher
# Use PhraseMatcher to identify the phrase "artificial intelligence" in a sentence and print matches.

from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

patterns = [nlp("Artificial Intelligence"), nlp("artificial intelligence")]
matcher.add("ArtificialIntelligence", patterns)

doc5 = nlp("Artificial Intelligence is the future. I study artificial intelligence.")
matches = matcher(doc5)

for match_id, start, end in matches:
    print(doc5[start:end].text)

Artificial Intelligence
artificial intelligence


In [46]:
# 7. POS Tagging + Explanation
# Write a function that prints each word in a sentence with its POS tag and a human-readable explanation.

doc6 = nlp("The cat sat on the mat.")

for token in doc6:
    print(f"{token.text:<10} {token.pos_:<10} {spacy.explain(token.pos_)}")

The        DET        determiner
cat        NOUN       noun
sat        VERB       verb
on         ADP        adposition
the        DET        determiner
mat        NOUN       noun
.          PUNCT      punctuation


In [65]:
# 8. POS Tagging + Displacy Visualization
# Ask the user to input a sentence using input(), print each word with its POS tag, and then display it using spacy.displacy.render.
from spacy import displacy
import spacy

input1 = input("Any sentence:") # Apple is looking at buying a U.K. startup for $1 billion.

doc7 = nlp(input1)
print()    
for token in doc7:
    print(f"{token.text:<10} {token.pos_:<10} {spacy.explain(token.pos_)}")
    
displacy.render(doc7, style="dep", jupyter=True)  # להציג תחביר
displacy.render(doc7, style="ent", jupyter=True)  # להציג ישויות בשם

Any sentence: "Apple is looking at buying a U.K. startup for $1 billion."



"          PUNCT      punctuation
Apple      PROPN      proper noun
is         AUX        auxiliary
looking    VERB       verb
at         ADP        adposition
buying     VERB       verb
a          DET        determiner
U.K.       PROPN      proper noun
startup    NOUN       noun
for        ADP        adposition
$          SYM        symbol
1          NUM        numeral
billion    NUM        numeral
.          PUNCT      punctuation
"          PUNCT      punctuation
