In [None]:
# Practical No. 1: Comprehensive NLP Pipeline for Linguistic Analysis Using spaCy and NLTK
import spacy
import nltk
from nltk.stem import PorterStemmer

In [None]:
# Porter stemmer instance used by the stemming cell.
stemmer = PorterStemmer()

# spaCy's small English pipeline; every later analysis cell works on its output.
nlp = spacy.load("en_core_web_sm")

# Fetch the NLTK "punkt" data package (reports "already up-to-date" when present).
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Sample passage that is fed to the spaCy pipeline in the next cell.
# Adjacent string literals are concatenated by Python into one string.
text = (
    "On May 13, 2025, at 6:19 p.m., the Israeli Air Force executed precision "
    "strikes on the courtyard and surrounding areas of Gaza's European "
    "Hospital in Khan Yunis, dropping at least nine bunker-busting bombs "
    "among dozens of other munitions. "
    "The attack disabled key hospital functions and struck nearby "
    "infrastructure, with the strikes guided by intelligence from the IDF, "
    "Southern Command, and Shin Bet, under real-time oversight from the IDF "
    "Chief of the General Staff."
)

In [None]:
# Process text: run the loaded spaCy pipeline over the passage. The resulting
# `doc` is what the later cells iterate over (tokens, lemmas, tags, noun
# chunks, dependencies, entities).
doc = nlp(text)

In [None]:
# 1. Tokenization: list the surface text of every token spaCy produced.
# A single print call with a newline separator emits the heading and the
# list on separate lines.
print("1 Tokens:", [tok.text for tok in doc], sep="\n")


1 Tokens:
['On', 'May', '13', ',', '2025', ',', 'at', '6:19', 'p.m.', ',', 'the', 'Israeli', 'Air', 'Force', 'executed', 'precision', 'strikes', 'on', 'the', 'courtyard', 'and', 'surrounding', 'areas', 'of', 'Gaza', "'s", 'European', 'Hospital', 'in', 'Khan', 'Yunis', ',', 'dropping', 'at', 'least', 'nine', 'bunker', '-', 'busting', 'bombs', 'among', 'dozens', 'of', 'other', 'munitions', '.', 'The', 'attack', 'disabled', 'key', 'hospital', 'functions', 'and', 'struck', 'nearby', 'infrastructure', ',', 'with', 'the', 'strikes', 'guided', 'by', 'intelligence', 'from', 'the', 'IDF', ',', 'Southern', 'Command', ',', 'and', 'Shin', 'Bet', ',', 'under', 'real', '-', 'time', 'oversight', 'from', 'the', 'IDF', 'Chief', 'of', 'the', 'General', 'Staff', '.']


In [None]:
# Stemming: show each content word next to its Porter stem.
#
# The token list is built in this cell so that it runs on a fresh kernel
# (Restart & Run All); previously it relied on `filtered_tokens` from the
# stop-word removal cell, which sits further down the notebook. That cell
# rebuilds the same list with the same filter.
# `PorterStemmer` is imported in the first cell and `stemmer` is created in
# the setup cell, so neither is repeated here.
filtered_tokens = [token.text for token in doc if not token.is_stop and token.is_alpha]

print("\n Stemming:")
for token in filtered_tokens:
    # Left-align the word in a 50-character column, then print its stem.
    print(f"{token:50} -> {stemmer.stem(token)}")


 Stemming:
Israeli                                            -> isra
Air                                                -> air
Force                                              -> forc
executed                                           -> execut
precision                                          -> precis
strikes                                            -> strike
courtyard                                          -> courtyard
surrounding                                        -> surround
areas                                              -> area
Gaza                                               -> gaza
European                                           -> european
Hospital                                           -> hospit
Khan                                               -> khan
Yunis                                              -> yuni
dropping                                           -> drop
bunker                                             -> bunker
busting               

In [None]:
# Lemmatization: pair each token with the lemma spaCy assigned to it.
# Rows are produced lazily and unpacked into one print call, one per line.
lemma_rows = (f"{tok.text:20} ➤ {tok.lemma_}" for tok in doc)
print("\n4 Lemmatization:", *lemma_rows, sep="\n")


4 Lemmatization:
On                   ➤ on
May                  ➤ May
13                   ➤ 13
,                    ➤ ,
2025                 ➤ 2025
,                    ➤ ,
at                   ➤ at
6:19                 ➤ 6:19
p.m.                 ➤ p.m.
,                    ➤ ,
the                  ➤ the
Israeli              ➤ israeli
Air                  ➤ Air
Force                ➤ Force
executed             ➤ execute
precision            ➤ precision
strikes              ➤ strike
on                   ➤ on
the                  ➤ the
courtyard            ➤ courtyard
and                  ➤ and
surrounding          ➤ surround
areas                ➤ area
of                   ➤ of
Gaza                 ➤ Gaza
's                   ➤ 's
European             ➤ European
Hospital             ➤ Hospital
in                   ➤ in
Khan                 ➤ Khan
Yunis                ➤ Yunis
,                    ➤ ,
dropping             ➤ drop
at                   ➤ at
least                ➤ least
ni

In [None]:
# Stop-word removal: keep only alphabetic tokens that spaCy does not flag
# as stop words (punctuation and numbers are dropped by the is_alpha check).
filtered_tokens = []
for tok in doc:
    if tok.is_stop or not tok.is_alpha:
        continue
    filtered_tokens.append(tok.text)

print("\n  Tokens after Stopword Removal:", filtered_tokens, sep="\n")


  Tokens after Stopword Removal:
['Israeli', 'Air', 'Force', 'executed', 'precision', 'strikes', 'courtyard', 'surrounding', 'areas', 'Gaza', 'European', 'Hospital', 'Khan', 'Yunis', 'dropping', 'bunker', 'busting', 'bombs', 'dozens', 'munitions', 'attack', 'disabled', 'key', 'hospital', 'functions', 'struck', 'nearby', 'infrastructure', 'strikes', 'guided', 'intelligence', 'IDF', 'Southern', 'Command', 'Shin', 'Bet', 'real', 'time', 'oversight', 'IDF', 'Chief', 'General', 'Staff']


In [None]:
# Part-of-speech tagging: one row per token showing its text, the value of
# `pos_` (padded to 10 columns) and the value of `tag_`.
print("\n POS Tagging:")
for tok in doc:
    print(f"{tok.text:20}", f"{tok.pos_:10}", tok.tag_, sep=" ➤ ")


 POS Tagging:
On                   ➤ ADP        ➤ IN
May                  ➤ PROPN      ➤ NNP
13                   ➤ NUM        ➤ CD
,                    ➤ PUNCT      ➤ ,
2025                 ➤ NUM        ➤ CD
,                    ➤ PUNCT      ➤ ,
at                   ➤ ADP        ➤ IN
6:19                 ➤ NUM        ➤ CD
p.m.                 ➤ NOUN       ➤ NN
,                    ➤ PUNCT      ➤ ,
the                  ➤ DET        ➤ DT
Israeli              ➤ ADJ        ➤ JJ
Air                  ➤ PROPN      ➤ NNP
Force                ➤ PROPN      ➤ NNP
executed             ➤ VERB       ➤ VBD
precision            ➤ NOUN       ➤ NN
strikes              ➤ NOUN       ➤ NNS
on                   ➤ ADP        ➤ IN
the                  ➤ DET        ➤ DT
courtyard            ➤ NOUN       ➤ NN
and                  ➤ CCONJ      ➤ CC
surrounding          ➤ VERB       ➤ VBG
areas                ➤ NOUN       ➤ NNS
of                   ➤ ADP        ➤ IN
Gaza                 ➤ PROPN      ➤ NNP
's   

In [None]:
# Noun phrase chunking: print each span from doc.noun_chunks as a bullet.
print("\n Noun Phrase Chunks:")
for phrase in doc.noun_chunks:
    # print's default single-space separator sits between bullet and text.
    print("•", phrase.text)


 Noun Phrase Chunks:
• May
• 6:19 p.m.
• the Israeli Air Force
• precision strikes
• the courtyard and surrounding areas
• Gaza's European Hospital
• Khan Yunis
• at least nine bunker-busting bombs
• dozens
• other munitions
• The attack
• key hospital functions
• nearby infrastructure
• the strikes
• intelligence
• the IDF
• Southern Command
• Shin Bet
• real-time oversight
• the IDF Chief
• the General Staff


In [None]:
# Dependency parsing: for every token show its dependency label and the
# text of its syntactic head (the ROOT token lists itself as head).
row_template = "{word:20} ➤ {relation:15} ➤ Head: {governor}"

print("\n Syntax (Dependency Parsing):")
for tok in doc:
    print(row_template.format(word=tok.text, relation=tok.dep_, governor=tok.head.text))



 Syntax (Dependency Parsing):
On                   ➤ prep            ➤ Head: executed
May                  ➤ pobj            ➤ Head: On
13                   ➤ nummod          ➤ Head: May
,                    ➤ punct           ➤ Head: May
2025                 ➤ nummod          ➤ Head: May
,                    ➤ punct           ➤ Head: executed
at                   ➤ prep            ➤ Head: executed
6:19                 ➤ nummod          ➤ Head: p.m.
p.m.                 ➤ pobj            ➤ Head: at
,                    ➤ punct           ➤ Head: executed
the                  ➤ det             ➤ Head: Force
Israeli              ➤ compound        ➤ Head: Force
Air                  ➤ compound        ➤ Head: Force
Force                ➤ nsubj           ➤ Head: executed
executed             ➤ ROOT            ➤ Head: executed
precision            ➤ compound        ➤ Head: strikes
strikes              ➤ dobj            ➤ Head: executed
on                   ➤ prep            ➤ Head: strikes
the

In [None]:
# 8. Semantics: Named Entity Recognition
# Print each entity span from doc.ents, padded to 40 columns, with its label.
print("\n Named Entities:")
for entity in doc.ents:
    print(entity.text.ljust(40), "➤", entity.label_)


 Named Entities:
May 13, 2025                             ➤ DATE
6:19 p.m.                                ➤ TIME
the Israeli Air Force                    ➤ ORG
Gaza                                     ➤ GPE
European Hospital                        ➤ ORG
Khan Yunis                               ➤ GPE
at least nine                            ➤ CARDINAL
dozens                                   ➤ CARDINAL
IDF                                      ➤ ORG
Southern Command                         ➤ ORG
Shin Bet                                 ➤ PERSON
IDF                                      ➤ ORG
