In [7]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk import pos_tag

# Download the NLTK data packages this script needs. nltk.download() reports
# packages that are already installed as up-to-date instead of re-fetching.
# Both the legacy and the newer resource names are requested so the script
# runs on a clean environment regardless of the installed NLTK release.
for resource in (
    'punkt',                           # tokenizer models (older NLTK releases)
    'punkt_tab',                       # tokenizer tables loaded by newer NLTK releases
    'stopwords',                       # stopword lists, including English
    'averaged_perceptron_tagger',      # POS tagger model (older NLTK releases)
    'averaged_perceptron_tagger_eng',  # English POS tagger model (newer NLTK releases)
):
    nltk.download(resource)


# Sample text: two short sentences used as input for the steps below.
text = "Natural Language Processing (NLP) is an exciting field of Artificial Intelligence. It helps computers understand human language."

# a. Tokenization
# Split the same text two ways: into sentences and into word-level tokens
# (punctuation marks come out as separate word tokens).
sentence_tokens = sent_tokenize(text)
word_tokens = word_tokenize(text)

# Print each heading followed by its token list, word tokens first.
for heading, tokens in (
    ("\n--- Word Tokenization ---", word_tokens),
    ("\n--- Sentence Tokenization ---", sentence_tokens),
):
    print(heading)
    print(tokens)

# b. Stopword Removal
# Build the English stopword set once so each membership test is a set lookup.
stop_words = set(stopwords.words('english'))

# Compare the lowercased token so capitalised stopwords (e.g. 'It') are
# dropped too; tokens that are kept retain their original casing.
# Punctuation tokens are not stopwords, so they pass through unchanged.
filtered_words = list(
    filter(lambda token: token.lower() not in stop_words, word_tokens)
)

print("\n--- Filtered Words (Stopwords Removed) ---", filtered_words, sep="\n")

# c. Stemming
# Reduce every stopword-filtered token to its Porter stem
# (e.g. 'computers' -> 'comput'); the stems come back lowercased.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, filtered_words))

print("\n--- Stemmed Words ---")
print(stemmed_words)

# d. POS Tagging
# Tag the full token list (stopwords and punctuation included), not the
# filtered one; pos_tag yields (token, tag) pairs.
pos_tags = pos_tag(word_tokens)

# Format one "token: TAG" line per pair, then print them under the heading.
tagged_lines = [f"{word}: {tag}" for word, tag in pos_tags]

print("\n--- POS Tagging ---")
for line in tagged_lines:
    print(line)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.



--- Word Tokenization ---
['Natural', 'Language', 'Processing', '(', 'NLP', ')', 'is', 'an', 'exciting', 'field', 'of', 'Artificial', 'Intelligence', '.', 'It', 'helps', 'computers', 'understand', 'human', 'language', '.']

--- Sentence Tokenization ---
['Natural Language Processing (NLP) is an exciting field of Artificial Intelligence.', 'It helps computers understand human language.']

--- Filtered Words (Stopwords Removed) ---
['Natural', 'Language', 'Processing', '(', 'NLP', ')', 'exciting', 'field', 'Artificial', 'Intelligence', '.', 'helps', 'computers', 'understand', 'human', 'language', '.']

--- Stemmed Words ---
['natur', 'languag', 'process', '(', 'nlp', ')', 'excit', 'field', 'artifici', 'intellig', '.', 'help', 'comput', 'understand', 'human', 'languag', '.']

--- POS Tagging ---
Natural: JJ
Language: NNP
Processing: NNP
(: (
NLP: NNP
): )
is: VBZ
an: DT
exciting: JJ
field: NN
of: IN
Artificial: JJ
Intelligence: NNP
.: .
It: PRP
helps: VBZ
computers: NNS
understand: VBP
h