In [95]:
import nltk

#Download all datasets
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     /Users/doreenquisido/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /Users/doreenquisido/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /Users/doreenquisido/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /Users/doreenquisido/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_eng is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /Users/doreenquisido/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nlt

True

In [108]:
#Sentence Tokenization

from nltk.tokenize import sent_tokenize

text = ("Each book is a doorway, and every page is a step into a world that only exists" 
        " when you choose to open it. It is a world filled with endless possibilities!"
        )

# Perform sentence tokenization
sentences = sent_tokenize(text)

print("Sentences:", sentences)

Sentences: ['Each book is a doorway, and every page is a step into a world that only exists when you choose to open it.', 'It is a world filled with endless possibilities!']


In [109]:
#Word Tokenization

from nltk.tokenize import word_tokenize

# Perform word tokenization
words = word_tokenize(text)

print("Words:", words)

Words: ['Each', 'book', 'is', 'a', 'doorway', ',', 'and', 'every', 'page', 'is', 'a', 'step', 'into', 'a', 'world', 'that', 'only', 'exists', 'when', 'you', 'choose', 'to', 'open', 'it', '.', 'It', 'is', 'a', 'world', 'filled', 'with', 'endless', 'possibilities', '!']


In [110]:
# Stop Words Removal

from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
filtered_words = [word for word in words if word.lower() not in stop_words]

print("Filtered words:")
print(filtered_words)

Filtered words:
['book', 'doorway', ',', 'every', 'page', 'step', 'world', 'exists', 'choose', 'open', '.', 'world', 'filled', 'endless', 'possibilities', '!']


In [111]:
#Text Normalization: Stemming

from nltk.stem import PorterStemmer

ps = PorterStemmer()

# Apply stemming to the filtered words
stemmed_words = [ps.stem(word) for word in filtered_words]

print("Stemmed Words:")
print(stemmed_words)

Stemmed Words:
['book', 'doorway', ',', 'everi', 'page', 'step', 'world', 'exist', 'choos', 'open', '.', 'world', 'fill', 'endless', 'possibl', '!']


In [112]:
#Text Normalization: Lemmatization

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

# Apply lemmatization to the filtered words
lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words]

print("Lemmatized Words:")
print(lemmatized_words)

Lemmatized Words:
['book', 'doorway', ',', 'every', 'page', 'step', 'world', 'exists', 'choose', 'open', '.', 'world', 'filled', 'endless', 'possibility', '!']


In [113]:
#Parts of Speech (POS) Tagging

from nltk import pos_tag

# Perform POS tagging on the words
pos_tags = pos_tag(words)

print("Words with POS Tags:")
print(pos_tags)

Words with POS Tags:
[('Each', 'DT'), ('book', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('doorway', 'NN'), (',', ','), ('and', 'CC'), ('every', 'DT'), ('page', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('step', 'NN'), ('into', 'IN'), ('a', 'DT'), ('world', 'NN'), ('that', 'WDT'), ('only', 'RB'), ('exists', 'VBZ'), ('when', 'WRB'), ('you', 'PRP'), ('choose', 'VBP'), ('to', 'TO'), ('open', 'VB'), ('it', 'PRP'), ('.', '.'), ('It', 'PRP'), ('is', 'VBZ'), ('a', 'DT'), ('world', 'NN'), ('filled', 'VBN'), ('with', 'IN'), ('endless', 'JJ'), ('possibilities', 'NNS'), ('!', '.')]


In [114]:
#Named Entity Recognition (NER)

from nltk.tokenize import word_tokenize
from nltk import pos_tag, ne_chunk

# The new text for NER
new_text = ( "In Japan, Elias found his path at Google, weaving numbers into stories that" 
             " mirrored the rhythms of life itself."
           )
#Tokenize the text
new_words = word_tokenize(new_text)

#Perform POS tagging
pos_tags = pos_tag(new_words)

#Perform Named Entity Recognition
entities = ne_chunk(pos_tags)

print("Named Entities:")
print(entities)

Named Entities:
(S
  In/IN
  (GPE Japan/NNP)
  ,/,
  (PERSON Elias/NNP)
  found/VBD
  his/PRP$
  path/NN
  at/IN
  (ORGANIZATION Google/NNP)
  ,/,
  weaving/VBG
  numbers/NNS
  into/IN
  stories/NNS
  that/WDT
  mirrored/VBD
  the/DT
  rhythms/NN
  of/IN
  life/NN
  itself/PRP
  ./.)


In [115]:
#Frequency Distribution

from nltk.probability import FreqDist

# Generate a frequency distribution
fdist = FreqDist(filtered_words)

print("Top 5 Most Common Words:")
print(fdist.most_common(5))

Top 5 Most Common Words:
[('world', 2), ('book', 1), ('doorway', 1), (',', 1), ('every', 1)]


In [116]:
#Synonyms and Antonyms

from nltk.corpus import wordnet

# The word we want to find synonyms and antonyms for
word_to_explore = "possibilities"

synonyms = []
antonyms = []

for syn in wordnet.synsets(word_to_explore):
    for lemma in syn.lemmas():
        # Add the synonym to our list
        synonyms.append(lemma.name())
        # Check if the lemma has any antonyms
        if lemma.antonyms():
            antonyms.append(lemma.antonyms()[0].name())

print("Synonyms for 'possibilities':")
print(set(synonyms))
print("\nAntonyms for 'possibilities':")
print(set(antonyms))

Synonyms for 'possibilities':
{'possibleness', 'opening', 'hypothesis', 'theory', 'possible_action', 'possibility'}

Antonyms for 'possibilities':
{'impossibility'}
