In [None]:
import nltk
from nltk.tokenize import word_tokenize, WordPunctTokenizer, sent_tokenize
from nltk.util import ngrams
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
#Word tokenize
text = "The sun is shining brightly"
tokens = word_tokenize(text)
print(tokens)

['The', 'sun', 'is', 'shining', 'brightly']


In [None]:
#WordPunct Tokenization
text = "Hello! Howw yoo doin?"
tokens = WordPunctTokenizer().tokenize(text)
print(tokens)

['Hello', '!', 'Howw', 'yoo', 'doin', '?']


In [None]:
#Sentence Tokenization
text = 'She loves programming. She writes code.'
tokens = sent_tokenize(text)
print(tokens)

['She loves programming.', 'She writes code.']


In [None]:
#Unigrams
text = "She enjoys readings books"
tokens = word_tokenize(text)
unigrams = list(ngrams(tokens, 1))
print(unigrams)

[('She',), ('enjoys',), ('readings',), ('books',)]


In [None]:
#Bigrams
bigrams = list(ngrams(tokens, 2))
print(bigrams)

[('She', 'enjoys'), ('enjoys', 'readings'), ('readings', 'books')]


In [None]:
#Trigrams
trigrams = list(ngrams(tokens, 3))
print(trigrams)

[('She', 'enjoys', 'readings'), ('enjoys', 'readings', 'books')]


In [None]:
#POS
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [None]:
text = "She enjoys reading books"
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
print(pos_tags)

[('She', 'PRP'), ('enjoys', 'VBZ'), ('reading', 'VBG'), ('books', 'NNS')]


In [None]:
#Stop words removal
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
text = "She enjoys reading books in library at college, punjab"
stop_words = set(stopwords.words('english'))
tokens = word_tokenize(text)
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
print(filtered_tokens)

['enjoys', 'reading', 'books', 'library', 'college', ',', 'punjab']


In [None]:
#Stemming
from nltk.stem import PorterStemmer

In [None]:
text = "The runners are running swiftly to catch the train. The happiness is evident in the smiles of the people."
tokens = word_tokenize(text)
stemmer = PorterStemmer()
stemmed_tokens = [stemmer.stem(word) for word in tokens]
print(stemmed_tokens)

['the', 'runner', 'are', 'run', 'swiftli', 'to', 'catch', 'the', 'train', '.', 'the', 'happi', 'is', 'evid', 'in', 'the', 'smile', 'of', 'the', 'peopl', '.']


In [3]:
#Lemmatization

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [7]:
text = "The children are playing with toys swiftly"
tokens = word_tokenize(text)
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokens]
print(lemmatized_tokens)

['The', 'child', 'are', 'playing', 'with', 'toy', 'swiftly']


In [12]:
#NER
from nltk import pos_tag, ne_chunk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('maxent_ne_chunker')
nltk.download('maxent_ne_chunker_tab')
nltk.download('words')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker_tab.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [13]:
text = "Anu works at Google and lives in Bengaluru. She visited Punjab in November 2025."
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
named_entities = ne_chunk(pos_tags)
print(named_entities)

(S
  (GPE Anu/NNP)
  works/VBZ
  at/IN
  (ORGANIZATION Google/NNP)
  and/CC
  lives/VBZ
  in/IN
  (GPE Bengaluru/NNP)
  ./.
  She/PRP
  visited/VBD
  (GPE Punjab/NNP)
  in/IN
  November/NNP
  2025/CD
  ./.)


In [18]:
from nltk.tree import Tree
entities = [(leaf[0],tree.label()) for tree in named_entities if isinstance(tree, Tree) for leaf in tree.leaves()]
print(entities)

[('Anu', 'GPE'), ('Google', 'ORGANIZATION'), ('Bengaluru', 'GPE'), ('Punjab', 'GPE')]


In [19]:
#WSD
from nltk.wsd import lesk
from nltk.tokenize import word_tokenize
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [21]:
text = 'I went to bank of river'
tokens = word_tokenize(text)
sense = lesk(tokens,'bank')
print("Word Sense : ",sense)
print('Definition : ', sense.definition() if sense else 'No sense found')

Word Sense :  Synset('bank.v.07')
Definition :  cover with ashes so to control the rate of burning
