**Tokenization with nltk**

In [1]:
import nltk

# Punkt provides the pre-trained tokenizer models that NLTK's tokenizers load.
nltk.download('punkt')

# Break the sample sentence into word and punctuation tokens.
text = "I am attending mky NLP Final Practical Session."
tokens = nltk.word_tokenize(text)
print("Tokens :", tokens)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Tokens : ['I', 'am', 'attending', 'mky', 'NLP', 'Final', 'Practical', 'Session', '.']


**Stemming with nltk**

In [2]:
from nltk.stem import PorterStemmer

# Run every sample word through the Porter stemmer.
ps = PorterStemmer()
words = ['run', 'running', 'runner', 'ran']
stemmed_words = list(map(ps.stem, words))

print('Stemmed Words :', stemmed_words)

Stemmed Words : ['run', 'run', 'runner', 'ran']


**Lemmatization with nltk**

In [3]:
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

words = ['dogs', 'cats', 'running', 'ate']
lemmatizer = WordNetLemmatizer()

# lemmatize() receives only the word, so its default part of speech applies
# (noun, per the NLTK docs); pass pos='v' to reduce verb forms as well.
lemmatized_words = list(map(lemmatizer.lemmatize, words))

print('Lemmatized Words :', lemmatized_words)

[nltk_data] Downloading package wordnet to /root/nltk_data...


Lemmatized Words : ['dog', 'cat', 'running', 'ate']


**POS tagging with nltk**

In [4]:
# Model used by nltk.pos_tag.
nltk.download('averaged_perceptron_tagger')

text = "I am attending mky NLP Final Practical Session."

# Tokenize first: pos_tag expects a list of tokens, not a raw string.
tokens = nltk.word_tokenize(text)
pos_tags = nltk.pos_tag(tokens)
print('POS Tags:', pos_tags)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


POS Tags: [('I', 'PRP'), ('am', 'VBP'), ('attending', 'VBG'), ('mky', 'NN'), ('NLP', 'NNP'), ('Final', 'NNP'), ('Practical', 'NNP'), ('Session', 'NNP'), ('.', '.')]


**Sentiment analysis with nltk**

In [5]:
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer

text = "I am attending mky NLP Final Practical Session."

# Score the raw sentence; the result is a dict with 'neg', 'neu', 'pos'
# and 'compound' entries.
sia = SentimentIntensityAnalyzer()
sentiment_scores = sia.polarity_scores(text)
print("Sentiment Scores :", sentiment_scores)

Sentiment Scores : {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


**Frequency distribution with nltk**

In [6]:
from nltk import FreqDist

text = "I am attending mky NLP Final Practical Session."

# Count how often each token occurs in the sentence.
tokens = nltk.word_tokenize(text)
fdist = FreqDist(tokens)

# most_common() with no argument lists every (token, count) pair.
ranked_counts = fdist.most_common()
print("Frequency Distribution :", ranked_counts)

Frequency Distribution : [('I', 1), ('am', 1), ('attending', 1), ('mky', 1), ('NLP', 1), ('Final', 1), ('Practical', 1), ('Session', 1), ('.', 1)]


**StopWords with nltk**

In [7]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')

text = "I am attending mky NLP Final Practical Session."

# A set gives constant-time membership checks while filtering.
stop_words = set(stopwords.words("english"))
tokens = word_tokenize(text)

# Keep tokens whose lower-cased form is not an English stop word; the
# original casing of the surviving tokens is preserved.
filtered_tokens = []
for word in tokens:
    if word.lower() not in stop_words:
        filtered_tokens.append(word)

print("Filtered Tokens:", filtered_tokens)

Filtered Tokens: ['attending', 'mky', 'NLP', 'Final', 'Practical', 'Session', '.']


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


**Named-entity-recognition with nltk**

In [8]:
# Resources downloaded before calling nltk.ne_chunk.
for package in ('maxent_ne_chunker', 'words'):
    nltk.download(package)

text = "I am attending mky NLP Final Practical Session."

# Pipeline: tokenize -> POS-tag -> chunk named entities.
tokens = nltk.word_tokenize(text)
pos_tags = nltk.pos_tag(tokens)
chunks = nltk.ne_chunk(pos_tags)

for chunk in chunks:
    # Only items exposing label() are printed as entities; anything else
    # in the chunk result is skipped.
    if not hasattr(chunk, 'label'):
        continue
    entity_words = [leaf[0] for leaf in chunk]
    print(chunk.label(), ' '.join(entity_words))

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


ORGANIZATION NLP Final Practical Session


**Sentence tokenization with nltk**

In [9]:
# Split the text into sentences (the sample contains exactly one).
text = 'I am attending mky NLP Final Practical Session.'
sentences = nltk.sent_tokenize(text)
print("Sentences :", sentences)

Sentences : ['I am attending mky NLP Final Practical Session.']


**Concordance with nltk**

In [10]:
# nltk.book only needs the corpora bundled in the 'book' collection, so fetch
# that instead of 'all', which pulls the entire NLTK data catalogue.
# quiet=True keeps the long per-package download log out of the cell output.
nltk.download('book', quiet=True)
from nltk.book import text1

# Collect every occurrence of the word together with its surrounding context.
concordance_list = text1.concordance_list('monstrous')

# Each entry's .line attribute holds the formatted context line for one hit.
for entry in concordance_list:
    print(entry.line)

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping grammars/basque_grammars.zip.
[nltk_data]    | Downloading package bcp47 to /root/nltk_data...
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /root/n

*** Introductory Examples for the NLTK Book ***
Loading text1, ..., text9 and sent1, ..., sent9
Type the name of the text or sentence to view it.
Type: 'texts()' or 'sents()' to list the materials.
text1: Moby Dick by Herman Melville 1851
text2: Sense and Sensibility by Jane Austen 1811
text3: The Book of Genesis
text4: Inaugural Address Corpus
text5: Chat Corpus
text6: Monty Python and the Holy Grail
text7: Wall Street Journal
text8: Personals Corpus
text9: The Man Who Was Thursday by G . K . Chesterton 1908
ong the former , one was of a most monstrous size . ... This came towards us , 
ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
ll over with a heathenish array of monstrous clubs and spears . Some were thick
d as you gazed , and wondered what monstrous cannibal and savage could ever hav
that has survived the flood ; most monstrous and most mountainous ! That Himmal
they might scout at Moby Dick as a monstrous fable , or still worse and more de
th of

**Collocation with nltk**

In [11]:
# Word pairs that NLTK identifies as collocations in text1 (loaded from
# nltk.book in an earlier cell). Variable and label spelling corrected
# from "collactions".
collocations = text1.collocation_list()
print('Collocations :', collocations)

Collactions : [('Sperm', 'Whale'), ('Moby', 'Dick'), ('White', 'Whale'), ('old', 'man'), ('Captain', 'Ahab'), ('sperm', 'whale'), ('Right', 'Whale'), ('Captain', 'Peleg'), ('New', 'Bedford'), ('Cape', 'Horn'), ('cried', 'Ahab'), ('years', 'ago'), ('lower', 'jaw'), ('never', 'mind'), ('Father', 'Mapple'), ('cried', 'Stubb'), ('chief', 'mate'), ('white', 'whale'), ('ivory', 'leg'), ('one', 'hand')]


**N-gram with nltk**

In [12]:
from nltk import bigrams
from nltk.book import text1

# Materialise every pair of adjacent tokens in the text.
bi_grams = [pair for pair in bigrams(text1)]

# Show only the first ten pairs to keep the output short.
print("Bigrams :", bi_grams[:10])

Bigrams : [('[', 'Moby'), ('Moby', 'Dick'), ('Dick', 'by'), ('by', 'Herman'), ('Herman', 'Melville'), ('Melville', '1851'), ('1851', ']'), (']', 'ETYMOLOGY'), ('ETYMOLOGY', '.'), ('.', '(')]


In [13]:
from nltk import trigrams
from nltk.book import text1

# Materialise every run of three adjacent tokens in the text.
tri_grams = list(trigrams(text1))

# Label corrected: this cell prints trigrams, not bigrams.
print("Trigrams :", tri_grams[:10])

Bigrams : [('[', 'Moby', 'Dick'), ('Moby', 'Dick', 'by'), ('Dick', 'by', 'Herman'), ('by', 'Herman', 'Melville'), ('Herman', 'Melville', '1851'), ('Melville', '1851', ']'), ('1851', ']', 'ETYMOLOGY'), (']', 'ETYMOLOGY', '.'), ('ETYMOLOGY', '.', '('), ('.', '(', 'Supplied')]


**Wordnet with nltk**

In [14]:
from nltk.corpus import wordnet

# Look up every WordNet synset (sense) recorded for the word 'car'.
synsets = wordnet.synsets('car')

# Print the identifier, gloss and usage examples of each sense.
# Label spelling corrected from "Sysnet" to "Synset".
for synset in synsets:
    print('Synset Name :', synset.name())
    print('Definition :', synset.definition())
    print('Examples :', synset.examples())

Sysnet Name : car.n.01
Definition : a motor vehicle with four wheels; usually propelled by an internal combustion engine
Examples : ['he needs a car to get to work']
Sysnet Name : car.n.02
Definition : a wheeled vehicle adapted to the rails of railroad
Examples : ['three cars had jumped the rails']
Sysnet Name : car.n.03
Definition : the compartment that is suspended from an airship and that carries personnel and the cargo and the power plant
Examples : []
Sysnet Name : car.n.04
Definition : where passengers ride up and down
Examples : ['the car was on the top floor']
Sysnet Name : cable_car.n.01
Definition : a conveyance for passengers or freight on a cable railway
Examples : ['they took a cable car to the top of the mountain']


**Word-similarity with nltk**

In [15]:
# Resolve the two synsets to compare (first noun senses of 'car' and 'bus').
word1, word2 = (wordnet.synset(name) for name in ('car.n.01', 'bus.n.01'))

# Wu-Palmer similarity between the two synsets.
similarity = word1.wup_similarity(word2)

print(f'Similarity between {word1} and {word2} is {similarity}')

Similarity between Synset('car.n.01') and Synset('bus.n.01') is 0.6666666666666666
