# Importing necessary libraries

In [1]:
# Uncomment to install NLTK into this kernel's environment if it is missing:
# %pip install nltk

In [2]:
import nltk
from nltk import pos_tag
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords
from nltk import FreqDist
from nltk.text import Text
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk import ne_chunk

In [4]:
# Fetch every NLTK resource the cells below rely on, so the notebook runs on a
# fresh environment. 'stopwords' and 'wordnet' were previously missing even
# though stopwords.words("english") and WordNetLemmatizer are used later.
# NOTE(review): recent NLTK releases may look for differently named resources
# (e.g. 'punkt_tab') - confirm against the installed version.
nltk.download('punkt')                       # sent_tokenize / word_tokenize
nltk.download('averaged_perceptron_tagger')  # pos_tag
nltk.download('stopwords')                   # stopwords.words("english")
nltk.download('wordnet')                     # WordNetLemmatizer
nltk.download('maxent_ne_chunker')           # ne_chunk
nltk.download('words')                       # word list used alongside the NE chunker
# Kept last so the value displayed by this cell is still this call's result.
nltk.download('vader_lexicon')               # SentimentIntensityAnalyzer

True


# Sample text 

In [5]:
# Three short sentences used as the input for every section below.
# Adjacent string literals are joined by Python, so this is one single string.
text = (
    "NLTK is a powerful tool for natural language processing. "
    "It can tokenize sentences and words. "
    "NLTK includes various NLP libraries for text analysis."
)

# Tokenization

In [6]:
# Split the sample text into sentences. `sentences` is not read by any of the
# cells visible in this notebook.
sentences = sent_tokenize(text)

In [7]:
# Split the sample text into word and punctuation tokens; the tagging,
# stemming, stop-word and frequency cells below all start from `words`.
words = word_tokenize(text)

In [8]:
print("Words:", words)

Words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'natural', 'language', 'processing', '.', 'It', 'can', 'tokenize', 'sentences', 'and', 'words', '.', 'NLTK', 'includes', 'various', 'NLP', 'libraries', 'for', 'text', 'analysis', '.']


# Part-of-speech tagging

In [9]:
# Tag each token in `words` with a part-of-speech label; the result is a list
# of (token, tag) pairs.
pos_tags = pos_tag(words)

In [10]:
print("POS Tags:", pos_tags)

POS Tags: [('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('powerful', 'JJ'), ('tool', 'NN'), ('for', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.'), ('It', 'PRP'), ('can', 'MD'), ('tokenize', 'VB'), ('sentences', 'NNS'), ('and', 'CC'), ('words', 'NNS'), ('.', '.'), ('NLTK', 'NNP'), ('includes', 'VBZ'), ('various', 'JJ'), ('NLP', 'NNP'), ('libraries', 'NNS'), ('for', 'IN'), ('text', 'JJ'), ('analysis', 'NN'), ('.', '.')]


# Stemming and Lemmatization

In [11]:
stemmer = PorterStemmer()

In [12]:
lemmatizer = WordNetLemmatizer()

In [13]:
stemmed_words = [stemmer.stem(word) for word in words]

In [14]:
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]

In [15]:
print("Stemmed Words:", stemmed_words)

Stemmed Words: ['nltk', 'is', 'a', 'power', 'tool', 'for', 'natur', 'languag', 'process', '.', 'it', 'can', 'token', 'sentenc', 'and', 'word', '.', 'nltk', 'includ', 'variou', 'nlp', 'librari', 'for', 'text', 'analysi', '.']


In [16]:
print("Lemmatized Words:", lemmatized_words)

Lemmatized Words: ['NLTK', 'is', 'a', 'powerful', 'tool', 'for', 'natural', 'language', 'processing', '.', 'It', 'can', 'tokenize', 'sentence', 'and', 'word', '.', 'NLTK', 'includes', 'various', 'NLP', 'library', 'for', 'text', 'analysis', '.']


# Stop words removal 

In [17]:
stop_words = set(stopwords.words("english"))

In [18]:
filtered_words = [word for word in words if word.lower() not in stop_words]

In [19]:
print("Filtered Words:", filtered_words)

Filtered Words: ['NLTK', 'powerful', 'tool', 'natural', 'language', 'processing', '.', 'tokenize', 'sentences', 'words', '.', 'NLTK', 'includes', 'various', 'NLP', 'libraries', 'text', 'analysis', '.']


# Frequency Distribution

In [20]:
freq_dist = FreqDist(words)

In [21]:
print("Frequency Distribution:", freq_dist)

Frequency Distribution: <FreqDist with 22 samples and 26 outcomes>


# Concordance and Similarity

In [22]:
text_object = Text(words)

In [23]:
concordance_result = text_object.concordance("NLTK")

Displaying 2 of 2 matches:
 NLTK is a powerful tool for natural langu
t can tokenize sentences and words . NLTK includes various NLP libraries for t


In [24]:
similar_words = text_object.similar("tool")




In [25]:
print("Concordance Result:", concordance_result)

Concordance Result: None


In [26]:
print("Similar Words:", similar_words)

Similar Words: None


# Sentiment Analysis

In [27]:
# Sentiment scorer; it relies on the 'vader_lexicon' resource fetched in the
# download cell near the top of the notebook.
sia = SentimentIntensityAnalyzer()

In [28]:
# Score the whole sample text in one call. The result is a dict with
# 'neg', 'neu', 'pos' and 'compound' entries.
sentiment_score = sia.polarity_scores(text)

In [29]:
print("Sentiment Analysis Score:", sentiment_score)

Sentiment Analysis Score: {'neg': 0.0, 'neu': 0.745, 'pos': 0.255, 'compound': 0.6705}


# Named Entity Recognition (NER)

In [30]:
# Tokenize again for this section; this repeats the call that produced `words`
# from the same `text` earlier in the notebook.
tokens = word_tokenize(text)

In [31]:
# The chunker works on (token, tag) pairs, so tag the tokens first. This
# repeats the call that produced `pos_tags` earlier.
pos_tags_for_ner = pos_tag(tokens)

In [32]:
# Group the tagged tokens into a tree; subtrees carry an entity label such as
# ORGANIZATION, while other tokens stay as plain token/tag leaves.
ner_result = ne_chunk(pos_tags_for_ner)

In [33]:
print("NER Result:", ner_result)

NER Result: (S
  (ORGANIZATION NLTK/NNP)
  is/VBZ
  a/DT
  powerful/JJ
  tool/NN
  for/IN
  natural/JJ
  language/NN
  processing/NN
  ./.
  It/PRP
  can/MD
  tokenize/VB
  sentences/NNS
  and/CC
  words/NNS
  ./.
  (ORGANIZATION NLTK/NNP)
  includes/VBZ
  various/JJ
  (ORGANIZATION NLP/NNP)
  libraries/NNS
  for/IN
  text/JJ
  analysis/NN
  ./.)
