In [2]:
# Download the NLTK corpora that TextBlob relies on (one-time setup).
# The `!` prefix runs the command in a shell rather than in the Python kernel.
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


In [10]:
# Tokenization: split raw text into sentences, then into word tokens

from textblob import TextBlob

blob = TextBlob('Tokenization refers to dividing text or a sentence into a sequence of tokens. which roughly correspond to words')

# Sentence-level split of the whole text
print(blob.sentences)

# Word tokens of the first sentence only, printed one per line
first_sentence = blob.sentences[0]
for token in first_sentence.words:
  print(token)

[Sentence("Tokenization refers to dividing text or a sentence into a sequence of tokens."), Sentence("which roughly correspond to words")]
Tokenization
refers
to
dividing
text
or
a
sentence
into
a
sequence
of
tokens


In [11]:
# Noun phrase extraction: pull out whole noun phrases rather than single words

blob = TextBlob('Since we extracted the words in the previous section, instead of that we can just extract out the noun phrases from the textblob')

# Loop variable deliberately not called `np`, the conventional numpy alias
for phrase in blob.noun_phrases:
  print(phrase)

previous section
noun phrases


In [12]:
# Part-of-speech tagging: blob.tags yields (word, tag) pairs for the current blob
for token, pos_tag in blob.tags:
  print(token, pos_tag)

Since IN
we PRP
extracted VBD
the DT
words NNS
in IN
the DT
previous JJ
section NN
instead RB
of IN
that IN
we PRP
can MD
just RB
extract VB
out RP
the DT
noun JJ
phrases NNS
from IN
the DT
textblob NN


In [22]:
# Word Inflection and Lemmatization
# Inflection is the process of word formation in which characters are added
# to the base form of a word to express grammatical meaning.

blob = TextBlob('There is an accident on the street. It helps if someone call the ambulance')

# Second word of the second sentence: print it as written, then singularized
inflected_word = blob.sentences[1].words[1]
print(inflected_word)
print(inflected_word.singularize())

helps
help


In [24]:
# TextBlob also offers a built-in Word class for working with a single word.
# Create a Word instance and call the inflection method on it directly.
from textblob import Word
w = Word('station')
# Bare last expression so the notebook displays the pluralized form
w.pluralize()

'stations'

In [25]:
# Collect every word tagged 'NN', then print the plural of each
nn_words = [token for token, tag in blob.tags if tag == 'NN']
for noun in nn_words:
  print(noun.pluralize())

accidents
streets
someones
ambulances


In [29]:
# Lemmatization: reduce an inflected word to its dictionary (base) form
w = Word('running')
# 'v' is the part-of-speech argument (verb); the bare expression displays the lemma
w.lemmatize('v')

'run'

In [31]:
# N-grams in TextBlob: print every run of two consecutive words (bigrams)

for bigram in blob.ngrams(n=2):
  print(bigram)

['There', 'is']
['is', 'an']
['an', 'accident']
['accident', 'on']
['on', 'the']
['the', 'street']
['street', 'It']
['It', 'helps']
['helps', 'if']
['if', 'someone']
['someone', 'call']
['call', 'the']
['the', 'ambulance']


# Sentiment Analysis
The `sentiment` property of a TextBlob returns two values: polarity and subjectivity.
Polarity is a float in the range [-1, 1], where 1 means a positive statement and -1 means a negative statement. Subjective sentences generally refer to personal opinion, emotion, or judgment, whereas objective sentences refer to factual information. Subjectivity is also a float, in the range [0, 1], where 0 is very objective and 1 is very subjective.

In [33]:
# Sentiment analysis on a short, clearly opinionated sentence
blob = TextBlob('This is amazing')
# Echo the text being analysed
print(blob)
# Bare last expression so the notebook displays the polarity and subjectivity scores
blob.sentiment


This is amazing


Sentiment(polarity=0.6000000000000001, subjectivity=0.9)