In [48]:
# All imports used anywhere in this notebook live in this first cell so that
# "Restart Kernel -> Run All" works from the top.
import random

import nltk
from textblob import TextBlob, Word, classifiers

# NLTK resources fetched for this notebook.
# NOTE(review): presumably the tagger backs `blob.tags` and WordNet backs
# `Word.lemmatize()` -- confirm against the TextBlob/NLTK docs.
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\z023208\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\z023208\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\wordnet.zip.


True

## Creating a textblob object

In [3]:
# Wrap a plain string in a TextBlob; the next cells operate on this object.
blob =  TextBlob('Test Leaf is a great platform to learn technology.')

## TextBlobs behave like Python strings

In [4]:
# Slice by character position; the displayed result is itself a TextBlob.
blob[1:5]

TextBlob("est ")

In [5]:
# upper() gives back an upper-cased TextBlob.
blob.upper()

TextBlob("TEST LEAF IS A GREAT PLATFORM TO LEARN TECHNOLOGY.")

In [6]:
# lower() gives back a lower-cased TextBlob.
blob.lower()

TextBlob("test leaf is a great platform to learn technology.")

In [7]:
# Returns a language code for the text ('en' in the recorded output).
# NOTE(review): detect_language() is reported as deprecated in TextBlob 0.16.0
# and removed in 0.18.0 -- confirm the installed version before re-running.
blob.detect_language()

'en'

In [8]:
# Second blob, used as the right-hand operand in the concatenation cell.
blob2 = TextBlob("It also helps community through Class, discussions,etc.")

## Concatenation

In [9]:
# `+` accepts both plain strings and other TextBlobs; the result is a TextBlob.
blob + " And " + blob2

TextBlob("Test Leaf is a great platform to learn technology. And It also helps community through Class, discussions,etc.")

## Tokenization

In [10]:
# Two sentences (separated by a newline) so sentence splitting has work to do.
# NOTE(review): "through through" looks like a typo in the sample text; it is
# left as is because the recorded output below reproduces it.
blob = TextBlob("Test Leaf is a great platform to learn technology.\n It helps community through through Class, discussions,etc.")

In [12]:
# Sentence tokenization: displays a list of Sentence objects.
blob.sentences

[Sentence("Test Leaf is a great platform to learn technology."),
 Sentence("It helps community through through Class, discussions,etc.")]

In [13]:
# Sentences are indexable like a list; pick the first one.
blob.sentences[0]

Sentence("Test Leaf is a great platform to learn technology.")

In [14]:
# Word tokenization of the first sentence; the displayed WordList has no
# entry for the trailing period.
blob.sentences[0].words

WordList(['Test', 'Leaf', 'is', 'a', 'great', 'platform', 'to', 'learn', 'technology'])

In [15]:
# Print each token of the first sentence on its own line.
first_sentence = blob.sentences[0]
for token in first_sentence.words:
    print(token)

Test
Leaf
is
a
great
platform
to
learn
technology


## Noun phrase extraction

In [25]:
# Sample sentence reused by the tagging and sentiment cells that follow.
blob = TextBlob("Analytics Vidhya is a great platform to learn data science.")

In [26]:
# Prints every (word, POS-tag) tuple from `blob.tags`.
# NOTE(review): this cell sits under "Noun phrase extraction" but iterates
# `blob.tags`, so it duplicates the POS-tagging cell. TextBlob exposes noun
# phrases via `blob.noun_phrases`; switch to that and re-run if that was the
# intent. The loop variable no longer uses the name `np`, which is the
# conventional numpy alias.
for tagged_word in blob.tags:
    print(tagged_word)

('Analytics', 'NNS')
('Vidhya', 'NNP')
('is', 'VBZ')
('a', 'DT')
('great', 'JJ')
('platform', 'NN')
('to', 'TO')
('learn', 'VB')
('data', 'NNS')
('science', 'NN')


## POS tagging

In [27]:
# One "token TAG" line per tagged token.
for token, pos_tag in blob.tags:
    print(token, pos_tag)

Analytics NNS
Vidhya NNP
is VBZ
a DT
great JJ
platform NN
to TO
learn VB
data NNS
science NN


## Sentiment Analysis

In [28]:
# Echo the text, then display its sentiment as the cell's value
# (a Sentiment tuple with polarity and subjectivity fields).
print (blob)
blob.sentiment

Analytics Vidhya is a great platform to learn data science.


Sentiment(polarity=0.8, subjectivity=0.75)

In [36]:
def sentiment_textblob(feedback):
    """Score ``feedback`` with TextBlob and bucket its polarity into a label.

    Args:
        feedback: Text to analyse; it is passed straight to ``TextBlob``.

    Returns:
        A ``(polarity, label)`` tuple. ``polarity`` is
        ``TextBlob(feedback).sentiment.polarity`` and ``label`` is chosen by
        the band the polarity falls in:

        * ``[-1.0, -0.5)`` -> ``'very bad'``
        * ``[-0.5, -0.1)`` -> ``'bad'``
        * ``[-0.1,  0.2)`` -> ``'ok'``
        * ``[ 0.2,  0.6)`` -> ``'good'``
        * ``[ 0.6,  1.0]`` -> ``'positive'``

    Raises:
        ValueError: If the polarity is not within ``[-1, 1]`` (including NaN).
            Previously this case reached ``return`` with ``label`` unbound and
            surfaced as an ``UnboundLocalError``.
    """
    polarity = TextBlob(feedback).sentiment.polarity

    # NaN fails every comparison, so `not (-1 <= polarity <= 1)` also rejects it.
    if not -1 <= polarity <= 1:
        raise ValueError(
            'polarity must be within [-1, 1], got {!r}'.format(polarity)
        )

    # (exclusive upper bound, label), in ascending order; the first band whose
    # upper bound exceeds the polarity wins.
    bands = (
        (-0.5, 'very bad'),
        (-0.1, 'bad'),
        (0.2, 'ok'),
        (0.6, 'good'),
    )
    for upper_bound, label in bands:
        if polarity < upper_bound:
            return (polarity, label)

    # Everything left is in [0.6, 1].
    return (polarity, 'positive')

In [37]:
# Negated variant of the sample sentence ("is not a great platform").
sentiment = sentiment_textblob('Analytics Vidhya is not a great platform to learn data science.')

In [38]:
# Show the (polarity, label) tuple for the negated sentence.
print(sentiment)

(-0.4, 'bad')


In [39]:
# Same sentence without the negation, for comparison with the previous result.
sentiment = sentiment_textblob('Analytics Vidhya is  a great platform to learn data science.')

In [40]:
# Show the (polarity, label) tuple for the non-negated sentence.
print(sentiment)

(0.8, 'positive')


## Word Inflection and Lemmatization

In [41]:
blob = TextBlob("Test Leaf is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions,etc.")

# Second word of the second sentence, printed before and after singularize().
second_word = blob.sentences[1].words[1]
print(second_word)
print(second_word.singularize())

helps
help


## Pluralization

In [43]:
from textblob import Word
# Word wraps a single token and offers inflection helpers such as pluralize().
w = Word('cat')
w.pluralize()

'cats'

In [44]:
# Irregular plural: the recorded result is 'women', not 'womans'.
w = Word('woman')
w.pluralize()

'women'

In [45]:
# Plural with a stem change: the recorded result is 'wolves'.
w = Word('wolf')
w.pluralize()

'wolves'

In [46]:
## using tags
# Pluralize only the tokens whose POS tag is exactly 'NN'; skip everything else.
for token, pos_tag in blob.tags:
    if pos_tag != 'NN':
        continue
    print(token.pluralize())

platforms
sciences
communities


## Lemmatization

In [49]:
# Reduce an inflected word to its lemma ('jokes' -> 'joke' in the output).
w = Word('jokes')
w.lemmatize()  # no part-of-speech argument is passed here; passing "v" would request verb lemmatization instead

'joke'

In [53]:
# Another plural noun; the recorded lemma is 'cat'.
w = Word('cats')
w.lemmatize() 

'cat'

In [54]:
# Another plural noun; the recorded lemma is 'apple'.
w = Word('apples')
w.lemmatize() 

'apple'

## Ngrams

In [57]:
blob = TextBlob("I went to sri lanka")

# ngrams(2) yields every run of two consecutive words (bigrams).
bigrams = blob.ngrams(2)
for bigram in bigrams:
    print(bigram)

['I', 'went']
['went', 'to']
['to', 'sri']
['sri', 'lanka']


## Spelling correction

In [58]:
# Deliberately misspelled text; correct() displays a TextBlob with the
# misspellings replaced (the unknown name 'Inceptez' is unchanged in the output).
blob = TextBlob('Inceptez is a gret platfrm to lern data scence')
blob.correct()

TextBlob("Inceptez is a great platform to learn data science")

In [59]:
# Candidate corrections for the fourth word ('gret'), each paired with a score.
blob.words[3].spellcheck()

[('great', 0.5351351351351351),
 ('get', 0.3162162162162162),
 ('grew', 0.11216216216216217),
 ('grey', 0.026351351351351353),
 ('greet', 0.006081081081081081),
 ('fret', 0.002702702702702703),
 ('grit', 0.0006756756756756757),
 ('cret', 0.0006756756756756757)]

## Creating a short summary from a text

In [60]:
import random

# Adjacent string literals inside the parentheses are concatenated by Python.
# This replaces backslash continuations *inside* the literal, which silently
# embedded each continuation line's leading indentation into the text.
blob = TextBlob(
    'Inceptez is a thriving community for data driven industry. This platform allows '
    'people to know more about analytics from its articles, Q&A forum, and learning paths. Also, we help '
    'professionals & amateurs to sharpen their skillsets by providing a platform to participate in Hackathons.'
)

In [61]:
# Lemma of every token tagged 'NN'. Duplicates are kept, so the same noun can
# be sampled more than once (as in the recorded output).
nouns = [word.lemmatize() for word, tag in blob.tags if tag == 'NN']

print ("This text is about...")
# random.sample raises ValueError when asked for more items than the list
# holds, so cap the sample size at the number of nouns found.
# NOTE(review): no random seed is set, so each run prints a different sample.
sample_size = min(5, len(nouns))
for item in random.sample(nouns, sample_size):
    print(Word(item).pluralize())

This text is about...
platforms
communities
forums
industries
platforms


## Language Translation

In [62]:
# English source text for the translation example.
blob = TextBlob('Hi Hope you are doing good?')

In [63]:
# Translate to Spanish ('es'); the source language is not specified here.
# NOTE(review): translate() is reported as deprecated in TextBlob 0.16.0 and
# removed in 0.18.0 -- confirm the installed version before re-running.
blob.translate(to ='es')

TextBlob("Hola espero que estés bien")

In [64]:
# Non-English sample (detected as 'ar' in the next cell's output).
blob1 = TextBlob('هذا رائع')

In [65]:
# Language code of the Arabic sample.
blob1.detect_language()

'ar'

In [66]:
# Rebinds `blob` to a Spanish sample and displays its detected language code.
blob = TextBlob("¿Hola como estás?")
blob.detect_language()


'es'

In [67]:
# Translate with the source language given explicitly (Arabic -> English).
blob1.translate(from_lang='ar', to ='en')

TextBlob("that's cool")

In [68]:
# Translate the Spanish sample to English without naming the source language.
blob.translate(to= 'en')

TextBlob("Hello how are you doing?")

## Text Classification using textblob

In [69]:
# Labelled (text, label) pairs used to fit the classifier; labels are
# 'pos' or 'neg'.
training = [
    ('Tom Holland is a terrible spiderman.', 'neg'),
    ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...', 'neg'),
    ('The Dark Knight Rises is the greatest superhero movie ever!', 'pos'),
    ('Fantastic Four should have never been made.', 'neg'),
    ('Wes Anderson is my favorite director!', 'pos'),
    ('Captain America 2 is pretty awesome.', 'pos'),
    # Was 'Let\s pretend ...': "\s" is an invalid escape sequence and a typo
    # for the apostrophe in "Let's".
    ("Let's pretend \"Batman and Robin\" never happened..", 'neg'),
]

# Held-out (text, label) pairs used only to measure accuracy.
testing = [
    ('Superman was never an interesting character.', 'neg'),
    ('Fantastic Mr Fox is an awesome film!', 'pos'),
    ('Dragonball Evolution is simply terrible!!', 'neg'),
]

In [70]:
from textblob import classifiers

In [71]:
# Fit a Naive Bayes classifier on the labelled training pairs.
classifier = classifiers.NaiveBayesClassifier(training)

In [72]:
# Accuracy on the held-out pairs.
# NOTE(review): `testing` holds only three examples, so this number is a very
# coarse estimate.
print (classifier.accuracy(testing))

1.0


In [76]:
# Print the features the classifier found most informative, with the
# label likelihood ratio for each.
classifier.show_informative_features()

Most Informative Features
            contains(is) = True              pos : neg    =      2.9 : 1.0
         contains(never) = False             pos : neg    =      1.8 : 1.0
      contains(terrible) = False             pos : neg    =      1.8 : 1.0
             contains(a) = False             pos : neg    =      1.8 : 1.0
        contains(pretty) = False             neg : pos    =      1.4 : 1.0
       contains(Captain) = False             neg : pos    =      1.4 : 1.0
             contains(2) = False             neg : pos    =      1.4 : 1.0
         contains(movie) = False             neg : pos    =      1.4 : 1.0
      contains(director) = False             neg : pos    =      1.4 : 1.0
        contains(Knight) = False             neg : pos    =      1.4 : 1.0


In [78]:
# Attach the trained classifier to a blob so classify() can label its text.
blob = TextBlob('I like Knight ', classifier=classifier)
print (blob.classify())

neg


In [81]:
# Rebuilds the same blob as the previous cell and prints its sentiment.
# NOTE(review): the recorded result is polarity 0.0 / subjectivity 0.0 even
# though classify() returned 'neg'; `.sentiment` appears to be independent of
# the attached classifier -- confirm against the TextBlob docs.
blob = TextBlob('I like Knight ', classifier=classifier)
print (blob.sentiment)

Sentiment(polarity=0.0, subjectivity=0.0)
