In [1]:
import textblob
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier

In [2]:
# Print the installed TextBlob version (useful when comparing results across releases).
print("version of textblob: ", textblob.__version__)

version of textblob:  0.17.1


In [3]:
# textblob.classifiers makes it straightforward to build custom classifiers.
# This notebook builds a small custom sentiment analyzer as an example.

# --- Loading data and creating a classifier ---
# Every example is a (text, label) pair; the label is either "pos" or "neg".

# Labelled examples used to build the classifier.
train = [
    ("I love this sandwich.", "pos"),
    ("this is an amazing place!", "pos"),
    ("I feel very good about these beers.", "pos"),
    ("this is my best work.", "pos"),
    ("what an awesome view", "pos"),
    ("I do not like this restaurant", "neg"),
    ("I am tired of this stuff.", "neg"),
    ("I can't deal with this", "neg"),
    ("he is my sworn enemy!", "neg"),
    ("my boss is horrible.", "neg"),
]

# Labelled examples kept aside for evaluating the classifier.
test = [
    ("the beer was good.", "pos"),
    ("I do not enjoy my job", "neg"),
    ("I ain't feeling dandy today.", "neg"),
    ("I feel amazing!", "pos"),
    ("Gary is a friend of mine.", "pos"),
    ("I can't believe I'm doing this.", "neg"),
]

In [4]:
# Build a Naive Bayes classifier from the labelled (text, label) pairs in `train`.
model = NaiveBayesClassifier(train_set=train)

In [5]:
# Evaluating Classifiers
# accuracy(test_set) scores the classifier against the labelled examples in `test`.

model.accuracy(test_set=test)

0.8333333333333334

In [6]:
# Classify the sentiment of a new sentence; the result is the predicted label.

model.classify(text="She is very good girl")

'pos'

In [7]:
# A sentence mixing praise and dislike still receives a single label.
model.classify(text="The book is good but I don't like it")

'neg'

In [8]:
# prob_classify(text) returns the label probability distribution for the text
# rather than only a single predicted label.

prob_dist = model.prob_classify("This one's a doozy.")

In [9]:
# Display the distribution object returned by prob_classify().
prob_dist

<ProbDist with 2 samples>

In [10]:
# Inspect the concrete type of the distribution object.
type(prob_dist)

nltk.probability.DictionaryProbDist

In [11]:
# max() gives the label that has the highest probability in the distribution.
prob_dist.max()

'pos'

In [12]:
# Probability assigned to the 'pos' label, rounded to two decimal places.
round(prob_dist.prob(sample='pos'), 2)

0.63

In [13]:
# Probability assigned to the 'neg' label, rounded to two decimal places.
round(prob_dist.prob(sample='neg'), 2)

0.37

In [14]:
# Classifying TextBlobs

# A classifier can also be passed to the TextBlob constructor; the blob's
# classify() method then uses that classifier to label the blob's text.


blob = TextBlob(text="The beer is good. But the hangover is horrible.", classifier=model)

In [15]:
# Label for the blob as a whole (both sentences taken together).
blob.classify()

'pos'

In [16]:
# Classify every sentence of the blob on its own and print "<sentence> : <label>".
for sentence in blob.sentences:
    label = sentence.classify()
    print(f"{sentence} : {label}")

The beer is good. : pos
But the hangover is horrible. : neg


In [17]:
# show_informative_features(n) prints a listing of the most informative
# features; here the top 5 are requested.

model.show_informative_features(5)

Most Informative Features
            contains(my) = True              neg : pos    =      1.7 : 1.0
            contains(an) = False             neg : pos    =      1.6 : 1.0
             contains(I) = False             pos : neg    =      1.4 : 1.0
             contains(I) = True              neg : pos    =      1.4 : 1.0
            contains(my) = False             pos : neg    =      1.3 : 1.0


In [18]:
# --- Updating classifiers with new data ---
# update(new_data) feeds additional labelled examples to an existing classifier.

# Extra (text, label) pairs to add to the classifier.
new_data = [
    ("She is my best friend.", "pos"),
    ("I'm happy to have a new friend.", "pos"),
    ("Stay thirsty, my friend.", "pos"),
    ("He ain't from around here.", "neg"),
]

# Add the new labelled examples to the existing classifier.
# NOTE(review): this changes `model` in place, so re-running this cell
# presumably feeds the same examples in again — confirm before relying on
# results obtained after a repeated run.
model.update(new_data=new_data)

True

In [19]:
# Re-evaluate on the same test set after the update, for comparison with the earlier score.
model.accuracy(test_set=test)

1.0