In [2]:
!pip install textblob



In [3]:
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


In [4]:
# import
from textblob import TextBlob

In [5]:
#create textblob
wiki = TextBlob("Python is a high-level, general-purpose programming language.")

In [6]:
# accessing POS (parts of speech) with POS tagging
wiki.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general-purpose', 'JJ'),
 ('programming', 'NN'),
 ('language', 'NN')]

In [7]:
# noun phrase extraction
wiki.noun_phrases

WordList(['python'])

In [8]:
# sentiment analysis
testimonial=TextBlob("Textblob is amazingly simple to use. What great fun!")
testimonial.sentiment


Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [9]:
testimonial.sentiment.polarity

0.39166666666666666

In [10]:
zen = TextBlob("Beautiful is better than ugly. "
"Explicit is better than implicit. "
"Simple is better than complex.")
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [11]:
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

In [12]:
for sent in zen.sentences:
  print(sent)

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.


In [13]:
# word inflection
sent = TextBlob('Use 4 spaces per indentation level.')
sent.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [14]:
sent.words[2].singularize()

'space'

In [15]:
sent.words[4].pluralize()

'indentations'

In [16]:
# word lemmatization
from textblob import Word
w = Word('running')
w.lemmatize()

'running'

In [17]:
w = Word('went')
w.lemmatize('v')

'go'

In [18]:
# wordnet ingeretion

In [19]:
Word('length').definitions

['the linear extent in space from one end to the other; the longest dimension of something that is fixed in place',
 'continuance in time',
 'the property of being the extent of something from beginning to end',
 'size of the gap between two places',
 'a section of something that is long and narrow']

In [20]:
from textblob.wordnet import Synset
octopus = Synset('octopus.n.02')
shrimp = Synset('shrimp.n.03')
octopus.path_similarity(shrimp)


0.1111111111111111

In [21]:
# word list
animals = TextBlob("cat dog octopus")
animals.words


WordList(['cat', 'dog', 'octopus'])

In [22]:
animals.words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

In [23]:
#spelling correction
g= TextBlob('Can you pronounce czechuslovakia?')
g.correct()

TextBlob("An you pronounce czechoslovakia?")

In [24]:
from textblob import Word
w = Word('longitoude')
w.spellcheck()

[('longitude', 1.0)]

In [25]:
sent = TextBlob('She sells sea shells at the sea shore.')
sent.word_counts['sea']


2

In [26]:
sent.words.count('sea')

2

In [27]:
#Translation and Language Detection

blob = TextBlob("hello")
blob.translate(from_lang='en', to='fr')

TextBlob("Bonjour")

In [28]:
d = TextBlob("Bonjour")
d.detect_language

In [29]:
# n-grams
# The TextBlob.ngrams() method returns a list of tuples of n successive words.


blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

In [30]:
# Get Start and End Indices of Sentences
# Use sentence.start and sentence.end to get the indices where a sentence starts and ends within a TextBlob

zen = TextBlob("Beautiful is better than ugly. "
"Explicit is better than implicit. "
"Simple is better than complex.")
for k in zen.sentences:
    print(k)
    print("---- Starts at index {}, Ends at index {}".format(k.start, k.end))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95


In [32]:
# Text Classification system
# The textblob.classifiers module makes it simple to create custom classifiers.

train = [
       ('I love this sandwich.', 'pos'),
       ('this is an amazing place!', 'pos'),
       ('I feel very good about these beers.', 'pos'),
       ('this is my best work.', 'pos'),
       ("what an awesome view", 'pos'),
       ('I do not like this restaurant', 'neg'),
       ('I am tired of this stuff.', 'neg'),
       ("I can't deal with this", 'neg'),
       ('he is my sworn enemy!', 'neg'),
]
test = [
       ('the beer was good.', 'pos'),
       ('I do not enjoy my job', 'neg'),
       ("I ain't feeling dandy today.", 'neg'),
       ("I feel amazing!", 'pos'),
       ('Gary is a friend of mine.', 'pos'),
       ("I can't believe I'm doing this.", 'neg')]

In [33]:
from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)


In [34]:
cl.classify("This is an amazing library!")

'pos'

In [35]:
prob_dist = cl.prob_classify("This one's a doozy.")
prob_dist.max()

'pos'

In [36]:
prob_dist = cl.prob_classify("I am suffering from cold")
prob_dist.max()

round(prob_dist.prob("pos"), 2)

round(prob_dist.prob("neg"), 2)

0.69

In [37]:
# Classifying TextBlobs
# Another way to classify text is to pass a classifier into the constructor of TextBlob and call its classify() method.

from textblob import TextBlob
blob = TextBlob("Alcohal is good. But the hangover is horrible.", classifier=cl)
blob.classify()

for s in blob.sentences:
    print(s)
    print(s.classify())

Alcohal is good.
pos
But the hangover is horrible.
pos


In [38]:
# Evaluating Classifiers
# To compute the accuracy on our test set, use the accuracy(test_data) method.

cl.accuracy(test)

1.0