In [1]:
!pip install textblob



In [2]:
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to
[nltk_data]     /home/aman.satyawali/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /home/aman.satyawali/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/aman.satyawali/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/aman.satyawali/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     /home/aman.satyawali/nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     /home/aman.satyawali/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
Finished.


In [3]:
from textblob import TextBlob

In [4]:
# Two-sentence sample text used by the tokenization and noun-phrase cells below.
a = TextBlob('I like AI for the works. Getting good at it')

In [5]:
# Word tokenization: .words returns a WordList of the word tokens
# (the recorded output contains no punctuation entries).
a.words

WordList(['I', 'like', 'AI', 'for', 'the', 'works', 'Getting', 'good', 'at', 'it'])

In [6]:
# Sentence tokenization: .sentences returns a list of Sentence objects.
a.sentences

[Sentence("I like AI for the works."), Sentence("Getting good at it")]

In [7]:
# Noun phrase extraction (phrases come back lowercased in the recorded
# output, e.g. 'AI' -> 'ai').
a.noun_phrases

WordList(['ai'])

In [8]:
# Word inflection: sample sentence whose tokens are singularized and
# pluralized in the next cells.
sent = TextBlob('Use 4 spaces per indentation level')

In [9]:
# Show the tokens so the index used in the following cells (2 -> 'spaces') is visible.
sent.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [10]:
# Singular form of the plural token 'spaces'.
sent.words[2].singularize()

'space'

In [11]:
sent.words[2].pluralize() # 'spaces' is already plural, so pluralizing it again gives 'spacess'; pluralize() presumably expects a singular noun

'spacess'

In [12]:
# Sample sentence for part-of-speech tagging.
# NOTE(review): 'betweer' looks like a typo for 'between'; the recorded tagger
# output labels it NN. Confirm whether the misspelling is intentional.
sent = TextBlob('World cup t20 first match will be played betweer Pakistan and India')

In [13]:
# Part-of-speech tagging: .pos_tags returns a list of (word, tag) pairs.
sent.pos_tags

[('World', 'NNP'),
 ('cup', 'NN'),
 ('t20', 'NN'),
 ('first', 'JJ'),
 ('match', 'NN'),
 ('will', 'MD'),
 ('be', 'VB'),
 ('played', 'VBN'),
 ('betweer', 'NN'),
 ('Pakistan', 'NNP'),
 ('and', 'CC'),
 ('India', 'NNP')]

In [14]:
# Stemming: reduce a word to its stem. The stem need not be a dictionary
# word (the recorded output for 'history' is 'histori').
from textblob import Word

q = Word('history')
q.stem()

'histori'

In [15]:
# Lemmatization: map an inflected form to its base form. The 'v' argument
# asks for the word to be treated as a verb ('went' -> 'go').
q = Word('went')
q.lemmatize('v')

'go'

In [16]:
# Definitions: .definitions returns a list of definition strings for the word.
Word('regularization').definitions

['the condition of having been made regular (or more regular)',
 'the act of bringing to uniformity; making regular']

In [17]:
# Spelling correction: correct() returns a new TextBlob with corrected words.
# Caveat visible in the recorded output: besides fixing 'Chzechoslovakia' it
# also rewrites the valid word 'Can' as 'An', so results should be reviewed.

g = TextBlob('Can you pronounce Chzechoslovakia')
g.correct()

TextBlob("An you pronounce Czechoslovakia")

In [18]:
# spellcheck() returns (suggestion, score) pairs; the recorded output lists
# them from highest to lowest score.
Word('gre').spellcheck()

[('are', 0.8943089430894309),
 ('re', 0.04656319290465632),
 ('grew', 0.04089677260408968),
 ('grey', 0.009608277900960829),
 ('pre', 0.003695491500369549),
 ('grm', 0.0017245627001724563),
 ('gr', 0.0009854644000985464),
 ('ore', 0.0007390983000739098),
 ('grs', 0.0004927322000492732),
 ('ire', 0.0002463661000246366),
 ('gee', 0.0002463661000246366),
 ('gare', 0.0002463661000246366),
 ('ere', 0.0002463661000246366)]

In [19]:
# Word frequency: word_counts versus words.count(), and how each treats case.

sent = TextBlob('She sells sea shells on the sea shore')
# The keys of word_counts are lowercased (see the recorded output).
print(sent.word_counts)
# 'Sea' (capitalized) is therefore not a key; the defaultdict lookup yields the int default, 0.
print(sent.word_counts['Sea'])
# words.count() can ignore case explicitly, which finds both occurrences of 'sea'.
print(sent.words.count('Sea', case_sensitive = False))

defaultdict(<class 'int'>, {'she': 1, 'sells': 1, 'sea': 2, 'shells': 1, 'on': 1, 'the': 1, 'shore': 1})
0
2


In [23]:
# Translation and language detection.
# NOTE(review): both calls appear to rely on an external translation service,
# so they need network access. translate() and detect_language() were
# deprecated in TextBlob 0.16 and removed in a later release; confirm the
# installed version still provides them before relying on this cell.
sent = TextBlob('Something is better than nothing')
print(sent.translate(to = 'hi'))
print(sent.detect_language())

कुछ नहीं से कुछ भला
en


<h3>Text classification system</h3>

In [26]:
# Labelled example sentences for the sentiment classifier.
# Both `train` and `test` are lists of (text, label) tuples, where the label
# is either 'pos' or 'neg'.

# Training set: grouped by label, flattened into (text, label) pairs with all
# positive examples first, followed by all negative examples.
train = [
    (sentence, label)
    for label, sentences in (
        ('pos', [
            'I love this sandwich.',
            'this is an amazing place!',
            'I feel very good about these beers.',
            'this is my best work.',
            "what an awesome view",
        ]),
        ('neg', [
            'I do not like this restaurant',
            'I am tired of this stuff.',
            "I can't deal with this",
            'he is my sworn enemy!',
            'my boss is horrible.',
            'my boss is bad.',
        ]),
    )
    for sentence in sentences
]

# Held-out test set: written as a text -> label mapping (every text is unique)
# and converted to a list of pairs; dict insertion order keeps the sequence.
test = list({
    'the beer was good.': 'pos',
    'I do not enjoy my job': 'neg',
    "I ain't feeling dandy today.": 'neg',
    "I feel amazing!": 'pos',
    'Gary is a friend of mine.': 'pos',
    "I can't believe I'm doing this.": 'neg',
}.items())

In [29]:
# Build a Naive Bayes classifier from the labelled training sentences.
from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

In [32]:
# Accuracy of the classifier on the separate test sentences.
cl.accuracy(test)

0.8333333333333334

In [34]:
# Classify a sentence that is not part of the training or test data.
cl.classify('You are horrible')

'neg'

In [39]:
# Another sentence that is not part of the training or test data.
cl.classify('The movie was bad')

'neg'

In [41]:
# Accuracy on the training set itself: this shows how well the model fits
# data it has already seen, not how well it generalizes.
cl.accuracy(train)

1.0

In [43]:
# Extend the existing classifier with additional labelled sentences.
# NOTE(review): update() changes `cl` in place, so re-running this cell
# presumably feeds the same examples in again. Restart the kernel for a
# clean run.
new_data = [
    ("She is my best friend.", "pos"),
    ("I'm happy to have a new friend.", "pos"),
    ("Stay thirsty, my friend.", "pos"),
    ("He ain't from around here.", "neg"),
]

# Kept as the last expression so the return value (True in the recorded
# output) is displayed.
cl.update(new_data)

True

In [45]:
# Re-evaluate on the same test sentences after the classifier was updated.
cl.accuracy(test)

1.0