In [1]:
!pip install textblob



In [2]:
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


In [3]:
import textblob
from textblob import TextBlob

In [4]:
wiki=TextBlob('I love natural language processing not you!')

In [5]:
wiki.tags

[('I', 'PRP'),
 ('love', 'VBP'),
 ('natural', 'JJ'),
 ('language', 'NN'),
 ('processing', 'NN'),
 ('not', 'RB'),
 ('you', 'PRP')]

In [6]:
wiki.noun_phrases

WordList(['natural language processing'])

In [7]:
testimonial = TextBlob('TextBlob is amazingly simple to use what a great fun!')
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [8]:
testimonial.sentiment.subjectivity

0.4357142857142857

In [9]:
zen = TextBlob('Data is a new fuel. \n Explicit is better than implicit. \n Simple is better than complex.')
zen.words

WordList(['Data', 'is', 'a', 'new', 'fuel', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [10]:
zen.sentences

[Sentence("Data is a new fuel."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

In [11]:
for sentence in zen.sentences:
  print(sentence)

Data is a new fuel.
Explicit is better than implicit.
Simple is better than complex.


In [12]:
sentence = TextBlob('use 4 spaces per indentation level')
sentence.words

WordList(['use', '4', 'spaces', 'per', 'indentation', 'level'])

In [13]:
sentence.words[2].singularize()

'space'

In [14]:
sentence.words[0].pluralize()

'uses'

In [15]:
from textblob import Word
q = Word('lions')
q = q.lemmatize()
print(q)

lion


In [16]:
q = Word('went')
q.lemmatize("v")

'go'

In [17]:
Word('length').definitions

['the linear extent in space from one end to the other; the longest dimension of something that is fixed in place',
 'continuance in time',
 'the property of being the extent of something from beginning to end',
 'size of the gap between two places',
 'a section of something that is long and narrow']

In [18]:
from textblob.wordnet import Synset
octopus = Synset('Octopus.n.02')
shrimp = Synset("Shrimp.n.03")
octopus.path_similarity(shrimp)

0.1111111111111111

In [19]:
animals = TextBlob("Cow Sheep Octopus")
animals.words

WordList(['Cow', 'Sheep', 'Octopus'])

In [20]:
animals.words.pluralize()

WordList(['Cows', 'Sheeps', 'Octopera'])

In [21]:
wg = TextBlob('You aree verry badd')
wg.correct()

TextBlob("You are very bad")

In [22]:
wg.words[1].spellcheck()

[('are', 0.8335246842709529),
 ('free', 0.09667049368541906),
 ('area', 0.03742824339839265),
 ('agree', 0.017680826636050518),
 ('tree', 0.009644087256027554),
 ('aren', 0.004362801377726751),
 ('armee', 0.0006888633754305396)]

In [23]:
from textblob import Word
k = Word('Longitudea')
k.spellcheck()

[('Longitude', 1.0)]

In [24]:
sent = TextBlob('She sales sea shells at the sea shore')
sent.word_counts['sea']

2

In [25]:
sent.words.count('Sea')

2

In [26]:
sent.words.count('Sea',case_sensitive=True)

0

In [27]:
sent.noun_phrases.count('Sea')

0

In [28]:
blob=TextBlob(u'something is better than nothing')
blob.translate(to='hi')

AttributeError: 'list' object has no attribute 'strip'

In [29]:
chinese_blob = TextBlob(u'嗨，我是尤瓦')
chinese_blob.translate(from_lang='Zh-CN',to='En')

TextBlob("Hi, I am Uva")

In [30]:
!pip install --upgrade textblob

Collecting textblob
  Downloading textblob-0.18.0.post0-py3-none-any.whl.metadata (4.5 kB)
Downloading textblob-0.18.0.post0-py3-none-any.whl (626 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m626.3/626.3 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: textblob
  Attempting uninstall: textblob
    Found existing installation: textblob 0.17.1
    Uninstalling textblob-0.17.1:
      Successfully uninstalled textblob-0.17.1
Successfully installed textblob-0.18.0.post0


In [31]:
d=TextBlob("This a good story")
d = d.detect_language()
print(d)

HTTPError: HTTP Error 400: Bad Request

In [32]:
zen[0:15]

TextBlob("Data is a new f")

In [33]:
zen.upper()

TextBlob("DATA IS A NEW FUEL. 
 EXPLICIT IS BETTER THAN IMPLICIT. 
 SIMPLE IS BETTER THAN COMPLEX.")

In [34]:
zen.find('than')

41

In [35]:
a_blob = TextBlob('apple')
s_blob = TextBlob('samsung')
a_blob < s_blob

True

In [36]:
a_blob + ' and ' + s_blob

TextBlob("apple and samsung")

In [37]:
"{0} and {1}".format(a_blob, s_blob)

'apple and samsung'

In [38]:
blob = TextBlob("Now is better than never")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

In [39]:
for k in zen.sentences:
  print(k)
  print('\ .... starts at index{{, Ends at index {{'.format(k.start, k.end))

Data is a new fuel.
\ .... starts at index{, Ends at index {
Explicit is better than implicit.
\ .... starts at index{, Ends at index {
Simple is better than complex.
\ .... starts at index{, Ends at index {


In [40]:
train = [
    ('I love this sand which', 'pos'),
     ('This is an amazing place', 'pos'),
      ('I feel very good about these beers', 'pos'),
      ('Tom Holland is a terrible spiderman.','pos'),
            ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),
            ('The Dark Knight Rises is the greatest superhero movie ever!','neg'),
            ('Fantastic Four should have never been made.','pos'),
            ('Wes Anderson is my favorite director!','neg'),
            ('Captain America 2 is pretty awesome.','neg'),
            ('Let\s pretend "Batman and Robin" never happened..','pos'),
            ]
test = [('we have to take a picture and paste on jupyter notebook','pos'),
        ('Superman was never an interesting character.','pos'),
           ('Fantastic Mr Fox is an awesome film!','neg'),
           ('Dragonball Evolution is simply terrible!!','pos')
           ]

In [41]:
from textblob import classifiers
cl = classifiers.NaiveBayesClassifier(train)

In [42]:
cl.accuracy(test)

0.75

In [43]:
cl.show_informative_features(1)

Most Informative Features
            contains(is) = True              neg : pos    =      2.8 : 1.0


In [44]:
print (cl.accuracy(test))
cl.show_informative_features(3)

0.75
Most Informative Features
            contains(is) = True              neg : pos    =      2.8 : 1.0
             contains(2) = False             pos : neg    =      1.5 : 1.0
       contains(America) = False             pos : neg    =      1.5 : 1.0


In [45]:
cl.classify("This is an amazing Library!")

'pos'

In [46]:
def get_probabilities(text):
    probabilities = cl.prob_classify(text)
    return {
        'pos': round(probabilities.prob('pos'), 2),
        'neg': round(probabilities.prob('neg'), 2)
    }

probabilities = get_probabilities('the weather is terrible!')
print(f'Probability of Positive: {probabilities["pos"]}')
print(f'Probability of Negative: {probabilities["neg"]}')

Probability of Positive: 0.94
Probability of Negative: 0.06


In [47]:
prob_dist = cl.prob_classify("Iam Suffering from cough and cold.")
prob_dist.max()

'pos'

In [48]:
round(prob_dist.prob('neg'),2)

0.0

In [49]:
round(prob_dist.prob('pos'),2)

1.0

In [50]:
blob=TextBlob('Alcohol is good But the hangover is horrible',classifier=cl)

In [51]:
blob.classify()

'pos'

In [52]:
for b in blob.sentences:
  print(b)
  print(b.classify())

Alcohol is good But the hangover is horrible
pos


In [53]:
cl.accuracy(test)

0.75

In [54]:
cl.show_informative_features(1)

Most Informative Features
            contains(is) = True              neg : pos    =      2.8 : 1.0


In [55]:
new_data = [('She is my best friend','pos'),
 ("I'm happy to have a new friend",'pos'),
  ('Stay thirsty, my friend', 'pos'),
   ("He ain't from around here",'neg')]

In [56]:
cl.update(new_data)

True

In [57]:
cl.accuracy(test)

0.75