## Tokenization

In [1]:
from textblob import TextBlob
blob = TextBlob("This is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions, etc.")

In [2]:
print(blob.sentences)

[Sentence("This is a great platform to learn data science."), Sentence("It helps community through blogs, hackathons, discussions, etc.")]


In [3]:
print(blob.sentences[0])

This is a great platform to learn data science.


In [4]:
for word in blob.sentences[0].words:
    print(word)

This
is
a
great
platform
to
learn
data
science


## Noun Phrase Extraction

In [5]:
blob = TextBlob("This is a great platform to learn data science.")
for np in blob.noun_phrases:
    print(np)

great platform
data science


## Part-of-speech Tagging

In [6]:
for word, tag in blob.tags:
    print(word, tag)

This DT
is VBZ
a DT
great JJ
platform NN
to TO
learn VB
data NNS
science NN


## Words Inflection and Lemmatization

In [7]:
blob = TextBlob("This is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions, etc.")

In [8]:
print(blob.sentences[1].words[1].singularize())

help


In [9]:
from textblob import Word
w = Word('Platform')
print(w.pluralize())

Platforms


In [10]:
for word, pos in blob.tags:
    if pos == 'NN':
        print(word.pluralize())

platforms
sciences
communities


## N-grams

In [11]:
blob = TextBlob("This is a great platform to learn data science.")
for ngram in blob.ngrams(2):
    print(ngram)

['This', 'is']
['is', 'a']
['a', 'great']
['great', 'platform']
['platform', 'to']
['to', 'learn']
['learn', 'data']
['data', 'science']


## Sentiment Analysis

In [12]:
print(blob.sentiment)

Sentiment(polarity=0.8, subjectivity=0.75)


## Spelling correction

In [13]:
blob = TextBlob('This is a gret platfrm to learn data scence.')
print(blob.correct())

His is a great platform to learn data science.


In [14]:
print(blob.words[4].spellcheck())

[('platform', 1.0)]


## Summary of a text

In [15]:
import random

blob = TextBlob('This is a thriving community for data driven industry. This platform allows people to know more about analytics from its articles, Q&A forum, and learning paths. Also, we help professionals & amateurs to sharpen their skillsets by providing a platform to participate in Hackathons.')

nouns = list()
for word, tag in blob.tags:
    if tag == 'NN':
        nouns.append(word.lemmatize())

In [16]:
print('This text is about...')
for item in random.sample(nouns, 5):
    word = Word(item)
    print(word.pluralize())

This text is about...
communities
industries
platforms
forums
platforms


## Text classification using TextBlob

In [17]:
training = [
('Tom Holland is a terrible spiderman.','pos'),
('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),
('The Dark Knight Rises is the greatest superhero movie ever!','neg'),
('Fantastic Four should have never been made.','pos'),
('Wes Anderson is my favorite director!','neg'),
('Captain America 2 is pretty awesome.','neg'),
('Let\s pretend "Batman and Robin" never happened..','pos'),
]

testing = [
('Superman was never an interesting character.','pos'),
('Fantastic Mr Fox is an awesome film!','neg'),
('Dragonball Evolution is simply terrible!!','pos')
]

In [18]:
from textblob import classifiers
classifier = classifiers.NaiveBayesClassifier(training)
print(classifier.accuracy(testing))

1.0


In [19]:
classifier.show_informative_features(3)

Most Informative Features
            contains(is) = True              neg : pos    =      2.9 : 1.0
         contains(never) = False             neg : pos    =      1.8 : 1.0
             contains(a) = False             neg : pos    =      1.8 : 1.0
