## Libraries

In [1]:
from textblob import TextBlob, Word

## Tokenization

In [2]:
# Python joins adjacent string literals into a single string, so `zen` wraps
# one text made of three sentences (each literal ends with a separating space).
zen = TextBlob("Beautiful is better than ugly. "
               "Explicit is better than implicit. "
               "Simple is better than complex.")

In [3]:
# Word-level tokens of the whole text; the sentence-ending periods do not
# appear in the resulting WordList.
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [4]:
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

## POS Tagging

In [5]:
wiki = TextBlob("Python is a high-level, general purpose programming language.")

In [6]:
wiki.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general', 'JJ'),
 ('purpose', 'NN'),
 ('programming', 'NN'),
 ('language', 'NN')]

## Noun Phrase Extraction

In [7]:
# Extracted phrases come back lower-cased ('Python' in the text -> 'python').
wiki.noun_phrases

WordList(['python', 'general purpose'])

## Inflection and Lemmatization

In [8]:
w = Word("octopi")
w.lemmatize()

'octopus'

In [9]:
# Irregular past-tense form: with the "v" argument the lemma comes back as 'go'.
# NOTE(review): "v" presumably selects the verb part of speech — confirm in the
# TextBlob docs.
w = Word("went")
w.lemmatize("v")

'go'

## Singularize - Pluralize

In [10]:
# words[2] is the third token, "spaces"; singularize() on that single Word
# gives 'space'.
sentence = TextBlob('Use 4 spaces per indentation level.')
sentence.words[2].singularize()

'space'

In [11]:
TextBlob('cars').words.singularize()

WordList(['car'])

In [12]:
TextBlob('bat').words.pluralize()

WordList(['bats'])

## Spelling Correction

In [13]:
b = TextBlob("I havv goood speling!")
print(b.correct())

I have good spelling!


In [14]:
w = Word('falibility')
w.spellcheck()

[('fallibility', 1.0)]

In [15]:
# spellcheck() returns (candidate, score) pairs, highest score first.
# For 'aple' the top-ranked candidate is 'able' rather than 'apple', so the
# first suggestion is not necessarily the word the writer intended.
w = Word('aple')
w.spellcheck()

[('able', 0.5140664961636828),
 ('pale', 0.4219948849104859),
 ('apple', 0.028132992327365727),
 ('ample', 0.023017902813299233),
 ('ape', 0.010230179028132993),
 ('ale', 0.0025575447570332483)]

## Word Count

In [16]:
# word_counts is looked up by whole word: 'hell' is counted once and is not
# matched inside 'hello' / 'Hello'.
monty = TextBlob("hello world Hello hello hey hell")
monty.word_counts['hell']

1

In [17]:
# Without case_sensitive the match ignores case: 'hello', 'Hello' and 'hello'
# are all counted.
monty.words.count('hello')

3

In [18]:
# With case_sensitive=True the capitalised 'Hello' is no longer counted.
monty.words.count('hello', case_sensitive=True)

2

In [19]:
# count() is also available on the noun-phrase list. 'world' is not one of
# wiki's noun phrases (['python', 'general purpose']), hence the result 0.
wiki.noun_phrases.count('world')

0

## Language Detection and Translation

In [20]:
# Keep the Arabic blob in its own variable and run detect_language() on it.
# `b` still refers to the English spelling-correction text from an earlier
# cell, so calling b.detect_language() would not inspect this sentence at all.
# NOTE(review): detect_language() appears to query an online translation
# service and is deprecated/removed in newer TextBlob releases — confirm
# network access and the installed library version before relying on it.
ar_blob = TextBlob(u"بسيط هو أفضل من مجمع")
ar_blob.detect_language()

'ar'

In [21]:
# Only the target language is given; the source language is not specified
# by the caller.
# NOTE(review): translate() presumably depends on an online service — confirm
# network access and library version.
en_blob = TextBlob(u'Simple is better than complex.')
en_blob.translate(to='es')

TextBlob("Simple es mejor que complejo.")

In [22]:
# Here the source language is stated explicitly through from_lang in addition
# to the target language.
fr = TextBlob(u"Je vais tres bien")
fr.translate(from_lang="fr", to='en')

TextBlob("I am very well")

## n-grams

In [23]:
# n=3 yields every run of three consecutive words: the five words of the
# sentence produce three overlapping trigrams (the final period is not a token).
blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

## Sentiment

In [24]:
# Sentiment property returns a namedtuple of the form Sentiment(polarity, subjectivity). 
# The polarity score is a float within the range [-1.0, 1.0].
#   where -1.0 is very negative and 1.0 is very positive
# The subjectivity is a float within the range [0.0, 1.0] 
#    where 0.0 is very objective and 1.0 is very subjective.

In [25]:
test1 = TextBlob("Textblob is amazingly simple to use. What great fun!")
test1.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [26]:
test2 = TextBlob("Exams were very tough and the students got bad grades")
test2.sentiment

Sentiment(polarity=-0.6027777777777776, subjectivity=0.8333333333333333)

In [27]:
# Sentence objects we get from sentence tokenizer have the same properties and methods as TextBlobs.

for sentence in zen.sentences:
    print(sentence.sentiment)

Sentiment(polarity=0.2166666666666667, subjectivity=0.8333333333333334)
Sentiment(polarity=0.5, subjectivity=0.5)
Sentiment(polarity=0.06666666666666667, subjectivity=0.41904761904761906)
