## textblob

- [getting started guide](https://textblob.readthedocs.io/en/dev/quickstart.html)

In [1]:
from textblob import TextBlob, Word
from textblob.classifiers import NaiveBayesClassifier
from textblob.wordnet import VERB

In [2]:
 wiki = TextBlob("Python is a high-level, general-purpose programming language.")

#### POS Tagging

In [3]:
# Part-of-speech tags for `wiki`, displayed as (token, tag) pairs.
wiki.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general-purpose', 'JJ'),
 ('programming', 'NN'),
 ('language', 'NN')]

#### NP Extraction

In [4]:
# Noun phrases extracted from `wiki`, displayed as a WordList.
wiki.noun_phrases

WordList(['python'])

#### Sentiment analysis

In [5]:
# A second blob used to demonstrate sentiment scoring.
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")

# The displayed Sentiment result carries `polarity` and `subjectivity` fields.
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [6]:
# Read only the polarity component of the sentiment result.
testimonial.sentiment.polarity

0.39166666666666666

#### Tokenization

In [7]:
# Word-level tokenization; the displayed WordList holds the words without
# the sentence-ending periods.
zen = TextBlob("Beautiful is better than ugly. Explicit is better than implicit. Simple is better than complex.")
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [8]:
# Sentence-level tokenization of the same blob.
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

#### Word Inflection and Lemmatization

In [9]:
# Tokenize a short sentence; the cells below inflect individual words from it.
sentence = TextBlob('Use 4 spaces per indentation level.')

sentence.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [10]:
# words[2] is 'spaces'; singularize() gives its singular form.
sentence.words[2].singularize()

'space'

In [11]:
# words[-1] is 'level'; pluralize() gives its plural form.
sentence.words[-1].pluralize()

'levels'

In [12]:
# Lemmatize a single Word without specifying a part of speech.
# (`Word` is imported in the notebook's first cell.)
w = Word("octopi")
w.lemmatize()

'octopus'

In [13]:
# Lemmatize again, this time telling the lemmatizer the word is a verb.
w = Word("went")
w.lemmatize("v")  # Pass in WordNet part of speech (verb)

'go'

#### WordNet Integration

In [14]:
# List the WordNet synsets for a word.
# (`Word` and `VERB` are imported in the notebook's first cell.)
word = Word("octopus")
word.synsets

[Synset('octopus.n.01'), Synset('octopus.n.02')]

In [15]:
# Restrict the synset lookup to verb senses via the `pos` argument.
Word("hack").get_synsets(pos=VERB)

[Synset('chop.v.05'),
 Synset('hack.v.02'),
 Synset('hack.v.03'),
 Synset('hack.v.04'),
 Synset('hack.v.05'),
 Synset('hack.v.06'),
 Synset('hack.v.07'),
 Synset('hack.v.08')]

#### Working with Word Lists

In [16]:
# Calling pluralize() on a WordList pluralizes every word in it.
animals = TextBlob("cat dog octopus")
animals.words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

#### Spelling Correction

In [17]:
# A blob with deliberate misspellings.
b = TextBlob("I havv goood speling!")

# correct() displays a TextBlob containing the corrected text.
b.correct()

TextBlob("I have good spelling!")

In [18]:
# A single misspelled word.
w = Word('falibility')

# spellcheck() displays a list of (suggestion, score) pairs.
w.spellcheck()

[('fallibility', 1.0)]

#### Syntactic Parsing

- `parse()` uses the [pattern parser](http://www.clips.ua.ac.be/pages/pattern-en#parser)

In [19]:
# A new blob for the parsing demo (reuses the name `b` from the spelling section).
b = TextBlob("And now for something completely different.")

# parse() displays one string in which each token carries slash-separated tags.
b.parse()

'And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O'

#### N-grams

In [20]:
# Source sentence for the n-gram demo.
blob = TextBlob("Now is better than never.")

# Each result is a WordList of n=3 consecutive words.
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

### Sentiment Classification with textblob

In [21]:
# Labeled (sentence, label) pairs used to fit the classifier below.
train = [
    # positive examples
    ("I love this sandwich.", "pos"),
    ("this is an amazing place!", "pos"),
    ("I feel very good about these beers.", "pos"),
    ("this is my best work.", "pos"),
    ("what an awesome view", "pos"),
    # negative examples
    ("I do not like this restaurant", "neg"),
    ("I am tired of this stuff.", "neg"),
    ("I can't deal with this", "neg"),
    ("he is my sworn enemy!", "neg"),
    ("my boss is horrible.", "neg"),
]

# Labeled pairs passed to accuracy() in the cells below.
test = [
    ("the beer was good.", "pos"),
    ("I do not enjoy my job", "neg"),
    ("I ain't feeling dandy today.", "neg"),
    ("I feel amazing!", "pos"),
    ("Gary is a friend of mine.", "pos"),
    ("I can't believe I'm doing this.", "neg"),
]

In [22]:
# Fit a Naive Bayes classifier on the labeled training pairs.
# (`NaiveBayesClassifier` is imported in the notebook's first cell.)
cl = NaiveBayesClassifier(train)

# Score the classifier on the separate `test` pairs.
cl.accuracy(test)

0.8333333333333334

In [23]:
# Classify a sentence that is in neither `train` nor `test`.
cl.classify("This is an amazing library!")

'pos'

In [24]:
# Passing `classifier=` lets the blob classify its own text.
# (`TextBlob` is already imported in the notebook's first cell.)
blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
blob.classify()

'pos'

In [25]:
# Classify each sentence of the blob separately and print the label beside it.
for s in blob.sentences:
    print(s.classify(), s)

pos The beer is good.
neg But the hangover is horrible.


In [26]:
# Additional labeled examples to feed into the existing classifier.
new_data = [('She is my best friend.', 'pos'),
            ("I'm happy to have a new friend.", 'pos'),
            ("Stay thirsty, my friend.", 'pos'),
            ("He ain't from around here.", 'neg')]
# update() modifies `cl` itself (the accuracy in the next cell changes).
# NOTE(review): re-running this cell presumably feeds the same examples in again — confirm.
cl.update(new_data)

True

In [27]:
# Accuracy on the same `test` pairs after the update.
cl.accuracy(test)

1.0

#### Feature Extractors 

In [28]:
def end_word_extractor(document):
    """Build a feature dict from a document's first and last tokens.

    The document is split on whitespace; only the first and last tokens
    contribute features.

    Args:
        document: The text to extract features from (a string).

    Returns:
        A dict with the key ``first(<token>)`` mapped to True and the key
        ``last(<token>)`` mapped to False. An empty dict is returned when
        the document contains no tokens.
    """
    tokens = document.split()
    if not tokens:
        # Empty or whitespace-only text has no first/last word; indexing
        # tokens[0] would raise IndexError.
        return {}
    first_word, last_word = tokens[0], tokens[-1]
    return {
        "first({0})".format(first_word): True,
        # The "last" feature is stored as False, matching the expected dict
        # asserted on the "I feel happy" example in this cell.
        "last({0})".format(last_word): False,
    }

# Sanity-check the extractor on a three-word sentence.
features = end_word_extractor("I feel happy")
assert features == {'last(happy)': False, 'first(I)': True}

In [29]:
# When a feature extractor is supplied, all train/test cases are passed through it.
# NOTE(review): cl2 is fitted on `test`, not `train`, and the next cell scores it
# on `test` as well, so that accuracy is measured on the data it was trained on.
# Presumably `train` was intended — confirm, then re-run to refresh the outputs.
cl2 = NaiveBayesClassifier(test, feature_extractor=end_word_extractor)
blob = TextBlob("I'm excited to try my new classifier.", classifier=cl2)
blob.classify()

'pos'

In [30]:
# NOTE(review): `cl2` was fitted on `test`, so this is training-set accuracy,
# not a held-out score.
cl2.accuracy(test)

1.0