In [1]:
!pip install textblob





In [2]:
from textblob import TextBlob
from textblob import Word
from textblob.taggers import NLTKTagger
from textblob.translate import Translator

import warnings
# Install a blanket "ignore" filter: every warning category is silenced from
# here on (presumably to keep cell outputs uncluttered). NOTE(review): this
# also hides deprecation warnings emitted by textblob/nltk.
warnings.filterwarnings("ignore")

## Spelling correction

In [4]:
# Spelling-correction demo: the input deliberately misspells "over" as "overr",
# and correct() returns a new blob with the typo fixed. (Previously the input
# contained no misspelling, so the cell demonstrated nothing.)
r = TextBlob("The quick brown fox jump overr the lazy dog")
print(r.correct())

The quick brown fox jump over the lazy dog


In [6]:
# Spelling correction on a sentence with two typos: "hadd" and "markeet".
typo_blob = TextBlob("The boy hadd gone to markeet")
fixed_blob = typo_blob.correct()
print(fixed_blob)

The boy had gone to market


## Counting word occurrences

In [8]:
# Download the NLTK corpora TextBlob relies on (brown, punkt, wordnet,
# averaged_perceptron_tagger, conll2000, movie_reviews, as listed in the log
# printed by this cell).
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\brown.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\conll2000.zip.
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.
Finished.


In [9]:
# Count the occurrences of the word "dog". The default count is not
# case-sensitive, so both "dog" and "Dog" are included.
# `r` is reused by the following counting cells, so its name must stay.
r = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home"
)
dog_total = r.words.count('dog')
print(dog_total)

2


In [10]:
# Occurrences of "cat" in the blob built in the previous cell.
cat_total = r.words.count('cat')
print(cat_total)

2


In [11]:
# Occurrences of "the" with the default (case-insensitive) matching, so the
# leading "The" is counted as well.
the_total = r.words.count('the')
print(the_total)

4


In [12]:
# Case-sensitive count: only the lowercase "dog" matches, "Dog" does not.
dog_exact = r.words.count('dog', case_sensitive=True)
print(dog_exact)

1


In [13]:
# Case-sensitive count: the capitalised "The" at the start is excluded.
the_exact = r.words.count('the', case_sensitive=True)
print(the_exact)

3


## Lemmatization: reducing words to their root (dictionary) form

In [14]:
# Lemmatize "went" as a verb ("v") to get its base form.
went_lemma = Word('went').lemmatize("v")
print('e=', went_lemma)

e= go


In [15]:
# Lemmatize an irregular plural; no part of speech is passed, so the
# library's default is used.
octopi_lemma = Word('octopi').lemmatize()
print('u=', octopi_lemma)

u= octopus


In [18]:
# Lemmatize the past participle "begun" as a verb ("v").
begun_lemma = Word('begun').lemmatize("v")
print('w=', begun_lemma)

w= begin


## Tokenization: splitting text into words or sentences

In [19]:
# Word-level tokenization: .words lists the individual tokens of the text.
# In the output, the possessive "cat's" is split into "cat" and "'s".
word_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home"
)
tokens = word_blob.words
print(tokens)

['The', 'quick', 'brown', 'fox', 'jump', 'over', 'the', 'lazy', 'dog', 'and', 'the', 'Dog', 'and', 'cat', 'were', 'at', 'the', 'cat', "'s", 'home']


In [20]:
# Sentence-level tokenization: .sentences splits the text into Sentence objects.
sentence_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
sentence_list = sentence_blob.sentences
print(sentence_list)

[Sentence("The quick brown fox jump over the lazy dog and the Dog and cat were at the cat's home."), Sentence("Python is great programming language."), Sentence("Simple living high thinking")]


In [21]:
# Extract the noun phrases found in the text.
phrase_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
phrases = phrase_blob.noun_phrases
print(phrases)

['quick brown fox', 'lazy dog', 'dog', "cat 's home", 'python', 'simple']


In [26]:
# Pluralize every word in the list, including the irregular nouns.
animals = TextBlob(" cat dog goat octopus child man lady")
plural_forms = animals.words.pluralize()
print(plural_forms)

['cats', 'dogs', 'goats', 'octopodes', 'children', 'men', 'ladies']


## Bigrams, trigrams, n-grams

Breaks text into overlapping sequences of consecutive words: two at a time (bigrams), three at a time (trigrams), or any specified number n (n-grams).

In [27]:
# Bigrams: every run of 2 consecutive words in the text.
bigram_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
bigrams = bigram_blob.ngrams(2)
print(bigrams)

[WordList(['The', 'quick']), WordList(['quick', 'brown']), WordList(['brown', 'fox']), WordList(['fox', 'jump']), WordList(['jump', 'over']), WordList(['over', 'the']), WordList(['the', 'lazy']), WordList(['lazy', 'dog']), WordList(['dog', 'and']), WordList(['and', 'the']), WordList(['the', 'Dog']), WordList(['Dog', 'and']), WordList(['and', 'cat']), WordList(['cat', 'were']), WordList(['were', 'at']), WordList(['at', 'the']), WordList(['the', 'cat']), WordList(['cat', "'s"]), WordList(["'s", 'home']), WordList(['home', 'Python']), WordList(['Python', 'is']), WordList(['is', 'great']), WordList(['great', 'programming']), WordList(['programming', 'language']), WordList(['language', 'Simple']), WordList(['Simple', 'living']), WordList(['living', 'high']), WordList(['high', 'thinking'])]


In [28]:
# Trigrams: every run of 3 consecutive words in the text.
trigram_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
trigrams = trigram_blob.ngrams(3)
print(trigrams)

[WordList(['The', 'quick', 'brown']), WordList(['quick', 'brown', 'fox']), WordList(['brown', 'fox', 'jump']), WordList(['fox', 'jump', 'over']), WordList(['jump', 'over', 'the']), WordList(['over', 'the', 'lazy']), WordList(['the', 'lazy', 'dog']), WordList(['lazy', 'dog', 'and']), WordList(['dog', 'and', 'the']), WordList(['and', 'the', 'Dog']), WordList(['the', 'Dog', 'and']), WordList(['Dog', 'and', 'cat']), WordList(['and', 'cat', 'were']), WordList(['cat', 'were', 'at']), WordList(['were', 'at', 'the']), WordList(['at', 'the', 'cat']), WordList(['the', 'cat', "'s"]), WordList(['cat', "'s", 'home']), WordList(["'s", 'home', 'Python']), WordList(['home', 'Python', 'is']), WordList(['Python', 'is', 'great']), WordList(['is', 'great', 'programming']), WordList(['great', 'programming', 'language']), WordList(['programming', 'language', 'Simple']), WordList(['language', 'Simple', 'living']), WordList(['Simple', 'living', 'high']), WordList(['living', 'high', 'thinking'])]


In [29]:
# Unigrams: each word wrapped in its own single-element WordList.
unigram_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
unigrams = unigram_blob.ngrams(1)
print(unigrams)

[WordList(['The']), WordList(['quick']), WordList(['brown']), WordList(['fox']), WordList(['jump']), WordList(['over']), WordList(['the']), WordList(['lazy']), WordList(['dog']), WordList(['and']), WordList(['the']), WordList(['Dog']), WordList(['and']), WordList(['cat']), WordList(['were']), WordList(['at']), WordList(['the']), WordList(['cat']), WordList(["'s"]), WordList(['home']), WordList(['Python']), WordList(['is']), WordList(['great']), WordList(['programming']), WordList(['language']), WordList(['Simple']), WordList(['living']), WordList(['high']), WordList(['thinking'])]


In [30]:
# Six-grams: every run of 6 consecutive words in the text.
sixgram_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
sixgrams = sixgram_blob.ngrams(6)
print(sixgrams)

[WordList(['The', 'quick', 'brown', 'fox', 'jump', 'over']), WordList(['quick', 'brown', 'fox', 'jump', 'over', 'the']), WordList(['brown', 'fox', 'jump', 'over', 'the', 'lazy']), WordList(['fox', 'jump', 'over', 'the', 'lazy', 'dog']), WordList(['jump', 'over', 'the', 'lazy', 'dog', 'and']), WordList(['over', 'the', 'lazy', 'dog', 'and', 'the']), WordList(['the', 'lazy', 'dog', 'and', 'the', 'Dog']), WordList(['lazy', 'dog', 'and', 'the', 'Dog', 'and']), WordList(['dog', 'and', 'the', 'Dog', 'and', 'cat']), WordList(['and', 'the', 'Dog', 'and', 'cat', 'were']), WordList(['the', 'Dog', 'and', 'cat', 'were', 'at']), WordList(['Dog', 'and', 'cat', 'were', 'at', 'the']), WordList(['and', 'cat', 'were', 'at', 'the', 'cat']), WordList(['cat', 'were', 'at', 'the', 'cat', "'s"]), WordList(['were', 'at', 'the', 'cat', "'s", 'home']), WordList(['at', 'the', 'cat', "'s", 'home', 'Python']), WordList(['the', 'cat', "'s", 'home', 'Python', 'is']), WordList(['cat', "'s", 'home', 'Python', 'is', 'gr

## POS: part-of-speech tagging

In [34]:
# Part-of-speech tagging with TextBlob's default tagger: .tags yields
# (word, tag) pairs.
tagged_blob = TextBlob(
    "The quick brown fox jump over the lazy dog and the Dog "
    "and cat were at the cat's home. "
    "Python is great programming language. "
    "Simple living high thinking"
)
word_tags = tagged_blob.tags
print(word_tags)

[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'JJ'), ('jump', 'NN'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('and', 'CC'), ('the', 'DT'), ('Dog', 'NNP'), ('and', 'CC'), ('cat', 'NN'), ('were', 'VBD'), ('at', 'IN'), ('the', 'DT'), ('cat', 'NN'), ("'s", 'POS'), ('home', 'NN'), ('Python', 'NNP'), ('is', 'VBZ'), ('great', 'JJ'), ('programming', 'JJ'), ('language', 'NN'), ('Simple', 'NNP'), ('living', 'VBG'), ('high', 'JJ'), ('thinking', 'NN')]


In [38]:
# Part-of-speech tagging with an explicitly supplied NLTK-based tagger.
# `tg` is reused by the parsing cell that follows, so its name must stay.
nltk_tagger = NLTKTagger()
tg = TextBlob(
    "GameGbay's CEO Kelvin introduces a new gaming empire project in Ghana and the world",
    pos_tagger=nltk_tagger,
)

nltk_tags = tg.pos_tags
print(nltk_tags)

[('GameGbay', 'NNP'), ("'s", 'POS'), ('CEO', 'NNP'), ('Kelvin', 'NNP'), ('introduces', 'VBZ'), ('a', 'DT'), ('new', 'JJ'), ('gaming', 'NN'), ('empire', 'NN'), ('project', 'NN'), ('in', 'IN'), ('Ghana', 'NNP'), ('and', 'CC'), ('the', 'DT'), ('world', 'NN')]


In [39]:
# parse() analyzes the sentence token by token to expose its grammatical
# structure. In the printed result each token appears to be rendered as
# word/POS-tag/chunk-tag/PNP-tag (format inferred from the output; confirm
# against the TextBlob parser docs).
parsed_text = tg.parse()
print(parsed_text)

GameGbay/NN/B-NP/O '/POS/O/O s/PRP/B-NP/O CEO/NNP/I-NP/O Kelvin/NNP/I-NP/O introduces/VBZ/B-VP/O a/DT/B-NP/O new/JJ/I-NP/O gaming/NN/I-NP/O empire/NN/I-NP/O project/NN/I-NP/O in/IN/B-PP/B-PNP Ghana/NNP/B-NP/I-PNP and/CC/I-NP/I-PNP the/DT/I-NP/I-PNP world/NN/I-NP/I-PNP
