In [1]:
from textblob import TextBlob, Word
from nltk.corpus import treebank
import nltk

In [2]:
# Any piece of text can serve as the input:
my_text = 'Hello, my name is Friedrich Miescher. Sometimes I am afraid at night when it is dark.'

# Wrap the raw string in a TextBlob; the analyses below all go through this object.
blob = TextBlob(my_text)
print(blob)

# Part-of-speech tags for linguistic analysis, as (word, tag) pairs:
pos_tags = blob.tags
print(pos_tags)

# Noun phrases found in the text:
noun_phrases = blob.noun_phrases
print(noun_phrases)

# Sentiment polarity, one line per sentence (a negative score means negative sentiment):
for sentence in blob.sentences:
    polarity = sentence.sentiment.polarity
    print(sentence, polarity)

Hello, my name is Friedrich Miescher. Sometimes I am afraid at night when it is dark.
[('Hello', 'NNP'), ('my', 'PRP$'), ('name', 'NN'), ('is', 'VBZ'), ('Friedrich', 'NNP'), ('Miescher', 'NNP'), ('Sometimes', 'RB'), ('I', 'PRP'), ('am', 'VBP'), ('afraid', 'JJ'), ('at', 'IN'), ('night', 'NN'), ('when', 'WRB'), ('it', 'PRP'), ('is', 'VBZ'), ('dark', 'JJ')]
['hello', 'friedrich miescher']
Hello, my name is Friedrich Miescher. 0.0
Sometimes I am afraid at night when it is dark. -0.375


In [3]:
# Proxy for outgoing requests, e.g. 'http://proxy.example.org:8080'.
# Leave it as None when no proxy is needed; set it when working from the FMI network.
PROXY_URL = None
if PROXY_URL:
    # Only install the proxy when one was actually configured, so that a plain
    # "Run All" does not route requests through a placeholder address.
    nltk.set_proxy(PROXY_URL)

# Translate the text to other languages (Spanish, German, Italian).
# NOTE(review): translate() calls an online service and is reported as deprecated
# in newer TextBlob releases -- confirm the installed version still provides it.
for language_code in ('es', 'de', 'it'):
    print(blob.translate(to=language_code))

Hola, mi nombre es Friedrich Miescher. A veces tengo miedo por la noche cuando está oscuro.
Hallo, ich heiße Friedrich Miescher. Manchmal habe ich nachts Angst, wenn es dunkel ist.
Ciao, mi chiamo Friedrich Miescher. A volte ho paura di notte quando è buio.


In [4]:
# A second example, this time built from two short statements:
zen = TextBlob("Explicit is better than implicit. "
               "Simple is better than complex. ")

# Word tokens first, then the sentence split:
print(zen.words)
print(zen.sentences)

# Report both sentiment scores (polarity and subjectivity) for every sentence:
for sentence in zen.sentences:
    scores = sentence.sentiment
    print('Polarity: ', scores.polarity, ' Subjectivity: ', scores.subjectivity)

['Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex']
[Sentence("Explicit is better than implicit."), Sentence("Simple is better than complex.")]
Polarity:  0.5  Subjectivity:  0.5
Polarity:  0.06666666666666667  Subjectivity:  0.41904761904761906


In [5]:
# Grammatical manipulation.
# This blob gets its own name so that the `blob` built from `my_text` earlier is
# not overwritten; re-running the translation cell after this one then still
# translates the original text instead of 'goose'.
goose_blob = TextBlob('goose')
print(goose_blob.words[0].pluralize())

# Lemmatize a word without giving a part of speech:
my_word = Word('children')
print(my_word.lemmatize())

# Lemmatize a verb by passing 'v' as the part of speech:
my_word = Word('were')
print(my_word.lemmatize('v'))

geese
child
be


In [6]:
# Dictionary-style definitions of a word:
scientist = Word('scientist')
print(scientist.definitions)

# Spelling correction:
b = TextBlob('I havv goood speling!')
corrected = b.correct()
print(corrected)

# Language detection:
b = TextBlob('Ich heisse Peter.')
language_code = b.detect_language()
print(language_code)

# Large text corpora are available as well, for example
# the first words of Genesis:
genesis_words = nltk.corpus.genesis.words()
print(genesis_words)

# The remaining examples parse sentences and display their structure visually.
# IMPORTANT: every draw() call opens a pop-up window. The windows have to be closed
# one by one for the cell to proceed.
wsj_trees = treebank.parsed_sents('wsj_0001.mrg')
t = wsj_trees[0]
t.draw()

# A small context-free grammar covering the example sentence below.
groucho_grammar = nltk.CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)

# The sentence is given as a list of tokens; each parse the chart parser finds
# is printed as text and then drawn in its own window.
sentence = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
parser = nltk.ChartParser(groucho_grammar)
parse_trees = parser.parse(sentence)
for tree in parse_trees:
    print(tree)
    tree.draw()

['a person with advanced knowledge of one or more sciences']
I have good spelling!
de
['In', 'the', 'beginning', 'God', 'created', 'the', ...]
(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))
