In [2]:
# conda install -n dphil nltk textblob

import nltk

# Fetch the NLTK data packages one after another (same order as before).
for nltk_resource in (
    "punkt",  # Punkt sentence tokenizer
    "brown",  # Brown reference corpus
    "wordnet",  # Lemmatization using wordnet
    "omw-1.4",  # Open Multilingual Wordnet
    "averaged_perceptron_tagger",  # POS tagger
):
    nltk.download(nltk_resource)
# https://explosion.ai/blog/part-of-speech-pos-tagger-in-python

from textblob import TextBlob

# Sample input for the TextBlob experiments: Shakespeare's Sonnet 18.
# The literal opens with a line break, so character 0 of the string is "\n"
# and the first verse starts at index 1.
text = """
Shall I compare thee to a summer’s day?
Thou art more lovely and more temperate;
Rough winds do shake the darling buds of May,
And summer’s lease hath all too short a date;

Sometime too hot the eye of heaven shines,
And often is his gold complexion dimm’d;
And every fair from fair sometime declines,
By chance or nature’s changing course untrimm’d;

But thy eternal summer shall not fade,
Nor lose possession of that fair thou ow’st;
Nor shall Death brag thou wander’st in his shade,
When in eternal lines to time thou grow’st:

So long as men can breathe or eyes can see,
So long lives this, and this gives life to thee.
"""

# Wrap the raw text in a TextBlob to get at its text-processing helpers.
blob = TextBlob(text)

# Length, indexing and slicing work on characters, as with a plain string.
len(blob)

blob[0]

# characters 1-15 spell "Shall I compare"
blob[1:16]

# Sentence segmentation might not be suitable for lyrics.
len(blob.sentences)

blob.sentences[0]

# Last character of each detected sentence:
# sentences seem to only stop at "?" and "."
[sentence[-1] for sentence in blob.sentences]


# Assemble a short test text from pieces of the sonnet:
#   blob[1:16]    -> "Shall I compare"
#   blob[168:171] -> "dat" (taken from "date"), completed to "data" by the literal
newblob = (
    blob[1:16]
    + " "
    + blob[168:171]
    + "a? Yes! We use complicated methods."
)

# How many sentences does the segmenter find in the new text?
len(newblob.sentences)

# Look at the first three of them individually.
newblob.sentences[0]
newblob.sentences[1]
newblob.sentences[2]



# Tokenize the test text into a word list.
wordlist = newblob.tokenize()
wordlist

# Reduce every token to its lemma ...
lemmas = wordlist.lemmatize()
lemmas

# ... and lower-case the lemmas.
lemmas.lower()

# Stemming, for comparison with lemmatization.
wordlist.stem()

# Spelling "correction" -- the author's verdict: this should be called .wrongify()
newblob.correct()

# The misspellings in this sample are deliberate input for .correct().
TextBlob("Leipzig is a beatifull townn").correct()

# Part-of-speech tags come back as (word, tag) pairs.
newblob.pos_tags

# Print them as a two-column, tab-separated table.
print("\n".join("\t".join(pair) for pair in newblob.pos_tags))

# Keep only the words whose tag starts with "V" (the verb tags).
[word for word, tag in newblob.tags if tag[0] == "V"]


# Same filter applied to the full sonnet, printed as a table.
print(
    "\n".join(
        "\t".join((word, tag)) for word, tag in blob.tags if tag[0] == "V"
    )
)


# All overlapping word trigrams of the test text.
newblob.ngrams(3)

# Reminder: enumerate() pairs every item with its position.
list(enumerate(["a", "b", "c"]))

# Use the position to keep every third trigram only (positions 0, 3, 6, ...).
[
    trigram
    for position, trigram in enumerate(newblob.ngrams(3))
    if position % 3 == 0
]

# Sentiment of several variations on one statement: neutral, positive,
# hedged, negated positive, negative, negated negative.
TextBlob("Leipzig is a town.").sentiment

TextBlob("Leipzig is a beautiful town.").sentiment

TextBlob("Leipzig seems to be a beautiful town.").sentiment

TextBlob("Leipzig is not a beautiful town.").sentiment

TextBlob("Leipzig is an ugly town.").sentiment

TextBlob("Leipzig is not an ugly town.").sentiment  # fail!

# Polarity per sentence of the short test text.
[part.sentiment.polarity for part in newblob.sentences]

# Clearly positive sentences of the sonnet.
[part for part in blob.sentences if part.polarity > 0.3]

# Turn ";" and "," into "." so the segmenter cuts the sonnet into smaller pieces.
choppedblob = TextBlob(text.translate(str.maketrans(";,", "..")))

# the magic number for sonnets!
len(choppedblob.sentences)

# Strongly positive pieces ...
[part for part in choppedblob.sentences if part.polarity > 0.6]

# ... and the negative ones.
[part for part in choppedblob.sentences if part.polarity <= -0.1]

import codecs
import xml.etree.ElementTree as ElementTree

# Parse the TEI file from the current working directory. ElementTree.parse
# opens and closes the file itself, so no handle is left dangling, and it
# takes the encoding from the XML declaration.
# Unpacking requires the root element to have exactly two children.
tei_header, tei_text = ElementTree.parse("macbeth.xml").getroot()

# Collect every text node below tei_text[1][0]
# (presumably the first act, going by the variable name -- TODO confirm).
textnodes_act1 = list(tei_text[1][0].itertext())

# NOTE: this rebinds `text`, which held the sonnet up to this point.
text = " ".join(textnodes_act1).replace("\n", " ")
len(text)

# Wrap the current `text` for sentence-level processing.
macblob = TextBlob(text)

# Inspect the first sentence: the object, its tokens, and the tokens re-joined.
macblob.sentences[0]

macblob.sentences[0].tokens

" ".join(macblob.sentences[0].tokens)


# One fresh TextBlob per sentence, rebuilt from its space-joined tokens.
blobs = [
    TextBlob(" ".join(sentence_tokens))
    for sentence_tokens in (sent.tokens for sent in macblob.sentences)
]

# Polarity score of every sentence, in text order.
polarities = [sentence_blob.polarity for sentence_blob in blobs]

import matplotlib.pyplot as plt

# Polarity per sentence; the x axis is the position in the `polarities` list.
plt.plot(polarities)
plt.xlabel("sentence index")
plt.ylabel("polarity")
plt.show()

# Subjectivity score of every sentence blob, in the same order.
subjectivities = [sentence_blob.subjectivity for sentence_blob in blobs]

# Overlay both measures; labels and a legend keep the two curves apart.
plt.plot(polarities, label="polarity")
plt.plot(subjectivities, label="subjectivity")
plt.xlabel("sentence index")
plt.ylabel("score")
plt.legend()
plt.show()

import numpy as np

# Element-wise product of polarity and subjectivity for every sentence.
plt.plot(np.multiply(polarities, subjectivities))
plt.show()


# Repeat the pipeline on everything below tei_text[1]
# (presumably the whole play rather than a single act -- TODO confirm).
# NOTE: text, macblob, blobs, polarities and subjectivities are rebound here.
textnodes = list(tei_text[1].itertext())
text = " ".join(textnodes).replace("\n", " ")
macblob = TextBlob(text)
blobs = [TextBlob(" ".join(sent.tokens)) for sent in macblob.sentences]
polarities = [sentence_blob.polarity for sentence_blob in blobs]
subjectivities = [sentence_blob.subjectivity for sentence_blob in blobs]
# Element-wise product of the two scores.
series = np.multiply(polarities, subjectivities)
plt.plot(series)
plt.show()

# let's get "scientific"!
from scipy.signal import savgol_filter

# Light Savitzky-Golay smoothing: window of 5 samples, polynomial order 3.
smoothed = savgol_filter(series, 5, 3)
plt.plot(series, label="polarity * subjectivity")
plt.plot(smoothed, label="smoothed (window 5, order 3)")
plt.xlabel("sentence index")
plt.legend()
plt.show()

# signal or noise?
smoothed1 = savgol_filter(series, 25, 3)
plt.plot(smoothed1, label="window 25, order 3")
# The window length is 1001 rather than 1000: older SciPy releases reject an
# even window_length with a ValueError. With the default mode="interp" the
# series must contain at least as many samples as the window.
smoothed2 = savgol_filter(series, 1001, 5)
plt.plot(smoothed2, label="window 1001, order 5")
plt.xlabel("sentence index")
plt.legend()
plt.show()

# Same smoothing applied to plain polarity and to the weighted series.
smoothed_polarities = savgol_filter(polarities, 25, 3)
plt.plot(smoothed_polarities, label="polarity (smoothed)")
smoothed_series = savgol_filter(series, 25, 3)
plt.plot(smoothed_series, label="polarity * subjectivity (smoothed)")
plt.xlabel("sentence index")
plt.legend()
plt.show()


# Sentences scored at the two extremes of the polarity scale.
yay = [sentence for sentence in blobs if sentence.polarity == 1]
nay = [sentence for sentence in blobs if sentence.polarity == -1]

yay
nay

# most words do not get assigned a polarity on their own
# (slicing with [:1] instead of indexing with [0] yields an empty list rather
# than an IndexError when no sentence reached the extreme score)
[
    (token, TextBlob(token).polarity)
    for sentence in yay[:1]
    for token in sentence.tokens
]
[
    (token, TextBlob(token).polarity)
    for sentence in nay[:1]
    for token in sentence.tokens
]

# let's grab all the words from both lists
# sorted() keeps the word order identical across runs; iterating a bare set
# of strings can give a different order after every kernel restart
yaywords = sorted(set(" ".join(x.string for x in yay).split()))
naywords = sorted(set(" ".join(x.string for x in nay).split()))

# only few of the words in the best or worst sentences get a polarity score
# when isolated
[token for token in yaywords if TextBlob(token).polarity > 0]
[token for token in naywords if TextBlob(token).polarity < 0]


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\husem\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\husem\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\husem\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\husem\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\husem\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Shall	NN
I	PRP
compare	VBP
data	NNS
Yes	UH
We	PRP
use	VBP
complicated	VBN
methods	NNS
compare	VBP
do	VBP
shake	VB
darling	VBG
hot	VBZ
is	VBZ
changing	VBG
fade	VB
lose	VBP
breathe	VB
see	VB
gives	VBZ
thee	VB


FileNotFoundError: [Errno 2] No such file or directory: 'macbeth.xml'