In [None]:
# Import dependencies and required magics
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import pandas as pd

In [None]:
# Download the speech's web page and parse its HTML.
# Nothing is written to disk: the parsed page is kept in memory as `soup`.

URL = 'http://www.abrahamlincolnonline.org/lincoln/speeches/lyceum.htm'

# A timeout stops the cell from hanging indefinitely on an unresponsive server.
page = requests.get(URL, timeout=30)
# Raise on 4xx/5xx responses instead of silently parsing an error page.
page.raise_for_status()

# Parse the raw response bytes with the standard-library HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')
soup

In [None]:
# Cut the speech itself out of the page source, from its opening words to its
# closing words. The boundaries are located by searching for two marker
# phrases, so they follow the page if its surrounding markup shifts.
START_MARKER = 'As a subject fo'
END_MARKER = 'not prevail against'

page_source = str(soup)
speech_start = page_source.find(START_MARKER)
end_marker_at = page_source.find(END_MARKER)

# str.find returns -1 when the phrase is absent; slicing with -1 would quietly
# produce the wrong text, so stop here with a clear message instead.
if speech_start == -1 or end_marker_at == -1:
  raise ValueError('speech marker phrase not found in page source')

# The slice ends right after the closing marker phrase.
speech_end = end_marker_at + len(END_MARKER)

print(speech_start)
print(speech_end)

speech = page_source[speech_start:speech_end]

# show the last 100 characters to check the end boundary
speech[-100:]

In [None]:
#clean speech from characters used in html
import re

def cleanhtml(raw_html):
  """Return ``raw_html`` with every ``<...>`` tag removed.

  A tag is matched non-greedily from ``<`` up to the nearest ``>``.
  ``re.DOTALL`` lets ``.`` match line breaks, so a tag whose attributes wrap
  onto a following line is removed too. Text outside tags, including HTML
  entities such as ``&amp;``, is returned unchanged.

  Args:
    raw_html: string that may contain HTML/XML tags.

  Returns:
    The input string with all tag spans replaced by the empty string.
  """
  tag_pattern = re.compile(r'<.*?>', re.DOTALL)
  return tag_pattern.sub('', raw_html)

# Strip the tags and turn line breaks into spaces, then run the tag stripper a
# second time over the flattened text.
# NOTE(review): the second pass presumably catches tags that were split across
# lines before flattening -- confirm before removing it.
#
# Dataset-specific tweak: "St. Louis" loses its period so the abbreviation is
# not treated as a sentence end when the text is later split on '.'.
cleaned_speech = (
  cleanhtml(cleanhtml(speech).replace("\n", " "))
  .replace("St. Louis", "St Louis")
)
cleaned_speech

In [None]:
# Break the speech into sentences at every '.', trim the whitespace around
# each piece, and discard pieces that are empty after trimming.
sentences = [
  fragment.strip()
  for fragment in cleaned_speech.split('.')
  if fragment.strip()
]
sentences

In [None]:
# Perform sentiment analysis: score every sentence with TextBlob.
# Each row is [sentence, polarity, subjectivity]. The list keeps the name `df`
# because the next cell turns exactly this variable into the DataFrame.
df = []
for sentence in sentences:
  # .sentiment is a (polarity, subjectivity) pair; unpack it once per sentence
  polarity, subjectivity = TextBlob(sentence).sentiment
  df.append([sentence, polarity, subjectivity])
df

In [None]:
# Save sentiment data to a DataFrame.
# Column names are passed to the constructor rather than assigned afterwards:
# assigning three names to a frame built from zero rows raises a length
# mismatch, whereas this form yields an empty frame with the right columns.
df = pd.DataFrame(df, columns=['sentence', 'polarity', 'subjectivity'])

In [None]:
# Output key sentiment analysis results including:
#   Overall sentiment analysis scores for the document
#   Per-sentence scores
#   Correlation of polarity and subjectivity scores across sentences

# Document-level score: TextBlob run over the whole cleaned speech at once.
print('overall document sentiment:', TextBlob(cleaned_speech).sentiment)

display(df)

# Correlate only the two numeric score columns. The 'sentence' column holds
# text, and DataFrame.corr() in pandas >= 2.0 raises on non-numeric columns
# unless they are excluded.
df[['polarity', 'subjectivity']].corr()

In [None]:
# Show four rankings of five sentences each (20 rows in total); every row
# carries the sentence with its polarity and subjectivity:
#    highest polarity, lowest polarity,
#    highest subjectivity, lowest subjectivity

# (heading to print, column to sort by, sort ascending?)
rankings = [
  ('top 5 sentences for polarity', 'polarity', False),
  ('lowest 5 sentences for polarity', 'polarity', True),
  ('top 5 sentences for subjectivity', 'subjectivity', False),
  ('lowest 5 sentences for subjectivity', 'subjectivity', True),
]

for heading, column, ascending in rankings:
  print('\n', heading)
  display(df.sort_values(column, ascending=ascending).head(5))

In [None]:
# Plot polarity against subjectivity for every sentence (interactive scatter plot)
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import pandas as pd
import re
import plotly.express as px

# One point per sentence: polarity on the x-axis, subjectivity on the y-axis;
# the sentence text is added to each point's hover data.
fig = px.scatter(
  df,
  x='polarity',
  y='subjectivity',
  hover_data=['sentence'],
  title='Lincoln',
)
fig.show()