In [1]:
import pandas as pd
import numpy as np

# Plotly
import plotly.graph_objects as go

import pickle

from collections import Counter
from sklearn.feature_extraction import text 
from sklearn.feature_extraction.text import CountVectorizer
from textblob import TextBlob

from matplotlib import pyplot as plt
from adjustText import adjust_text
import math
from tqdm import tqdm
import seaborn as sns

## Helper Functions

In [2]:
def split_text(text, n=10):
    """
        Splits text into n contiguous partitions that together cover all of text.

        Parameters
        ----------
        text : str (or any sliceable sequence)
            The text to partition.
        n : int, default 10
            Number of partitions to return; must be at least 1.

        Returns
        -------
        list
            Exactly n slices of text, in order. Partition sizes differ by at
            most one element; when text is shorter than n some partitions
            are empty.

        Raises
        ------
        ValueError
            If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Integer boundaries (i * length) // n always start at 0 and end at length,
    # so no trailing characters are dropped and a text shorter than n never
    # leads to a zero step size.
    length = len(text)
    bounds = [(i * length) // n for i in range(n + 1)]

    # Consecutive boundary pairs delimit the partitions
    return [text[start:stop] for start, stop in zip(bounds, bounds[1:])]

In [3]:
def build_presidents_polarity_transcripts(corpus, n=10):
    """
        Computes a polarity series for every president in the corpus.

        The `transcripts` value of each row of corpus is cut into n pieces with
        split_text and every piece is scored with `pol`.

        Returns a dict mapping each index label of corpus to its list of scores.
    """

    scores_by_president = {}
    for name in corpus.index:
        pieces = split_text(corpus.loc[name].transcripts, n)
        scores_by_president[name] = [pol(piece) for piece in pieces]
    return scores_by_president

In [4]:
def build_presidents_subjectivity_transcripts(corpus, n=10):
    """
        Computes a subjectivity series for every president in the corpus.

        The `transcripts` value of each row of corpus is cut into n pieces with
        split_text and every piece is scored with `sub`.

        Returns a dict mapping each index label of corpus to its list of scores.
    """

    scores_by_president = {}
    for name in corpus.index:
        pieces = split_text(corpus.loc[name].transcripts, n)
        scores_by_president[name] = [sub(piece) for piece in pieces]
    return scores_by_president

In [5]:
def build_party_polarity_transcripts(corpus, n=10):
    """
        Computes a polarity series for every party in the corpus.

        corpus is indexed by party and has a `transcripts` column; each
        transcript is cut into n pieces with split_text and every piece is
        scored with `pol`.

        Returns a dict mapping each index label of corpus to its list of scores.
    """

    return {
        label: [pol(segment) for segment in split_text(corpus.loc[label].transcripts, n)]
        for label in corpus.index
    }

In [6]:
def build_party_subjectivity_transcripts(corpus, n=10):
    """
        Computes a subjectivity series for every party in the corpus.

        corpus is indexed by party and has a `transcripts` column; each
        transcript is cut into n pieces with split_text and every piece is
        scored with `sub`.

        Returns a dict mapping each index label of corpus to its list of scores.
    """

    return {
        label: [sub(segment) for segment in split_text(corpus.loc[label].transcripts, n)]
        for label in corpus.index
    }

In [7]:
def build_party_corpus(corpus):
    """
        Aggregates a per-row corpus into one transcript per political party.

        For every distinct value of corpus.Party, the second column of each
        matching row is concatenated (in row order) into a single string.

        Returns a DataFrame indexed by 'Party' with one 'transcripts' column,
        restricted to the known parties that occur in corpus and ordered as in
        the fixed list below.
    """

    present_parties = set(corpus.Party.values)

    # Concatenate the texts of each party (row[1] is the second column)
    names = list(present_parties)
    joined = [
        "".join(row[1] for row in corpus[corpus.Party == name].itertuples(index=False))
        for name in names
    ]
    frame = pd.DataFrame({'Party': names, 'transcripts': joined}).set_index('Party')

    # Keep only the known parties that are present, in display order
    display_order = ['Democratic', 'Republican', 'Democratic-Republican', 'Whig', 'Federalist', 'Unaffiliated']
    kept = [name for name in display_order if name in present_parties]
    return frame.reindex(index=kept)

### Plotly Functions

In [8]:
def get_polarity_plot(president, polarity_transcripts, show=False):
    """
        Plots one president's polarity across the partitions of their
        transcript and saves the figure as an HTML file.

        Parameters
        ----------
        president : str
            Key into polarity_transcripts; also used in the title and file name.
        polarity_transcripts : dict
            Maps a president to the sequence of polarity scores of their
            partitioned transcript.
        show : bool, default False
            When True, also display the figure after it has been written.

        Prints the plot title and writes
        plotly_html/president_htmls/polarity/<president>_polarity_plot.html.
    """
    scores = polarity_transcripts[president]

    # Derive the x positions from the number of partitions actually supplied
    # instead of assuming 10, so x and y always have the same length.
    n_partitions = len(scores)
    step = 1 / n_partitions if n_partitions else 0
    positions = [str(i * step) for i in range(n_partitions)]

    fig = go.Figure(data=go.Scatter(x=positions, y=scores, mode='lines+markers'))

    heading = 'Polarity of ' + president + ' over time'
    print(heading)
    fig.update_layout(
        title=heading,
        xaxis_title='Overall transcript length',
        yaxis_title='<-- Negative ------------------------ Positive -->',
        # Show the fractional x positions as percentages of the transcript
        xaxis_tickformat='%',
    )

    filename = "plotly_html/president_htmls/polarity/" + president + '_polarity_plot.html'
    fig.write_html(filename, auto_open=False)
    if show:
        fig.show()

In [9]:
def get_subjectivity_plot(president, subjectivity_transcripts, show=False):
    """
        Plots one president's subjectivity across the partitions of their
        transcript and saves the figure as an HTML file.

        Parameters
        ----------
        president : str
            Key into subjectivity_transcripts; also used in the title and
            file name.
        subjectivity_transcripts : dict
            Maps a president to the sequence of subjectivity scores of their
            partitioned transcript.
        show : bool, default False
            When True, also display the figure after it has been written.

        Prints the plot title and writes
        plotly_html/president_htmls/subjectivity/<president>_subjectivity_plot.html.
    """
    scores = subjectivity_transcripts[president]

    # Derive the x positions from the number of partitions actually supplied
    # instead of assuming 10, so x and y always have the same length.
    n_partitions = len(scores)
    step = 1 / n_partitions if n_partitions else 0
    positions = [str(i * step) for i in range(n_partitions)]

    fig = go.Figure(data=go.Scatter(x=positions, y=scores, mode='lines+markers'))

    heading = 'Subjectivity of ' + president + ' over time'
    print(heading)
    fig.update_layout(
        title=heading,
        xaxis_title='Overall transcript length',
        yaxis_title='<-- Facts ------------------------ Opinions -->',
        # Show the fractional x positions as percentages of the transcript
        xaxis_tickformat='%',
    )

    filename = "plotly_html/president_htmls/subjectivity/" + president + '_subjectivity_plot.html'
    fig.write_html(filename, auto_open=False)
    if show:
        fig.show()

In [10]:
def get_party_polarity(corpus, title, corpus_num, show=False):
    """
        Plots the polarity of every party in corpus (one trace per party,
        10 partitions of the party's aggregated transcripts) and saves it to
        plotly_html/party_htmls/political_era_<corpus_num>_polarity_plot.html.

        'Polarity of <title> over time' is printed and used as the figure
        title; the figure is displayed only when show is True.
    """
    scores_by_party = build_party_polarity_transcripts(build_party_corpus(corpus), n=10)
    positions = [str(tick * .1) for tick in range(10)]

    fig = go.Figure()
    for party_name in sorted(scores_by_party):
        trace = go.Scatter(x=positions, y=scores_by_party[party_name],
                           mode='lines+markers', name=party_name)
        fig.add_trace(trace)

    heading = 'Polarity of ' + title + ' over time'
    print(heading)
    fig.update_layout(
        title=heading,
        xaxis_title='Overall transcript length',
        yaxis_title='<-- Negative ------------------------ Positive -->',
        xaxis_tickformat='%',
    )

    destination = "plotly_html/party_htmls/political_era_" + str(corpus_num) + '_polarity_plot.html'
    fig.write_html(destination, auto_open=False)
    if show:
        fig.show()
    

In [11]:
def get_party_subjectivity(corpus, title, corpus_num, show=False):
    """
        Plots the subjectivity of every party in corpus (one trace per party,
        10 partitions of the party's aggregated transcripts) and saves it to
        plotly_html/party_htmls/political_era_<corpus_num>_subjectivity_plot.html.

        'Subjectivity of <title> over time' is printed and used as the figure
        title; the figure is displayed only when show is True.
    """
    scores_by_party = build_party_subjectivity_transcripts(build_party_corpus(corpus), n=10)
    positions = [str(tick * .1) for tick in range(10)]

    fig = go.Figure()
    for party_name in sorted(scores_by_party):
        trace = go.Scatter(x=positions, y=scores_by_party[party_name],
                           mode='lines+markers', name=party_name)
        fig.add_trace(trace)

    heading = 'Subjectivity of ' + title + ' over time'
    print(heading)
    fig.update_layout(
        title=heading,
        xaxis_title='Overall transcript length',
        yaxis_title='<-- Facts ------------------------ Opinions -->',
        xaxis_tickformat='%',
    )

    destination = "plotly_html/party_htmls/political_era_" + str(corpus_num) + '_subjectivity_plot.html'
    fig.write_html(destination, auto_open=False)
    if show:
        fig.show()
        

In [12]:
def get_president_polarity_pol_era(corpus, title, corpus_num, show=False):
    """
        Plots the polarity of every president in corpus (one trace per
        president, default number of partitions) and saves the figure to
        plotly_html/pol_era_htmls/political_era_<corpus_num>_presidents_polarity_plot.html.

        'Polarity of Presidents during the <title> over time' is printed and
        used as the figure title; the figure is displayed only when show is True.
    """
    scores_by_president = build_presidents_polarity_transcripts(corpus)
    positions = [str(tick * .1) for tick in range(10)]

    fig = go.Figure()
    for name, scores in scores_by_president.items():
        fig.add_trace(go.Scatter(x=positions, y=scores,
                                 mode='lines+markers', name=name))

    heading = 'Polarity of Presidents during the ' + title + ' over time'
    print(heading)
    fig.update_layout(
        title=heading,
        xaxis_title='Overall transcript length',
        yaxis_title='<-- Negative ------------------------ Positive -->',
        xaxis_tickformat='%',
    )

    destination = "plotly_html/pol_era_htmls/political_era_" + str(corpus_num) + '_presidents_polarity_plot.html'
    fig.write_html(destination, auto_open=False)
    if show:
        fig.show()
    

In [13]:
def get_president_subjectivity_pol_era(corpus, title, corpus_num, show=False):
    """
        Plots the subjectivity of every president in corpus (one trace per
        president, default number of partitions) and saves the figure to
        plotly_html/pol_era_htmls/political_era_<corpus_num>_presidents_subjectivity_plot.html.

        'Subjectivity of Presidents during the <title> over time' is printed and
        used as the figure title; the figure is displayed only when show is True.
    """
    scores_by_president = build_presidents_subjectivity_transcripts(corpus)
    positions = [str(tick * .1) for tick in range(10)]

    fig = go.Figure()
    for name, scores in scores_by_president.items():
        fig.add_trace(go.Scatter(x=positions, y=scores,
                                 mode='lines+markers', name=name))

    heading = 'Subjectivity of Presidents during the ' + title + ' over time'
    print(heading)
    fig.update_layout(
        title=heading,
        xaxis_title='Overall transcript length',
        yaxis_title='<-- Facts ------------------------ Opinions -->',
        xaxis_tickformat='%',
    )

    destination = "plotly_html/pol_era_htmls/political_era_" + str(corpus_num) + '_presidents_subjectivity_plot.html'
    fig.write_html(destination, auto_open=False)
    if show:
        fig.show()

## Load Data

In [14]:
# conda install -c conda-forge adjusttext 

def _load_pickle(path):
    """Unpickles the object stored at path and closes the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)

# Load corpuses: the full corpus plus one corpus per party system
corpus = _load_pickle("pickles/corpus.p")
first_party_corpus = _load_pickle("pickles/first_party_corpus.p")
second_party_corpus = _load_pickle("pickles/second_party_corpus.p")
third_party_corpus = _load_pickle("pickles/third_party_corpus.p")
fourth_party_corpus = _load_pickle("pickles/fourth_party_corpus.p")
fifth_party_corpus = _load_pickle("pickles/fifth_party_corpus.p")
sixth_party_corpus = _load_pickle("pickles/sixth_party_corpus.p")

## Polarity and Subjectivity of Presidents

In [15]:
# Find polarity and subjectivity of texts for each president
def pol(x):
    """Returns the TextBlob polarity score of the text x."""
    return TextBlob(x).sentiment.polarity


def sub(x):
    """Returns the TextBlob subjectivity score of the text x."""
    return TextBlob(x).sentiment.subjectivity


# Score each full transcript and store the results as new columns
corpus['pol'] = corpus.transcripts.apply(pol)
corpus['subj'] = corpus.transcripts.apply(sub)

# Independent copy of the scored corpus
corpus_scatter = corpus.copy()

## Polarity and Subjectivity over time

In [16]:
# Each president's transcript is scored in N partitions
N = 10
polarity_transcripts = build_presidents_polarity_transcripts(corpus, n=N)
subjectivity_transcripts = build_presidents_subjectivity_transcripts(corpus, n=N)

### Polarity over time

In [17]:
# Write one polarity plot per president (figures are saved, not displayed)
for president in polarity_transcripts:
    get_polarity_plot(president, polarity_transcripts, show=False)

Polarity of George Washington over time
Polarity of John Adams over time
Polarity of Thomas Jefferson over time
Polarity of James Madison over time
Polarity of James Monroe over time
Polarity of John Quincy Adams over time
Polarity of Andrew Jackson over time
Polarity of Martin Van Buren over time
Polarity of William Harrison over time
Polarity of John Tyler over time
Polarity of James K. Polk over time
Polarity of Zachary Taylor over time
Polarity of Millard Fillmore over time
Polarity of Franklin Pierce over time
Polarity of James Buchanan over time
Polarity of Abraham Lincoln over time
Polarity of Andrew Johnson over time
Polarity of Ulysses S. Grant over time
Polarity of Rutherford B. Hayes over time
Polarity of James A. Garfield over time
Polarity of Chester A. Arthur over time
Polarity of Grover Cleveland over time
Polarity of Benjamin Harrison over time
Polarity of William McKinley over time
Polarity of Franklin D. Roosevelt over time
Polarity of William Taft over time
Polarity 

### Subjectivity over time

In [18]:
# Write one subjectivity plot per president (figures are saved, not displayed)
for president in subjectivity_transcripts:
    get_subjectivity_plot(president, subjectivity_transcripts, show=False)

Subjectivity of George Washington over time
Subjectivity of John Adams over time
Subjectivity of Thomas Jefferson over time
Subjectivity of James Madison over time
Subjectivity of James Monroe over time
Subjectivity of John Quincy Adams over time
Subjectivity of Andrew Jackson over time
Subjectivity of Martin Van Buren over time
Subjectivity of William Harrison over time
Subjectivity of John Tyler over time
Subjectivity of James K. Polk over time
Subjectivity of Zachary Taylor over time
Subjectivity of Millard Fillmore over time
Subjectivity of Franklin Pierce over time
Subjectivity of James Buchanan over time
Subjectivity of Abraham Lincoln over time
Subjectivity of Andrew Johnson over time
Subjectivity of Ulysses S. Grant over time
Subjectivity of Rutherford B. Hayes over time
Subjectivity of James A. Garfield over time
Subjectivity of Chester A. Arthur over time
Subjectivity of Grover Cleveland over time
Subjectivity of Benjamin Harrison over time
Subjectivity of William McKinley ov

### Polarity and Subjectivity by political era

In [19]:
# Find polarity and subjectivity of texts for each political era

# Party systems in chronological order (first through sixth)
party_system_corpuses = [
    first_party_corpus,
    second_party_corpus,
    third_party_corpus,
    fourth_party_corpus,
    fifth_party_corpus,
    sixth_party_corpus,
]

# Score every transcript of each party system and store the results in place
for era_corpus in party_system_corpuses:
    era_corpus['pol'] = era_corpus.transcripts.apply(pol)
    era_corpus['subj'] = era_corpus.transcripts.apply(sub)

# Display titles, aligned position by position with party_system_corpuses
party_system_titles = [
    'First Party System (1792–1824)',
    'Second Party System (1828–1854)',
    'Third Party System (1854–1895)',
    'Fourth Party System (1896–1932)',
    'Fifth Party System (1932–1964)',
    'Sixth Party System (1964–present)',
]

# (corpus, title) pairs consumed when building the per-era plots
political_era_tuples = list(zip(party_system_corpuses, party_system_titles))

In [20]:
# Build interactive plots: four HTML figures per political era
for i, pol_era in enumerate(political_era_tuples):
    pol_era_corpus, pol_era_title = pol_era
    era_number = i + 1  # eras are numbered from 1 in the output file names

    # Party-level plots
    get_party_polarity(pol_era_corpus, pol_era_title, era_number)
    get_party_subjectivity(pol_era_corpus, pol_era_title, era_number)

    # President-level plots
    get_president_polarity_pol_era(pol_era_corpus, pol_era_title, era_number)
    get_president_subjectivity_pol_era(pol_era_corpus, pol_era_title, era_number)

Polarity of First Party System (1792–1824) over time
Subjectivity of First Party System (1792–1824) over time
Polarity of Presidents during the First Party System (1792–1824) over time
Subjectivity of Presidents during the First Party System (1792–1824) over time
Polarity of Second Party System (1828–1854) over time
Subjectivity of Second Party System (1828–1854) over time
Polarity of Presidents during the Second Party System (1828–1854) over time
Subjectivity of Presidents during the Second Party System (1828–1854) over time
Polarity of Third Party System (1854–1895) over time
Subjectivity of Third Party System (1854–1895) over time
Polarity of Presidents during the Third Party System (1854–1895) over time
Subjectivity of Presidents during the Third Party System (1854–1895) over time
Polarity of Fourth Party System (1896–1932) over time
Subjectivity of Fourth Party System (1896–1932) over time
Polarity of Presidents during the Fourth Party System (1896–1932) over time
Subjectivity of 

In [23]:
# Update textposition for presidents whose labels overlap with others.
# Keys are president names (matched against the corpus index when the
# scatterplot is built); values are passed to Plotly as `textposition`.
president_textposition = {
    'John Adams': 'top right',
    'Ulysses S. Grant': 'middle right',
    'Lyndon B. Johnson': 'bottom center',
    'Richard M. Nixon': 'bottom center',
    'Jimmy Carter': 'bottom center',
    'Barack Obama': 'bottom center',
    'George W. Bush': 'middle left',
    'John Tyler': 'middle right',
    'William Taft': 'bottom center',
    'Andrew Jackson': 'bottom center'
}

In [24]:
# Marker color used for each political party
parties_color = {
    'Democratic': 'blue',
    'Democratic-Republican': 'green',
    'Federalist': 'orange',
    'Republican': 'red',
    'Unaffiliated': 'black',
    'Whig': 'purple',
}


def get_party_color_mapping(corpus):
    """
        Returns the list of colors matching the Party value of each row of
        corpus, in row order. Raises KeyError for a party that has no entry
        in parties_color.
    """
    return [parties_color[party] for party in corpus.Party.values]

In [25]:
# Build interactive scatterplot of overall polarity (x) vs subjectivity (y)
fig = go.Figure()

# Presidents with a custom label position each get their own labelled trace,
# hidden from the legend.
for president in president_textposition.keys():
    president_corpus = corpus_scatter[corpus_scatter.index == president]
    # Select the first row by position explicitly: plain [0] on a Series with
    # a label index relies on a deprecated positional fallback.
    president_party = president_corpus.Party.iloc[0]
    president_colors = get_party_color_mapping(president_corpus)

    fig.add_trace(go.Scatter(
        x=president_corpus['pol'],
        y=president_corpus['subj'],
        legendgroup=president_party,
        name=president_party,
        mode="markers+text",
        text=president_corpus.index,
        textposition=president_textposition[president],
        marker=dict(color=president_colors),
        showlegend=False
    ))

    # Only a Federalist president additionally gets a legend-visible trace here
    if president_party == 'Federalist':
        fig.add_trace(go.Scatter(
            x=president_corpus['pol'],
            y=president_corpus['subj'],
            legendgroup=president_party,
            name=president_party,
            mode="markers",
            text=president_corpus.index,
            textposition=president_textposition[president],
            marker=dict(color=president_colors)
        ))

# NOTE: this rebinds corpus_scatter without the presidents handled above, so
# re-running this cell requires re-creating corpus_scatter first.
corpus_scatter = corpus_scatter.drop(list(president_textposition))

# .unique() yields the parties in order of first appearance, which keeps the
# trace/legend order stable between runs (set iteration order is not).
for party in corpus_scatter.Party.unique():
    party_corpus = corpus_scatter[corpus_scatter.Party == party]
    party_colors = get_party_color_mapping(party_corpus)

    # Labelled points, hidden from the legend
    fig.add_trace(go.Scatter(
        x=party_corpus['pol'],
        y=party_corpus['subj'],
        legendgroup=party,
        name=party,
        mode="markers+text",
        text=party_corpus.index,
        textposition="top center",
        marker=dict(color=party_colors),
        showlegend=False
    ))

    # Marker-only trace that provides the party's legend entry
    fig.add_trace(go.Scatter(
        x=party_corpus['pol'],
        y=party_corpus['subj'],
        legendgroup=party,
        name=party,
        mode="markers",
        text=party_corpus.index,
        textposition="top center",
        marker=dict(color=party_colors)
    ))

fig.update_layout(
    title="Polarity vs Subjectivity",
    xaxis_title="<-- Negative ------------------------ Positive -->",
    yaxis_title="<-- Facts ------------------------ Opinions -->",
)

fig.write_html('plotly_html/sentiment_analysis_scatterplot.html', auto_open=False)