In [2]:
import numpy as np # we will use this later, so import it now
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file, show

In [3]:
# Configure Bokeh to render output inline in the notebook, so the later
# show(...) calls display their figures beneath the calling cell.
output_notebook()

In [4]:
def get_and_clean_data(path):
    """Load a sentiment CSV and attach a parsed ``date`` column.

    Parameters
    ----------
    path : str or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without the ``Unnamed: 0*`` columns, plus a
        ``date`` column holding ``created_at`` parsed as datetimes.
        Values that cannot be parsed become ``NaT``.

    Raises
    ------
    KeyError
        If the CSV has no ``created_at`` column.
    """
    data = pd.read_csv(path)

    # NOTE(review): the "Unnamed: 0*" columns look like index columns left
    # behind by repeated to_csv round-trips -- confirm against the producer.
    # errors='ignore' lets files that carry fewer (or none) of them load
    # instead of failing with KeyError.
    leftover_index_cols = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1']
    data = data.drop(columns=leftover_index_cols, errors='ignore')

    # Cast to str first so every value goes through the same string parser;
    # anything unparseable is coerced to NaT rather than raising.
    data['date'] = pd.to_datetime(data['created_at'].astype(str), errors='coerce')
    return data

In [5]:
def get_mean_scores_by_date(df):
    """Average ``scores`` per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a datetime-typed ``date`` column and a numeric ``scores``
        column. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day with columns ``date`` (``datetime.date``
        objects) and ``scores`` (the mean for that day), ordered by date.
    """
    # Group on a derived day key instead of overwriting df['date']: replacing
    # the column in place would change the caller's frame and make a second
    # call fail, because the .dt accessor no longer works on date objects.
    day = df['date'].dt.date.rename('date')
    return df.groupby(day)['scores'].mean().reset_index()

In [6]:
def get_plot_data(path):
    """Return the per-day mean sentiment scores for the CSV at ``path``.

    Chains ``get_and_clean_data`` and ``get_mean_scores_by_date`` and keeps
    only the ``date`` and ``scores`` columns of the result.
    """
    daily_means = get_mean_scores_by_date(get_and_clean_data(path))
    return daily_means[['date', 'scores']]

In [20]:
def plot_chart(rep_df, dem_df, rep_cand, dem_cand, title):
    """Plot two candidates' daily sentiment scores on one Bokeh chart.

    Parameters
    ----------
    rep_df, dem_df : pandas.DataFrame
        Frames with ``date`` and ``scores`` columns for the Republican and
        Democratic candidate respectively.
    rep_cand, dem_cand : str
        Candidate names used in the legend. Each legend entry also shows the
        mean of that candidate's own ``scores`` rounded to 5 decimals.
    title : str
        Chart title.

    The Republican series is drawn in red and the Democratic series in blue.
    The figure is displayed with ``show`` and nothing is returned.
    """
    # NOTE(review): plot_height / plot_width are the older Bokeh spellings;
    # newer releases use height / width -- confirm against the installed version.
    p = figure(x_axis_type="datetime", title=title, plot_height=350, plot_width=900)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Sentiment Score'

    # One code path for both series, so each legend mean is always computed
    # from the frame that is actually being drawn (never a notebook global).
    series = ((rep_df, rep_cand, "red"), (dem_df, dem_cand, "blue"))
    for frame, candidate, colour in series:
        label = candidate + ' (' + str(round(frame["scores"].mean(), 5)) + ')'
        p.line(frame.date, frame.scores, line_color=colour,
               line_width=4, line_alpha=0.6,
               legend_label=label)
        p.circle(frame.date, frame.scores, fill_color=colour, size=5, color=colour)

    p.legend.location = "bottom_right"

    show(p)

In [21]:
# 2020 candidates: per-day mean sentiment for each, drawn on one chart.
df_trump = get_plot_data('data2/sentiment/candidate/2020/trump/2020_trump.csv')
df_biden = get_plot_data('data2/sentiment/candidate/2020/biden/2020_biden.csv')

plot_chart(
    rep_df=df_trump,
    dem_df=df_biden,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate Sentiment - 2020 - Last 30 Days",
)

In [22]:
# 2016 candidates: per-day mean sentiment for each, drawn on one chart.
df_trump_16 = get_plot_data('data2/sentiment/candidate/2016/trump/2016_trump.csv')
df_hillary = get_plot_data('data2/sentiment/candidate/2016/hillary/2016_hillary.csv')

plot_chart(
    rep_df=df_trump_16,
    dem_df=df_hillary,
    rep_cand='Trump',
    dem_cand='Hillary',
    title="Candidate Sentiment - 2016 - 30 Days Prior to Election Day",
)

In [23]:
# 2012 candidates: per-day mean sentiment for each, drawn on one chart.
df_obama = get_plot_data('data2/sentiment/candidate/2012/obama/2012_obama.csv')
df_romney = get_plot_data('data2/sentiment/candidate/2012/romney/2012_romney.csv')

# plot_chart takes the Republican frame and label first. Romney's frame must
# therefore lead so that each series is drawn under its own name and colour.
plot_chart(df_romney, df_obama, 'Romney', 'Obama', "Candidate Sentiment - 2012 - 30 Days Prior to Election")

In [24]:
# Economy topic: per-day mean sentiment for each 2020 candidate.
df_trump_econ = get_plot_data('data2/sentiment/econ/trump/2020_trump economy.csv')
df_biden_econ = get_plot_data('data2/sentiment/econ/biden/2020_biden economy.csv')

plot_chart(
    rep_df=df_trump_econ,
    dem_df=df_biden_econ,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate|Economy - Last 30 Days",
)

In [28]:
# Party topic: per-day mean sentiment for each 2020 candidate.
df_trump_party = get_plot_data('data2/sentiment/party/trump/2020_trump republican.csv')
df_biden_party = get_plot_data('data2/sentiment/party/biden/2020_biden democrat.csv')

plot_chart(
    rep_df=df_trump_party,
    dem_df=df_biden_party,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate|Party - Last 30 Days",
)

In [29]:
# Environment topic: per-day mean sentiment for each 2020 candidate.
df_trump_env = get_plot_data('data2/sentiment/environment/trump/2020_trump environment.csv')
df_biden_env = get_plot_data('data2/sentiment/environment/biden/2020_biden environment.csv')

plot_chart(
    rep_df=df_trump_env,
    dem_df=df_biden_env,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate|Environment - Last 30 Days",
)

In [32]:
# Health topic: per-day mean sentiment for each 2020 candidate.
df_trump_health = get_plot_data('data2/sentiment/health/trump/2020_trump health.csv')
df_biden_health = get_plot_data('data2/sentiment/health/biden/2020_biden health.csv')

plot_chart(
    rep_df=df_trump_health,
    dem_df=df_biden_health,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate|Health - Last 30 Days",
)

In [33]:
# Immigration topic: per-day mean sentiment for each 2020 candidate.
df_trump_imm = get_plot_data('data2/sentiment/imm/trump/2020_trump immigration.csv')
df_biden_imm = get_plot_data('data2/sentiment/imm/biden/2020_biden immigration.csv')

plot_chart(
    rep_df=df_trump_imm,
    dem_df=df_biden_imm,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate|Immigration - Last 30 Days",
)

In [None]:
# Job topic: per-day mean sentiment for each 2020 candidate.
df_trump_job = get_plot_data('data2/sentiment/job/trump/2020_trump job.csv')
df_biden_job = get_plot_data('data2/sentiment/job/biden/2020_biden job.csv')

plot_chart(
    rep_df=df_trump_job,
    dem_df=df_biden_job,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate|Job - Last 30 Days",
)