In [1]:
import numpy as np # we will use this later, so import it now
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file, show

In [2]:
# Configure Bokeh so that later show() calls render inline in this notebook.
output_notebook()

In [3]:
def get_and_clean_data(path):
    """Load a sentiment CSV and attach a parsed ``date`` column.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file's contents without the ``Unnamed: 0``, ``Unnamed: 0.1`` and
        ``Unnamed: 0.1.1`` columns, plus a ``date`` column parsed from
        ``created_at``. Values that cannot be parsed become ``NaT``.

    Raises
    ------
    KeyError
        If the file has no ``created_at`` column.
    """
    data = pd.read_csv(path)
    # These look like leftover index columns from repeated to_csv round-trips
    # (TODO confirm). Not every file necessarily carries all three, so tolerate
    # missing ones instead of raising KeyError.
    data = data.drop(
        columns=['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1'],
        errors='ignore',
    )
    # Cast to str first so non-string cells (e.g. NaN) go through the parser
    # and are coerced to NaT rather than raising.
    data['date'] = pd.to_datetime(data['created_at'].astype(str), errors='coerce')
    return data

In [4]:
def get_mean_scores_by_date(df):
    """Average ``scores`` per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed ``date`` column and a numeric ``scores``
        column. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per distinct calendar date, with columns ``date``
        (``datetime.date`` objects) and ``scores`` (the mean for that day).
    """
    # assign() works on a copy, so the caller's `date` column keeps its
    # datetime dtype and this function can safely be re-run on the same frame.
    by_day = df.assign(date=df['date'].dt.date)
    return by_day.groupby('date')['scores'].mean().reset_index()

In [5]:
def get_plot_data(path):
    """Return the per-day mean scores for the CSV at ``path``.

    Loads and cleans the file with ``get_and_clean_data``, aggregates it with
    ``get_mean_scores_by_date``, and keeps only the ``date`` and ``scores``
    columns, in that order.
    """
    plot_columns = ['date', 'scores']
    daily_means = get_mean_scores_by_date(get_and_clean_data(path))
    return daily_means[plot_columns]

In [20]:
def _add_candidate_series(p, df, color, candidate):
    """Draw one candidate's line and point markers on figure ``p``.

    The legend entry is the candidate name followed by the mean of
    ``df["scores"]`` rounded to 5 decimal places.
    """
    label = candidate + ' (' + str(round(df["scores"].mean(), 5)) + ')'
    p.line(df.date, df.scores, line_color=color,
           line_width=4, line_alpha=0.6,
           legend_label=label)
    p.circle(df.date, df.scores, fill_color=color, size=5, color=color)


def plot_chart(rep_df, dem_df, rep_cand, dem_cand):
    """Plot two candidates' daily sentiment scores on one time-series chart.

    Parameters
    ----------
    rep_df : pandas.DataFrame
        ``date`` / ``scores`` frame drawn in red.
    dem_df : pandas.DataFrame
        ``date`` / ``scores`` frame drawn in blue.
    rep_cand : str
        Legend name for the red series.
    dem_cand : str
        Legend name for the blue series.

    Returns
    -------
    None
        The figure is displayed via ``show``.
    """
    p = figure(x_axis_type="datetime", title="Candidate Sentiment - 30 Days", plot_height=350, plot_width=900)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'Time'
    p.yaxis.axis_label = 'Value'

    _add_candidate_series(p, rep_df, "red", rep_cand)
    # Use the frame that was passed in for the blue legend value, not a
    # notebook-level global, so every chart reports its own mean.
    _add_candidate_series(p, dem_df, "blue", dem_cand)

    p.legend.location = "bottom_right"

    show(p)

In [21]:
# Per-day mean sentiment for the two 2020 candidate files.
df_trump = get_plot_data('data2/sentiment/candidate/2020/trump/2020_trump.csv')
df_biden = get_plot_data('data2/sentiment/candidate/2020/biden/2020_biden.csv')

# Argument order follows plot_chart(rep_df, dem_df, rep_cand, dem_cand):
# first frame/name is drawn in red, second in blue.
plot_chart(df_trump, df_biden, 'Trump', 'Biden')

In [22]:
# Per-day mean sentiment for the two 2016 candidate files.
df_trump_16 = get_plot_data('data2/sentiment/candidate/2016/trump/2016_trump.csv')
df_hillary = get_plot_data('data2/sentiment/candidate/2016/hillary/2016_hillary.csv')

# Argument order follows plot_chart(rep_df, dem_df, rep_cand, dem_cand):
# first frame/name is drawn in red, second in blue.
plot_chart(df_trump_16, df_hillary, 'Trump', 'Hillary')

In [23]:
# Per-day mean sentiment for the two 2012 candidate files.
# Each frame gets its own name so the 2016 frames are not overwritten with
# unrelated data.
df_romney = get_plot_data('data2/sentiment/candidate/2012/romney/2012_romney.csv')
df_obama = get_plot_data('data2/sentiment/candidate/2012/obama/2012_obama.csv')

# plot_chart(rep_df, dem_df, rep_cand, dem_cand) pairs the first frame with the
# first name (red) and the second frame with the second name (blue), so the
# Romney data must come first to match the 'Romney' label.
plot_chart(df_romney, df_obama, 'Romney', 'Obama')