### Creating Bar Charts for Bokeh Visualization
Historical Trend Comparison for Significant Variables

In [57]:
# imports
import geopandas as gpd
import pandas as pd
import numpy as np
import json
from bokeh.io import output_notebook
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider, LabelSet)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer, Spectral5
from bokeh.plotting import figure, show

In [2]:
# lift the column cap so wide dataframes are displayed without truncation
pd.options.display.max_columns = None

In [3]:
# location of the combined county-level CSV in the project's GitHub repo
data = (
    "https://raw.githubusercontent.com/ehuang13/w210-presidential-election/"
    "master/data/Data-Jul07/combined_jul05.csv"
)

# load it into a dataframe, decoding the file as ISO-8859-1
df = pd.read_csv(filepath_or_buffer=data, encoding="ISO-8859-1")

In [8]:
# peek at five randomly chosen rows of the loaded data
df.sample(n=5)

Unnamed: 0,YEAR_FIPS,YEAR,STATE_FIPS,COUNTY_FIPS,STATE,COUNTY,REP_CANDIDATE,DEM_CANDIDATE,REP_VOTES,DEM_VOTES,COUNTY_TOTALVOTES,WINNING_CANDIDATE,WINNING_PARTY,WINNING_PARTY_BINARY,HOUSE_WINNING_BINARY,SENATE_WINNING_BINARY,UNEMPLOYMENT_RATE,AVG_WAGE_SALARY,AA_FEMALE,AA_MALE,BA_FEMALE,BA_MALE,H_FEMALE,H_MALE,IA_FEMALE,IA_MALE,NA_FEMALE,NA_MALE,TOT_FEMALE,TOT_MALE,TOT_POP,WA_FEMALE,WA_MALE,TOT_POP_LESS19,TOT_MALE_LESS19,TOT_FEMALE_LESS19,TOT_POP_20to39,TOT_MALE_20to39,TOT_FEMALE_20to39,TOT_POP_40to59,TOT_MALE_40to59,TOT_FEMALE_40to59,TOT_POP_Above60,TOT_MALE_Above60,TOT_FEMALE_Above60,AA_FEMALE%,AA_MALE%,BA_FEMALE%,BA_MALE%,H_FEMALE%,H_MALE%,IA_FEMALE%,IA_MALE%,NA_FEMALE%,NA_MALE%,WA_FEMALE%,WA_MALE%,TOT_FEMALE%,TOT_MALE%,TOT_POP_LESS19%,TOT_POP_20to39%,TOT_POP_40to59%,TOT_POP_Above60%,MARGIN_VICTORY
1052,201218067,2012,18,67,Indiana,Howard County,Mitt Romney,Barack Obama,20327,15135,36290.0,Mitt Romney,republican,1,1,1,9.1,0,426,342,2944,2841,1063,1149,149,144,20,9,42948,39807,82755,38454,35575,21515,10997,10518,19029,9177,9852,23429,11427,12002,18782,8206,10576,0.005148,0.004133,0.035575,0.03433,0.012845,0.013884,0.0018,0.00174,0.000242,0.000109,0.464673,0.429883,0.518978,0.481022,0.259984,0.229944,0.283113,0.226959,-0.14307
13958,200829223,2008,29,223,Missouri,Wayne County,John McCain,Barack Obama,3784,2243,6154.0,John McCain,republican,1,1,0,6.8,0,18,12,15,22,57,65,29,31,1,1,6727,6629,13356,6559,6466,3131,1652,1479,2493,1226,1267,3935,1924,2011,3797,1827,1970,0.001348,0.000898,0.001123,0.001647,0.004268,0.004867,0.002171,0.002321,7.5e-05,7.5e-05,0.49109,0.484127,0.503669,0.496331,0.234426,0.186658,0.294624,0.284292,-0.250406
8377,200042091,2000,42,91,Pennsylvania,Montgomery County,George W. Bush,Al Gore,145623,177990,332422.0,Al Gore,democrat,0,1,1,3.1,0,15571,14963,28643,28216,7059,8197,458,454,147,128,387147,362262,749409,338898,315346,196925,100775,96150,200818,99511,101307,209705,102544,107161,141961,59432,82529,0.020778,0.019966,0.038221,0.037651,0.009419,0.010938,0.000611,0.000606,0.000196,0.000171,0.45222,0.420793,0.516603,0.483397,0.262774,0.267968,0.279827,0.189431,0.097367
12189,200451163,2004,51,163,Virginia,Rockbridge County,George W. Bush,John Kerry,5412,3627,9181.0,George W. Bush,republican,1,1,1,3.3,0,58,40,315,331,98,94,33,32,1,0,10629,10560,21189,10125,10080,4923,2562,2361,4924,2516,2408,6392,3143,3249,4950,2339,2611,0.002737,0.001888,0.014866,0.015621,0.004625,0.004436,0.001557,0.00151,4.7e-05,0.0,0.477842,0.475719,0.501628,0.498372,0.232338,0.232385,0.301666,0.233612,-0.194423
9982,200419013,2004,19,13,Iowa,Black Hawk County,George W. Bush,John Kerry,28046,35392,63907.0,John Kerry,democrat,0,1,0,4.5,32142,737,700,5468,4863,1459,1674,168,148,59,55,65482,61009,126491,58183,54397,33654,16858,16796,37324,18547,18777,32873,15986,16887,22640,9618,13022,0.005827,0.005534,0.043228,0.038445,0.011534,0.013234,0.001328,0.00117,0.000466,0.000435,0.459977,0.430046,0.517681,0.482319,0.266058,0.295072,0.259884,0.178985,0.114948


### Create Static Bar Chart
* x-axis: all the election years
* y-axis: the yearly average of any selected feature, such as unemployment rate

**Steps Required:**
* aggregate the selected feature by year
* plot bar chart

**Reference Sources:**
* https://docs.bokeh.org/en/latest/docs/user_guide/categorical.html
* https://towardsdatascience.com/interactive-bar-charts-with-bokeh-7230e5653ba3

In [30]:
# Route Bokeh output to the notebook so that later show() calls display
# their plots inline, directly below the cell that made the call.
output_notebook()

In [55]:
# column to summarise; any feature column of df can be substituted here
feature = "UNEMPLOYMENT_RATE"

# mean of the chosen column within each election year
# (the result keeps YEAR both as the index and as a regular column)
grouped_feature = df.loc[:, ["YEAR", feature]].groupby(by=df["YEAR"]).mean()

In [56]:
# checkout grouped feature dataframe
# grouped_feature

In [61]:
# x positions (years) and bar heights (yearly averages) from the aggregate
years = list(grouped_feature["YEAR"])
plot_feature = list(grouped_feature[feature])

# empty figure that will hold the bars
p = figure(title="Average Unemployment Rate by Year",
           plot_height=400)

# one vertical bar per year, each filled with its own palette colour
p.vbar(x=years, top=plot_feature, width=3, fill_color=Spectral5)

# put ticks exactly on the plotted years, then name both axes
p.xaxis.ticker = years
p.xaxis.axis_label = "Years"
p.yaxis.axis_label = "Average Unemployment Rate"

# render the figure
show(p)

### Package Bar Chart Plot into a Function

In [69]:
def bar_plot(feature = "UNEMPLOYMENT_RATE"):
    """Plot the per-year mean of one column of ``df`` as a Bokeh bar chart.

    Parameters
    ----------
    feature : str, default "UNEMPLOYMENT_RATE"
        Name of the column of the notebook-level ``df`` to average
        within each value of its ``YEAR`` column.

    Returns
    -------
    The value returned by ``bokeh.plotting.show`` for the rendered figure.
    """
    # mean of the selected feature within each YEAR group (index = YEAR)
    yearly_mean = df.groupby("YEAR")[feature].mean()

    # take the x positions from the aggregated data itself so the bars
    # always line up with the heights, whatever years the data contains
    years = yearly_mean.index.tolist()
    plot_feature = yearly_mean.tolist()

    # one fill colour per bar; cycle through the palette so the colour list
    # always has the same length as the other bar columns
    bar_colors = [Spectral5[i % len(Spectral5)] for i in range(len(years))]

    # initialize the plot
    p = figure(plot_height=400,
               title="Selected Feature Summary Statistics by Year")

    # plot bar chart
    p.vbar(x=years, top=plot_feature, width=3, fill_color=bar_colors)

    # label axis and ticks (ticks are pinned to the plotted years)
    p.xaxis.axis_label = "Years"
    p.yaxis.axis_label = "Average Selected Feature"
    p.xaxis.ticker = years

    return show(p)
        

Test plotting other selected features

In [82]:
# see all available features for selection: every column from
# UNEMPLOYMENT_RATE onward (looked up by name rather than by a hard-coded
# position, so the listing stays correct if leading columns change)
df.columns[df.columns.get_loc("UNEMPLOYMENT_RATE"):]

Index(['UNEMPLOYMENT_RATE', 'AVG_WAGE_SALARY', 'AA_FEMALE', 'AA_MALE',
       'BA_FEMALE', 'BA_MALE', 'H_FEMALE', 'H_MALE', 'IA_FEMALE', 'IA_MALE',
       'NA_FEMALE', 'NA_MALE', 'TOT_FEMALE', 'TOT_MALE', 'TOT_POP',
       'WA_FEMALE', 'WA_MALE', 'TOT_POP_LESS19', 'TOT_MALE_LESS19',
       'TOT_FEMALE_LESS19', 'TOT_POP_20to39', 'TOT_MALE_20to39',
       'TOT_FEMALE_20to39', 'TOT_POP_40to59', 'TOT_MALE_40to59',
       'TOT_FEMALE_40to59', 'TOT_POP_Above60', 'TOT_MALE_Above60',
       'TOT_FEMALE_Above60', 'AA_FEMALE%', 'AA_MALE%', 'BA_FEMALE%',
       'BA_MALE%', 'H_FEMALE%', 'H_MALE%', 'IA_FEMALE%', 'IA_MALE%',
       'NA_FEMALE%', 'NA_MALE%', 'WA_FEMALE%', 'WA_MALE%', 'TOT_FEMALE%',
       'TOT_MALE%', 'TOT_POP_LESS19%', 'TOT_POP_20to39%', 'TOT_POP_40to59%',
       'TOT_POP_Above60%', 'MARGIN_VICTORY'],
      dtype='object')

In [83]:
# draw the yearly-average chart for the victory margin
test1 = bar_plot(feature="MARGIN_VICTORY")

In [84]:
# yearly mean of MARGIN_VICTORY, tabulated to cross-check the plotted bars
test1_grouped = df.loc[:, ["YEAR", "MARGIN_VICTORY"]].groupby(by=df["YEAR"]).mean()
test1_grouped

Unnamed: 0_level_0,YEAR,MARGIN_VICTORY
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,2000,-0.171779
2004,2004,-0.2153
2008,2008,-0.152396
2012,2012,-0.208777
2016,2016,-0.314008


In [72]:
# draw the yearly-average chart for the share of population under 19
test2 = bar_plot(feature="TOT_POP_LESS19%")

In [76]:
# yearly mean of TOT_POP_LESS19%, tabulated to cross-check the plotted bars
test2_grouped = df.loc[:, ["YEAR", "TOT_POP_LESS19%"]].groupby(by=df["YEAR"]).mean()
test2_grouped

Unnamed: 0_level_0,YEAR,TOT_POP_LESS19%
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,2000,0.284323
2004,2004,0.275984
2008,2008,0.267129
2012,2012,0.261081
2016,2016,0.251323
