In [1]:
#imports
import numpy as np
import pandas as pd
import geopandas as gpd
import json

from shapely.geometry import Point

import matplotlib.pyplot as plt

from bokeh.io import show, output_notebook, output_file
from bokeh.plotting import save, figure
from bokeh.models import GeoJSONDataSource, HoverTool, LinearColorMapper

#library that can look up FIPS codes
import us

%matplotlib inline

In [2]:
# Read the 116th-Congress district shapefile from the local data folder.
shapefile_path = './data/district_shape_files/us-116th-congressional-districts.shp'
gdf = gpd.read_file(shapefile_path)

In [3]:
# Preview the first five rows of the freshly loaded table.
gdf.head(n=5)

Unnamed: 0,AREALAND,CENTLON,BASENAME,FUNCSTAT,CDTYP,CENTLAT,CDSESSN,MTFCC,INTPTLAT,HU100,...,INTPTLON,LSADC,POP100,STATE,GEOID,NAME,OID,LEN,CD116,geometry
0,1961857000.0,-94.7924812,3,N,O,38.8921109,116,G5200,38.8950035,,...,-94.7906431,C2,,20,2003,Congressional District 3,211904700000000.0,300778.9,3,"POLYGON ((-10581640.8426 4716558.141000003, -1..."
1,37076060000.0,-97.7398849,4,N,O,37.5477516,116,G5200,37.5544654,,...,-97.7439831,C2,,20,2004,Congressional District 4,211904700000000.0,1361146.0,4,"POLYGON ((-11084143.3693 4554917.087099999, -1..."
2,36633400000.0,-95.4308232,2,N,O,38.4777375,116,G5200,38.4822777,,...,-95.42464,C2,,20,2002,Congressional District 2,211904700000000.0,1481857.0,2,"POLYGON ((-10776468.3192 4830390.271700002, -1..."
3,196347000000.0,-100.2270268,Congressional District (at Large),N,O,44.4445678,116,G5200,44.4467957,,...,-100.2381762,C1,,46,4600,Congressional District (at Large),211904700000000.0,2963038.0,0,"POLYGON ((-11583670.1033 5621144.450300001, -1..."
4,543555800.0,144.7693797,Delegate District (at Large),N,O,13.4427156,116,G5200,13.4382886,,...,144.7729493,C4,,66,6698,Delegate District (at Large),211904700000000.0,179211.4,98,"POLYGON ((16092726.9697 1510969.434599999, 160..."


In [4]:
# Keep the states and leave out the territories: retain only rows whose
# STATE code, cast to an integer, is below 59.
state_code_as_int = gdf['STATE'].astype(int)
gdf = gdf.loc[state_code_as_int < 59]

In [5]:
# Discard undefined congressional districts (CD116 code 'ZZ').
gdf = gdf.loc[gdf['CD116'].ne('ZZ')]

In [6]:
# Discard DC, which is the row still carrying CD116 code '98'.
gdf = gdf.loc[gdf['CD116'].ne('98')]

In [7]:
# Build a lookup from each STATE code in gdf to its two-letter abbreviation.
# Only the distinct codes are resolved, so each state is looked up once rather
# than once per district row; the resulting dictionary is the same.
state_codes = {}

for code in gdf.STATE.unique():
    state = us.states.lookup(code)
    if state is None:
        # Name the offending code instead of failing with an opaque
        # AttributeError on None.
        raise ValueError('No state found for STATE code {!r}'.format(code))
    state_codes[code] = state.abbr

In [8]:
# Swap every STATE code for its abbreviation via the state_codes lookup.
gdf['STATE'] = gdf['STATE'].map(state_codes)

In [9]:
# At-large districts are coded '00'; relabel them as district '01'.
is_at_large = gdf['CD116'] == '00'
gdf['CD116'] = np.where(is_at_large, '01', gdf['CD116'])

In [10]:
# Compose the district label as '<STATE>-<CD116>', e.g. 'KS-03'.
gdf['district'] = gdf['STATE'].str.cat(gdf['CD116'], sep='-')

In [11]:
# Count the distinct district labels as a sanity check.
gdf['district'].nunique()

435

In [12]:
# Renumber the rows from 0 after the filtering above; the old index is discarded.
gdf = gdf.reset_index(drop=True)

In [13]:
# Preview the table now that STATE, CD116 and district have been prepared.
gdf.head(n=5)

Unnamed: 0,AREALAND,CENTLON,BASENAME,FUNCSTAT,CDTYP,CENTLAT,CDSESSN,MTFCC,INTPTLAT,HU100,...,LSADC,POP100,STATE,GEOID,NAME,OID,LEN,CD116,geometry,district
0,1961857000.0,-94.7924812,3,N,O,38.8921109,116,G5200,38.8950035,,...,C2,,KS,2003,Congressional District 3,211904700000000.0,300778.9,3,"POLYGON ((-10581640.8426 4716558.141000003, -1...",KS-03
1,37076060000.0,-97.7398849,4,N,O,37.5477516,116,G5200,37.5544654,,...,C2,,KS,2004,Congressional District 4,211904700000000.0,1361146.0,4,"POLYGON ((-11084143.3693 4554917.087099999, -1...",KS-04
2,36633400000.0,-95.4308232,2,N,O,38.4777375,116,G5200,38.4822777,,...,C2,,KS,2002,Congressional District 2,211904700000000.0,1481857.0,2,"POLYGON ((-10776468.3192 4830390.271700002, -1...",KS-02
3,196347000000.0,-100.2270268,Congressional District (at Large),N,O,44.4445678,116,G5200,44.4467957,,...,C1,,SD,4600,Congressional District (at Large),211904700000000.0,2963038.0,1,"POLYGON ((-11583670.1033 5621144.450300001, -1...",SD-01
4,6086782000.0,-72.8559714,1,N,O,42.3220872,116,G5200,42.3310441,,...,C2,,MA,2501,Congressional District 1,211904700000000.0,679239.4,1,"POLYGON ((-8182896.5063 5173890.644599997, -81...",MA-01


In [14]:
# Remove the shapefile attribute columns that are not used further down.
unused_columns = [
    'AREALAND', 'CENTLON', 'BASENAME', 'FUNCSTAT', 'CDTYP',
    'HU100', 'POP100', 'GEOID', 'MTFCC', 'INTPTLON', 'LSADC',
    'NAME', 'OID', 'LEN',
]
gdf.drop(columns=unused_columns, inplace=True)

In [15]:
# Preview the slimmed-down table.
gdf.head(n=5)

Unnamed: 0,CENTLAT,CDSESSN,INTPTLAT,AREA,STATE,CD116,geometry,district
0,38.8921109,116,38.8950035,3297382000.0,KS,3,"POLYGON ((-10581640.8426 4716558.141000003, -1...",KS-03
1,37.5477516,116,37.5544654,59383410000.0,KS,4,"POLYGON ((-11084143.3693 4554917.087099999, -1...",KS-04
2,38.4777375,116,38.4822777,60769660000.0,KS,2,"POLYGON ((-10776468.3192 4830390.271700002, -1...",KS-02
3,44.4445678,116,44.4467957,391981800000.0,SD,1,"POLYGON ((-11583670.1033 5621144.450300001, -1...",SD-01
4,42.3220872,116,42.3310441,11379120000.0,MA,1,"POLYGON ((-8182896.5063 5173890.644599997, -81...",MA-01


## Testing

In [16]:
# Load the results table that will be joined to the district shapes.
results_csv = './data/final_df.csv'
final_df = pd.read_csv(results_csv)

In [17]:
# Remove the 'Unnamed: 0' column read in from the CSV.
# errors='ignore' keeps this cell re-runnable: once the column is gone,
# running the cell again is a no-op instead of raising KeyError.
final_df = final_df.drop(columns='Unnamed: 0', errors='ignore')

In [18]:
# Preview the first five rows of the results table.
final_df.head(n=5)

Unnamed: 0,district,uncontested,116first,116last,116gender,116party,2018votes,2018pct,2018margin,congress,...,trumpagree_20,ypreds_-20,ypreds_-15,ypreds_-10,ypreds_-5,ypreds_0,ypreds_5,ypreds_10,ypreds_15,ypreds_20
0,AL-01,False,Bradley,Byrne,1,R,152308,63.3,26.6,115,...,1.15,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942,38.871538,38.871538,38.871538
1,AL-02,False,Martha,Roby,0,R,138582,61.5,23.0,115,...,1.149474,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942,38.871538,38.871538,38.871538
2,AL-03,False,Mike,Rogers,1,R,147481,63.8,27.6,115,...,1.15,38.554546,38.554546,38.686064,38.686064,38.686064,38.686064,39.679659,39.679659,39.679659
3,AL-04,False,Robert,Aderholt,1,R,183968,79.9,59.8,115,...,1.162105,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942,38.871538,38.871538,38.871538
4,AL-05,False,Mo,Brooks,1,R,158373,61.1,22.2,115,...,1.004348,37.746425,37.746425,37.746425,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942


In [19]:
# Multiply the '115agree_pct' column by 100.
# NOTE: this overwrites the column, so running the cell a second time
# multiplies the values by 100 again.
final_df['115agree_pct'] = final_df['115agree_pct'].mul(100)

In [20]:
# Preview the results table after the rescaling step.
final_df.head(n=5)

Unnamed: 0,district,uncontested,116first,116last,116gender,116party,2018votes,2018pct,2018margin,congress,...,trumpagree_20,ypreds_-20,ypreds_-15,ypreds_-10,ypreds_-5,ypreds_0,ypreds_5,ypreds_10,ypreds_15,ypreds_20
0,AL-01,False,Bradley,Byrne,1,R,152308,63.3,26.6,115,...,1.15,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942,38.871538,38.871538,38.871538
1,AL-02,False,Martha,Roby,0,R,138582,61.5,23.0,115,...,1.149474,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942,38.871538,38.871538,38.871538
2,AL-03,False,Mike,Rogers,1,R,147481,63.8,27.6,115,...,1.15,38.554546,38.554546,38.686064,38.686064,38.686064,38.686064,39.679659,39.679659,39.679659
3,AL-04,False,Robert,Aderholt,1,R,183968,79.9,59.8,115,...,1.162105,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942,38.871538,38.871538,38.871538
4,AL-05,False,Mo,Brooks,1,R,158373,61.1,22.2,115,...,1.004348,37.746425,37.746425,37.746425,37.746425,37.746425,37.877942,37.877942,37.877942,37.877942


In [21]:
# Attach the results to the district shapes on the shared 'district' label.
# This is an inner join, so only districts present in both tables are kept.
merged = gdf.merge(final_df, how='inner', on='district')

In [22]:
# Serialise the merged table to GeoJSON, parse it into Python objects, and
# dump it back to the string that the Bokeh data sources consume.
geojson_text = merged.to_json()
merged_json = json.loads(geojson_text)
json_data = json.dumps(merged_json)

## Bokeh for Trump Agreement Percentage

In [23]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

In [24]:
# Choropleth of the '115agree_pct' column for every district.

# Data source built from the merged GeoJSON string.
geosource = GeoJSONDataSource(geojson=json_data)

# 11-step red/blue Brewer palette.
palette = brewer['RdBu'][11]

# Linear colour scale covering 0 to 100.
color_mapper = LinearColorMapper(palette=palette, low=0, high=100)

# Text overrides for the colour-bar ticks at these values.
tick_labels = {
    '5': '<5%',
    '10': '10%',
    '15': '15%',
    '20': '20%',
    '75': '75%',
    '80': '80%',
    '85': '85%',
    '90': '90%',
    '95': '>95%',
}

# Horizontal colour bar that serves as the legend.
color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=8,
    width=500,
    height=20,
    border_line_color=None,
    location=(0, 0),
    orientation='horizontal',
    major_label_overrides=tick_labels,
)

# Fields shown when hovering over a district.
hover_fields = [
    ('Name', '@district'),
    ('Trump Agreement %', '@115agree_pct'),
    ('Congressman', '@115last'),
    ('Party', '@115party'),
]

# Figure with no axes and the toolbar on the right.
p = figure(
    title='Trump Agreement Percentage',
    plot_height=600,
    plot_width=950,
    toolbar_location='right',
    tooltips=hover_fields,
    x_axis_location=None,
    y_axis_location=None,
)

# Hide both sets of grid lines.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Draw one patch per district, filled according to '115agree_pct'.
district_fill = {'field': '115agree_pct', 'transform': color_mapper}
p.patches(
    'xs',
    'ys',
    source=geosource,
    fill_color=district_fill,
    line_color='black',
    line_width=0.25,
    fill_alpha=1,
)

# Place the colour bar underneath the map.
p.add_layout(color_bar, 'below')

In [25]:
# Write the figure to an HTML file. NOTE: the destination is an absolute,
# machine-specific path.
save(
    p,
    filename='/Users/christophershaw/Desktop/DSI/capstone/trump_agreement_districts.html',
    title='Trump Agreement by District',
)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")


'/Users/christophershaw/Desktop/DSI/capstone/trump_agreement_districts.html'

## Bokeh for 2018 Margin

In [26]:
# Choropleth of the '2018margin' column for every district.

# Data source built from the merged GeoJSON string.
geosource = GeoJSONDataSource(geojson=json_data)

# 11-step red/blue Brewer palette.
palette = brewer['RdBu'][11]

# Linear colour scale covering -10 to 10; margins outside that range are
# drawn with the end colours of the palette.
color_mapper = LinearColorMapper(palette=palette, low=-10, high=10)

# Text overrides for the colour-bar ticks. The two end labels describe the
# clamped extremes: the low end covers margins below -10 (so it reads
# '<-10%', not '>-10%') and the high end covers margins above 10. The centre
# tick sits at a margin of exactly 0, so it is labelled '0%'.
tick_labels = {'-10': '<-10%', '-8': '-8%', '-6': '-6%', '-4': '-4%', '-2': '-2%', '0': '0%',
               '2': '2%', '4': '4%', '6': '6%', '8': '8%', '10': '>10%'}

# Horizontal colour bar that serves as the legend.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width=500, height=20,
                     border_line_color=None, location=(0, 0), orientation='horizontal',
                     major_label_overrides=tick_labels)

# Figure with no axes, the toolbar on the right, and hover fields per district.
p = figure(title='2018 Vote Margin', plot_height=600, plot_width=950,
           toolbar_location='right',
           tooltips=[("Name", "@district"), ("2018 Election Margin", "@2018margin"),
                     ('Congressman', '@116last'), ('Party', '@116party'), ('Uncontested', '@uncontested')],
           x_axis_location=None, y_axis_location=None,)

# Hide both sets of grid lines.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Draw one patch per district, filled according to '2018margin'.
p.patches('xs', 'ys', source=geosource, fill_color={'field': '2018margin', 'transform': color_mapper},
          line_color='black', line_width=0.25, fill_alpha=1)

# Place the colour bar underneath the map.
p.add_layout(color_bar, 'below')

In [27]:
# Write the figure to an HTML file. NOTE: the destination is an absolute,
# machine-specific path.
save(
    p,
    filename='/Users/christophershaw/Desktop/DSI/capstone/2018margin_districts.html',
    title='2018 Election Margins',
)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")


'/Users/christophershaw/Desktop/DSI/capstone/2018margin_districts.html'

## Predicted Margin with 10% more agreement with Trump

In [28]:
# Choropleth of the 'ypreds_10' column (the predicted margin named in this
# section's heading) for every district.

# Data source built from the merged GeoJSON string.
geosource = GeoJSONDataSource(geojson=json_data)

# 11-step red/blue Brewer palette.
palette = brewer['RdBu'][11]

# Linear colour scale covering -10 to 10; values outside that range are
# drawn with the end colours of the palette.
color_mapper = LinearColorMapper(palette=palette, low=-10, high=10)

# Text overrides for the colour-bar ticks. The two end labels describe the
# clamped extremes: the low end covers values below -10 (so it reads
# '<-10%', not '>-10%') and the high end covers values above 10. The centre
# tick sits at exactly 0, so it is labelled '0%'.
tick_labels = {'-10': '<-10%', '-8': '-8%', '-6': '-6%', '-4': '-4%', '-2': '-2%', '0': '0%',
               '2': '2%', '4': '4%', '6': '6%', '8': '8%', '10': '>10%'}

# Horizontal colour bar that serves as the legend.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width=500, height=20,
                     border_line_color=None, location=(0, 0), orientation='horizontal',
                     major_label_overrides=tick_labels)

# Figure with no axes and the toolbar on the right. The hover label says
# "Predicted" because the value shown is the ypreds_10 prediction, not the
# actual 2018 result.
p = figure(title='2018 Vote Margin +10% Agreement', plot_height=600, plot_width=950,
           toolbar_location='right',
           tooltips=[("Name", "@district"), ("Predicted 2018 Margin", "@ypreds_10"),
                     ('Party', '@116party')],
           x_axis_location=None, y_axis_location=None,)

# Hide both sets of grid lines.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Draw one patch per district, filled according to 'ypreds_10'.
p.patches('xs', 'ys', source=geosource, fill_color={'field': 'ypreds_10', 'transform': color_mapper},
          line_color='black', line_width=0.25, fill_alpha=1)

# Place the colour bar underneath the map.
p.add_layout(color_bar, 'below')

In [29]:
# Write the figure to an HTML file. NOTE: the destination is an absolute,
# machine-specific path.
save(
    p,
    filename='/Users/christophershaw/Desktop/DSI/capstone/2018margin_districts_10.html',
    title='2018 Election Margins +10% Agreement',
)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")


'/Users/christophershaw/Desktop/DSI/capstone/2018margin_districts_10.html'

## Predicted Margin with 10% less agreement with Trump

In [30]:
# Give the 'ypreds_-10' column a hyphen-free name for the plotting cells below.
column_renames = {'ypreds_-10': 'ypreds_minus10'}
final_df.rename(columns=column_renames, inplace=True)

In [31]:
# Redo the join so the merged table reflects the current final_df columns,
# then rebuild the GeoJSON string used by the Bokeh data source.
merged = gdf.merge(final_df, how='inner', on='district')

geojson_text = merged.to_json()
merged_json = json.loads(geojson_text)

json_data = json.dumps(merged_json)

In [32]:
# Choropleth of the 'ypreds_minus10' column (the predicted margin named in
# this section's heading) for every district.

# Data source built from the merged GeoJSON string.
geosource = GeoJSONDataSource(geojson=json_data)

# 11-step red/blue Brewer palette.
palette = brewer['RdBu'][11]

# Linear colour scale covering -10 to 10; values outside that range are
# drawn with the end colours of the palette.
color_mapper = LinearColorMapper(palette=palette, low=-10, high=10)

# Text overrides for the colour-bar ticks. The two end labels describe the
# clamped extremes: the low end covers values below -10 (so it reads
# '<-10%', not '>-10%') and the high end covers values above 10. The centre
# tick sits at exactly 0, so it is labelled '0%'.
tick_labels = {'-10': '<-10%', '-8': '-8%', '-6': '-6%', '-4': '-4%', '-2': '-2%', '0': '0%',
               '2': '2%', '4': '4%', '6': '6%', '8': '8%', '10': '>10%'}

# Horizontal colour bar that serves as the legend.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width=500, height=20,
                     border_line_color=None, location=(0, 0), orientation='horizontal',
                     major_label_overrides=tick_labels)

# Figure with no axes and the toolbar on the right. The hover label says
# "Predicted" because the value shown is the ypreds_minus10 prediction, not
# the actual 2018 result.
p = figure(title='2018 Vote Margin -10% Agreement', plot_height=600, plot_width=950,
           toolbar_location='right',
           tooltips=[("Name", "@district"), ("Predicted 2018 Margin", "@ypreds_minus10"),
                     ('Party', '@116party')],
           x_axis_location=None, y_axis_location=None,)

# Hide both sets of grid lines.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Draw one patch per district, filled according to 'ypreds_minus10'.
p.patches('xs', 'ys', source=geosource, fill_color={'field': 'ypreds_minus10', 'transform': color_mapper},
          line_color='black', line_width=0.25, fill_alpha=1)

# Place the colour bar underneath the map.
p.add_layout(color_bar, 'below')

In [33]:
# Write the figure to an HTML file. NOTE: the destination is an absolute,
# machine-specific path.
save(
    p,
    filename='/Users/christophershaw/Desktop/DSI/capstone/2018margin_districts_-10.html',
    title='2018 Election Margins -10% Agreement',
)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")


'/Users/christophershaw/Desktop/DSI/capstone/2018margin_districts_-10.html'