Bonus Lecture #1: Doing Data Science with <a href="https://plot.ly/">plotly</a>
=================

### Exploring the US election

In this lecture, we are going to explore some ways of visualizing data using plotly, a very cool visualization api which allows you to create professional-looking visualizations. We will do that in the context of Presidential election! We are going to combine data from various sources, just as you would do in real life data science! The data that we are using is a processed version of those found in the below sources.

**US Election results** can be found <a href="http://www.stat.berkeley.edu/users/nolan/data/voteProject/2016_US_County_Level_Presidential_Results.csv">here</a>.

**Median Income data by state** can be found here <a href="https://www.census.gov/prod/2012pubs/acsbr11-02.pdf">here</a>.

**Gini coefficient** data lives <a href="http://www.census.gov/topics/income-poverty/income-inequality.html">here</a>.

In [1]:
import plotly.plotly as py
import plotly.graph_objs as go
from datascience import *
import numpy as np
import math

py.sign_in('data8', 'qOP6GPnJlLfnS2TnwDvs')

## Election Results Overview 

In [2]:
#First off let's read in our data and take a quick look
#One easy way to read in your data from a csv file is using the datascience package's table
#You will learn more about the datascience package next week!
election_2012 = Table.read_table("https://raw.githubusercontent.com/adnanhemani/data8-assets/master/data/election_df_2012.csv")
election_2016 = Table.read_table("https://raw.githubusercontent.com/adnanhemani/data8-assets/master/data/election_df_2016.csv").drop("code")
election_2016.show(5)

state_abbr,votes_dem.16,votes_gop.16,perc_diff,text
AK,93003.0,130413.0,-0.167445,AK Democrats: 41.6277258567% Republicans: 58.37227 ...
AL,718084.0,1306920.0,-0.290784,AL Democrats: 35.4607806681% Republicans: 64.53921 ...
AR,378729.0,677904.0,-0.28314,AR Democrats: 35.8430031998% Republicans: 64.15699 ...
AZ,936250.0,1021150.0,-0.0433758,AZ Democrats: 47.8312090912% Republicans: 52.16879 ...
CA,5931280.0,3184720.0,0.30129,CA Democrats: 65.0645063341% Republicans: 34.93549 ...


In [3]:
#The first step is to extract the data we are interested in from our tables
#all the statements below return the corresponding table columns in the form of a numpy array.

#state abbreviations in alphabetical order
state_abbreviations = election_2016.column("state_abbr").astype(str)
#hover text for 2016 state results
text_16 = election_2016.column("text").astype(str)
#Percentage difference between DNC and GOP in 2016
perc_difference_16 = election_2016.column("perc_diff").astype(str)
# 2016 election vote count by state
votes_dem_16 = election_2016.column("votes_dem.16").astype(int)
votes_gop_16 = election_2016.column("votes_gop.16").astype(int)
# Percentage difference between DNC and GOP in 2012

perc_difference_12 = election_2012.column("perc_diff").astype(str)

In [4]:
#Now I want to calculate the percentage that each party got on each state - numpy math
perc_dem = votes_dem_16/(votes_dem_16+votes_gop_16)
perc_gop = votes_gop_16/(votes_dem_16+votes_gop_16)

In [5]:
#Link to bar chart tutorial: https://plot.ly/python/bar-charts/
import plotly.plotly as py
import plotly.graph_objs as go

trace1 = go.Bar(
    y=state_abbreviations, #labels
    x=perc_dem, #data
    name='Democrats', #Name of the category of data
    orientation = 'h', #vertical or horizontal bar chart
    marker = dict(
        color = 'rgba(169, 228, 232, 0.4)',
        line = dict(
            color = 'rgba(169, 228, 232, 1.0)',
            width = 5)
    )
)
trace2 = go.Bar(
    y=state_abbreviations,
    x=perc_gop,
    name='Republicans',
    orientation = 'h',
    marker = dict(
        color = 'rgba(224, 92, 45, 0.6)', #a is transparency, more clear once we zoom in
        line = dict(
            color = 'rgba(224, 92, 45, 1.0)',
            width = 5)
    )
)

data = [trace1, trace2]

layout = go.Layout(
    title = "Battle of the States",
    barmode='stack',
    xaxis = dict(title="% Vote"),
    yaxis = dict(title="State Labels")
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='marker-h-bar')

In [6]:
# Link to chloropleth map tutorial: https://plot.ly/python/choropleth-maps/

#Determine the color scale that we will be using
scl = [[0, 'rgb(255, 0, 0)'],[0.25, 'rgb(255, 122, 122)'],[0.4, "rgb(247, 133, 133)"],[0.45, 'rgb(255, 153, 153)'],
       [0.5, 'rgb(255, 255, 255)'],[0.55, 'rgb(160, 160, 255)'],[0.6, 'rgb(133, 177, 247)'], [0.75, 'rgb(122,122,255)'],[1, 'rgb(0, 0, 255)']]

#first off we have to put the data in the right format
data = [ dict(
        type='choropleth', #specifies type of plot
        autocolorscale = False,
        locations = state_abbreviations, #the state abbreviations go here
        z = perc_difference_16.astype(float), # Data that will determine color
        locationmode = 'USA-states', #determine the type of map you want to make
        text = text_16, # hover text content
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ), #marker determines line color between states 
        colorbar = dict(
            title = "Percentage Difference"), #Title of our map
            colorscale = scl, #set the colorscale we created
            zmin = -0.5,
            zmax = 0.5
        ) ]

#make the layout that contains information about the map
layout = dict(
        title = '2016 Election Results', #chart title
        geo = dict( #more information about the map plot
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
py.iplot( fig, filename='d3-cloropleth-map' )

## In how many states did each party improve their performance?

This election was admittedly one of the most polarized of the recent years. So one question to ask is how did that affect the electoral map?

In [7]:
#explore in how many states republicans and democrats improved their percentage
perc_difference_12 = perc_difference_12.astype(float)
perc_difference_16 = perc_difference_16.astype(float)
diff = perc_difference_16-perc_difference_12
diff

array([        nan, -0.06653383, -0.04118679,  0.06154231,  0.09065405,
       -0.01602986, -0.0428437 ,  0.05965599, -0.06889856, -0.02196869,
        0.02127256, -0.08562986, -0.15948696, -0.03570099,  0.00155769,
       -0.09594675,  0.00136109, -0.08256713, -0.02814982,  0.05432321,
        0.00726051, -0.12689232, -0.09870574, -0.06251926, -0.10306611,
       -0.06835505, -0.08346208, -0.0175548 , -0.19376047, -0.0479124 ,
       -0.05474053, -0.03601187, -0.0101138 , -0.04176926, -0.04866872,
       -0.10896324, -0.05077576, -0.00628566, -0.06393449, -0.11274131,
       -0.04007624, -0.13507731, -0.06489077,  0.06549292,  0.23887209,
        0.0195334 , -0.06074703,  0.04336346, -0.07765141, -0.16928941,
       -0.08935981])

In [8]:
# Doing math with numpy arrays

#Get rid of the nan value
diff = diff[~np.isnan(diff)]
dems_improved = np.count_nonzero(diff>0)
republicans_improved = np.count_nonzero(diff<0)

In [9]:
#Bar chart
trace0 = go.Bar(
            x=['Democrats improved', 'Republicans improved'],
            y=[dems_improved, republicans_improved]
    )

data = [trace0]

layout = go.Layout(
    title="In how many states did the two parties improve?"
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='basic-bar')

In [10]:
#Let's try to see how many states republicans actually converted compared to the 2012 election
gop_12 = perc_difference_12<0 #won by the gop in 2012
gop_16 = perc_difference_16<0 #won by the gop in 2016
lst = list(zip(gop_12, gop_16))
lst

[(False, True),
 (True, True),
 (True, True),
 (True, True),
 (False, False),
 (False, False),
 (False, False),
 (False, False),
 (False, False),
 (False, True),
 (True, True),
 (False, False),
 (False, True),
 (True, True),
 (False, False),
 (True, True),
 (True, True),
 (True, True),
 (True, True),
 (False, False),
 (False, False),
 (False, False),
 (False, True),
 (False, False),
 (True, True),
 (True, True),
 (True, True),
 (True, True),
 (True, True),
 (True, True),
 (False, False),
 (False, False),
 (False, False),
 (False, False),
 (False, False),
 (False, True),
 (True, True),
 (False, False),
 (False, True),
 (False, False),
 (True, True),
 (True, True),
 (True, True),
 (True, True),
 (True, True),
 (False, False),
 (False, False),
 (False, False),
 (False, True),
 (True, True),
 (True, True)]

In [11]:
#How many times each item appears in a list
from collections import Counter
counts = dict(Counter(lst))
counts

{(False, False): 21, (False, True): 7, (True, True): 23}

In [12]:
#Code to change the dictionary keys into something more readable
counts["democrat_to_democrat"] = counts.pop((False, False)) #pop removes a key-pair value from your dictionary
counts["democrat_to_republican"] = counts.pop((False, True))
counts["republican_to_republican"] = counts.pop((True, True))
counts["republican_to_democrat"] = 0
counts

{'democrat_to_democrat': 21,
 'democrat_to_republican': 7,
 'republican_to_democrat': 0,
 'republican_to_republican': 23}

In [13]:
#Get all items from a dictionary in alphabetical order of the keys
sorted_list = sorted(counts.items(), key=lambda x: x[0]) #sorts a dictionary by key and convert to a list of tuples.
sorted_list

[('democrat_to_democrat', 21),
 ('democrat_to_republican', 7),
 ('republican_to_democrat', 0),
 ('republican_to_republican', 23)]

In [14]:
trace0 = go.Bar(
    x=['Democrat to Democrat', 'Democrat to Republican', 'Republican to Democrat',
       'Republican to Republican'],
    y=[val for key, val in sorted_list], #I want the keys in sorted order
    marker=dict(
        color=['rgba(204,204,204,1)','rgba(222,45,38,0.8)','rgba(204,204,204,1)', 'rgba(204,204,204,1)']),
)

data = [trace0]
layout = go.Layout(
    title='How many states did the two parties manage to convert?',
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='color-bar')

## Voting and income inequality

Now we will take a look at how income and income inequality relate to the voting patterns using data from 2010 on median income per state and the Gini coefficient for income inequality.

In [15]:
import csv #to read a dict from a csv file
#we will need that dictionary to put our data in a similar format
with open('data/state_abbreviations.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    state_abbr_dict = dict(reader)
# DC is missing from our data
state_abbr_dict['district of columbia'] = "DC"

In [16]:
import pprint #for printing large dictionaries more cleanly
with open('data/gini_states_data.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    gini_data = dict(reader)
    
#Put the data in the right format
for key in gini_data.keys():
    gini_data[key] = eval(gini_data[key])
    
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(gini_data)

{   'Gini': [   0.399791867,
                0.399657935,
                0.376482785,
                0.44936254600000003,
                0.438901246,
                0.53154701,
                0.7277280690000001,
                0.48247823100000004,
                0.397961587,
                0.38879489899999997,
                0.34610495,
                0.487053216,
                0.383764029,
                0.300658852,
                0.36632257700000004,
                0.42266601299999995,
                0.466088861,
                0.483965099,
                0.50558567,
                0.580388665,
                0.48904141799999995,
                0.445720136,
                0.436748475,
                0.46901077,
                0.354874611,
                0.305532992,
                0.31527397,
                0.48403284,
                0.512200654,
                0.41891861,
                0.6293997170000001,
                0.430408478,
                0

In [17]:
#replace states by state abbreaviations
states_encoded = []
for state in gini_data['State']:
    state = state.lower() #convert to lowercase
    if state in state_abbr_dict.keys():
        states_encoded.append(state_abbr_dict[state])
gini_data['State'] = states_encoded

In [18]:
gini = list(zip(gini_data['Year'], gini_data['State'], gini_data['Gini']))
#keep data from 2010
gini_2010 = [i for i in gini if i[0]==2010]
len(gini_2010) #problem! why do we have 52 states?

52

In [19]:
sorted(gini_2010, key = lambda x:x[1])

[(2010, 'AK', 0.632009983),
 (2010, 'AL', 0.681595385),
 (2010, 'AR', 0.6291412120000001),
 (2010, 'AZ', 0.564963937),
 (2010, 'CA', 0.620458305),
 (2010, 'CO', 0.567936897),
 (2010, 'CT', 0.549246907),
 (2010, 'DC', 0.572081685),
 (2010, 'DE', 0.59138608),
 (2010, 'FL', 0.62091881),
 (2010, 'GA', 0.5514155629999999),
 (2010, 'HI', 0.574038565),
 (2010, 'IA', 0.607103229),
 (2010, 'ID', 0.629451573),
 (2010, 'IL', 0.5973303320000001),
 (2010, 'IN', 0.589851499),
 (2010, 'KS', 0.5894688370000001),
 (2010, 'KY', 0.6125893),
 (2010, 'LA', 0.5845420960000001),
 (2010, 'MA', 0.623285711),
 (2010, 'MD', 0.582541764),
 (2010, 'ME', 0.6818228359999999),
 (2010, 'MI', 0.5895479920000001),
 (2010, 'MN', 0.684081972),
 (2010, 'MO', 0.573903501),
 (2010, 'MS', 0.58449775),
 (2010, 'MT', 0.5598515270000001),
 (2010, 'NC', 0.595159471),
 (2010, 'ND', 0.630670428),
 (2010, 'NE', 0.592691958),
 (2010, 'NH', 0.591243804),
 (2010, 'NJ', 0.578736603),
 (2010, 'NM', 0.5760961170000001),
 (2010, 'NV', 0.58

In [20]:
gini_2010.remove((2010, 'SD', 0.655813515))
len(gini_2010)

51

In [21]:
#Convert the list of tuples back into a dictionary, only keeping state and gini coefficient value.
gini_2010 = dict([(i[1], i[2]) for i in gini_2010])
gini_2010

{'AK': 0.632009983,
 'AL': 0.681595385,
 'AR': 0.6291412120000001,
 'AZ': 0.564963937,
 'CA': 0.620458305,
 'CO': 0.567936897,
 'CT': 0.549246907,
 'DC': 0.572081685,
 'DE': 0.59138608,
 'FL': 0.62091881,
 'GA': 0.5514155629999999,
 'HI': 0.574038565,
 'IA': 0.607103229,
 'ID': 0.629451573,
 'IL': 0.5973303320000001,
 'IN': 0.589851499,
 'KS': 0.5894688370000001,
 'KY': 0.6125893,
 'LA': 0.5845420960000001,
 'MA': 0.623285711,
 'MD': 0.582541764,
 'ME': 0.6818228359999999,
 'MI': 0.5895479920000001,
 'MN': 0.684081972,
 'MO': 0.573903501,
 'MS': 0.58449775,
 'MT': 0.5598515270000001,
 'NC': 0.595159471,
 'ND': 0.630670428,
 'NE': 0.592691958,
 'NH': 0.591243804,
 'NJ': 0.578736603,
 'NM': 0.5760961170000001,
 'NV': 0.582882047,
 'NY': 0.599277914,
 'OH': 0.591796398,
 'OK': 0.573493421,
 'OR': 0.590557694,
 'PA': 0.591880679,
 'RI': 0.543013096,
 'SC': 0.57542032,
 'SD': 0.626876354,
 'TN': 0.59136045,
 'TX': 0.564221501,
 'UT': 0.617894828,
 'VA': 0.666916549,
 'VT': 0.588738322,
 'WA

In [22]:
#read in income data
income = Table.read_table("data/states_median_income.csv")
income.show(5)

State,median_income
alabama,41459
alaska,66311
arizona,48108
arkansas,39375
california,59540


In [23]:
#format the income column
income.append_column("median_income",[int(i.replace(",", "")) for i in income.column("median_income")])
income.append_column("State", [state_abbr_dict[key] for key in income.column("State")])

In [24]:
median_income = income.sort("State").column("median_income")
gini = [i[1] for i in sorted(list(gini_2010.items()), key=lambda x:x[0])]

In [25]:
electoral = Table.read_table("data/electoral_votes.csv").select(["State", "electoral_votes"]).take(np.arange(51))
electoral['State'] = electoral.apply(lambda x: state_abbr_dict[x.lower()], "State")
electoral_votes = electoral.sort("State").column("electoral_votes")

In [26]:
#get the indices
democratic_states = perc_difference_16.astype(float)>0
republicans_states = perc_difference_16.astype(float)<0
democratic_states

array([False, False, False, False,  True,  True,  True,  True,  True,
       False, False,  True, False, False,  True, False, False, False,
       False,  True,  True,  True, False,  True, False, False, False,
       False, False, False,  True,  True,  True,  True,  True, False,
       False,  True, False,  True, False, False, False, False, False,
        True,  True,  True, False, False, False], dtype=bool)

In [27]:
hover_text = []
bubble_size = []
for i in range(len(state_abbreviations)): 
    hover_text.append(('State: {state}<br>'+
                          'Electoral Votes: {evotes}<br>'+
                          'Median income: {income}<br>'+
                          'Gini coefficient: {gini}<br>').format(state=state_abbreviations[i],
                                                evotes=electoral_votes[i],
                                                income=median_income[i],
                                                gini=gini[i]
                                                )) #what appears when you hover over the bubbles
    bubble_size.append(electoral_votes[i]) #bubble size depends on #of electoral votes

In [28]:
# Link to bubble chart tutorial: https://plot.ly/python/bubble-charts/
text = hover_text
size = np.array(bubble_size)*2

trace0 = go.Scatter(
    x=median_income[democratic_states],
    y=np.array(gini)[democratic_states],
    mode='markers',
    name='Democratic',
    text=np.array(text)[democratic_states],
    marker=dict(
        symbol='circle',
        sizemode='diameter',
        sizeref=0.85,
        size=np.array(size)[democratic_states],
        line=dict(
            width=2
        )
    )
)
trace1 = go.Scatter(
    x=median_income[republicans_states],
    y=np.array(gini)[republicans_states],
    mode='markers',
    name='Republican',
    text=np.array(text)[republicans_states],
    marker=dict(
        sizemode='diameter',
        sizeref=0.85,
        size=np.array(size)[republicans_states],
        line=dict(
            width=2
        ), color = 'rgb(243, 0, 0)'
    )
)

data = [trace0, trace1]
layout = go.Layout(
    title = "Income inequality and the 2016 election",
    xaxis = dict(title="Median Income in 2010"),yaxis = dict(title="Gini coefficient in 2010"),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='2106')

## Let's take a look at Michigan

In [29]:
#Let's find the voting and census data for all the Michigan counties
election_16 = Table.read_table("https://raw.githubusercontent.com/adnanhemani/data8-assets/master/data/merged_election_data.csv")
mi_voting_data = election_16.where(election_16.column("State") == "michigan")
mi_voting_data

Unnamed: 0,County Name,State,ObamaVotes.12,RomneyVotes.12,votes_dem.16,votes_gop.16,total_votes.16,per_dem.16,per_gop.16,diff.16,per_point_diff.16,bushVote.04,kerryVote.04,Total.Precincts.08,Precincts.Reporting.08,Obama.08,McCain.08,Other.08,name,x_coord,y_coord,state_abbr,total_population,white_population,white_population_error,black_population,black_population_error,total_households,average_household_size,average_family_size,fertility,enrolled_hs,enrolled_higher_ed,edu_less_than_9th,edu_hs_no_diploma,edu_hs_diploma,edu_college_no_degree,edu_college_bachelors,edu_college_graduate_professional,in_labor_force_yes,in_labor_force_no,public_transportation_to_work,working_at_home,occupation_management_business_science_arts,income_less_than_10000,income_10000_to_24999,income_25000_to_49999,income_50000_to_99999,income_100000_to_149999,income_150000_to_199999,income_more_than_200000
39,alcona,michigan,2462,3571,1732,4201,6179,0.280304,0.679883,1037,1111,3590,2871,12,12,2896,3404,84,Alcona,-83.2729,44.7122,MI,11238,10998,257,,,4608,2.43,2.94,77,559,309,337,1064,3758,2012,681,424,4137,5643,7,313,809,363,1229,1520,1194,202,202,52
45,alger,michigan,2212,2329,1663,2585,4513,0.368491,0.57279,2648,372,2316,2395,14,14,2472,2188,90,Alger,-86.4842,47.1604,MI,9604,8366,498,789.0,187.0,3688,2.24,2.73,69,545,255,358,661,3196,1382,917,330,3788,4376,47,224,915,296,800,1194,1062,237,237,47
47,allegan,michigan,19594,29023,17932,33812,55153,0.325132,0.613058,515,673,32952,19227,48,48,23526,29526,1025,Allegan,-86.25,42.565,MI,111385,103163,717,1433.0,317.0,42078,2.62,3.04,1725,6823,5093,3033,5061,28472,16513,9689,4460,56049,29190,92,1946,13907,2423,6495,12028,15327,4198,4198,762
59,alpena,michigan,6549,7298,4877,9090,14688,0.33204,0.618873,1674,666,7665,7406,18,18,7705,7125,255,Alpena,-83.1976,45.037,MI,29958,29241,819,,,13357,2.21,2.7,382,1644,1645,829,1822,7450,5409,1938,1370,14353,10115,41,665,3941,1119,3524,3876,3528,977,977,118
78,antrim,michigan,5107,7917,4448,8469,13572,0.327734,0.624005,1623,704,8379,5072,16,16,6079,7506,267,Antrim,-85.1769,45.006,MI,23975,23137,267,,,10043,2.33,2.75,250,1287,741,398,1419,6473,3863,2413,1631,11084,8458,62,654,2706,669,2141,3043,3008,807,807,192
87,arenac,michigan,3669,4054,2238,4704,7457,0.300121,0.630817,1034,830,4071,4076,20,20,4155,3807,166,Arenac,-83.7524,44.0455,MI,16487,15781,679,,,6686,2.33,2.83,112,951,521,672,1572,5041,2569,843,437,7091,6591,69,325,1654,570,1626,2241,1780,318,318,59
134,baraga,michigan,1574,1866,1156,2158,3486,0.331612,0.619048,2,671,1977,1660,8,8,1725,1846,73,Baraga,-88.342,46.7154,MI,8882,6671,714,599.0,299.0,3336,2.39,2.94,87,480,238,482,678,2799,1342,539,203,3841,3509,27,81,1037,326,620,1098,1019,201,201,38
144,barry,michigan,11496,16651,9109,19197,30265,0.300975,0.634297,10,840,18638,11312,25,25,13449,16431,685,Barry,-85.311,42.5978,MI,59576,58075,1117,,,22843,2.6,3.01,636,3644,3007,928,2883,16028,9836,4152,2494,30505,15866,14,762,7014,955,3586,6372,8510,2409,2409,416
156,bay,michigan,26797,23735,21641,28327,52932,0.408845,0.535158,2233,100,25443,31041,89,89,32589,23794,1044,Bay,-83.9427,43.7208,MI,108156,101781,650,1582.0,440.0,44345,2.41,2.91,1271,6056,6707,2739,6679,26758,17321,9178,4226,54158,32537,283,1573,13927,3528,7931,13545,13709,4022,4022,634
194,benzie,michigan,4683,5071,4108,5539,10221,0.401918,0.541923,393,149,5283,4383,13,13,5461,4687,171,Benzie,-86.2478,44.6304,MI,17705,17059,1009,,,7366,2.35,2.83,170,1032,753,232,1060,4302,2748,1922,1292,8754,5599,34,473,2441,372,1314,2510,2419,547,547,94


In [30]:
# Add a new column indicating % unemployed and % of GOP vote per county
mi_voting_data = mi_voting_data.with_column("perc_labor_force", mi_voting_data.column("in_labor_force_yes")/(mi_voting_data.column("in_labor_force_yes") + mi_voting_data.column("in_labor_force_no")))
mi_voting_data = mi_voting_data.with_column("perc_gop_vote", mi_voting_data.column("votes_gop.16")/(mi_voting_data.column("votes_gop.16") + mi_voting_data.column("votes_dem.16")))

In [31]:
# Break the counties into 2 tables: one for Democrat-voting counties and the other for Republic-voting counties
gop_counties = mi_voting_data.where(mi_voting_data.column("perc_gop_vote") > 0.5)
dem_counties = mi_voting_data.where(mi_voting_data.column("perc_gop_vote") < 0.5)

In [32]:
trace_gop = go.Scatter(
    name = "Republican",
    x = gop_counties.column("perc_labor_force"),
    y = gop_counties.column("perc_gop_vote"),
    text = gop_counties.column("County Name"),
    mode = 'markers'
)

trace_dem = go.Scatter(
    name = "Democrat",
    x = dem_counties.column("perc_labor_force"),
    y = dem_counties.column("perc_gop_vote"),
    text = dem_counties.column("County Name"),
    mode = 'markers'
)

data = [trace_gop, trace_dem]

layout = go.Layout(
    title='Percent Unemployed versus GOP Votes in Michigan',
    xaxis = dict(
        title = "% Employed",
    ),
    yaxis = dict(
        title = "% Votes for Republicans",
    )
)

fig = go.Figure(data = data, layout = layout)

py.iplot(fig, filename='employment-vs-votes')

High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~data8/0 or inside your plot.ly account where it is named 'employment-vs-votes'


## Bonus material - Adnan

In [33]:
import requests
import json
url = 'http://catalog.civicdashboards.com/dataset/08b4daa7-8888-4e03-ad73-ae2ff3c47771/resource/b12bbaa4-14df-4293-8636-2bb80804678c/download/89e296fe1c5c410d83c2d97155366e24temp.geojson'
michigan_data = requests.get(url)
michigan_data = michigan_data.json()

In [34]:
county_names = []
county_names_dict = {}

for county in michigan_data['features']:
    for m in range(len(county['properties']['name'])):
        if county['properties']['name'][m:m+6] == 'County':
            county_names.append(county['properties']['name'][0:m-1].lower())
            county_names_dict[county['properties']['name'][0:m-1]] = county['properties']['name']
            
print(county_names)

['alger', 'cheboygan', 'barry', 'kalamazoo', 'menominee', 'grand traverse', 'mackinac', 'kalkaska', 'ottawa', 'roscommon', 'saginaw', 'ogemaw', 'tuscola', 'mason', 'ingham', 'ontonagon', 'branch', 'montmorency', 'osceola', 'wexford', 'alcona', 'allegan', 'huron', 'hillsdale', 'dickinson', 'chippewa', 'leelanau', 'clare', 'baraga', 'alpena', 'gratiot', 'wayne', 'oscoda', 'bay', 'muskegon', 'keweenaw', 'houghton', 'cass', 'crawford', 'newaygo', 'midland', 'gladwin', 'washtenaw', 'mecosta', 'luce', 'oceana', 'jackson', 'montcalm', 'sanilac', 'shiawassee', 'eaton', 'st. joseph', 'schoolcraft', 'st. clair', 'isabella', 'benzie', 'kent', 'emmet', 'monroe', 'gogebic', 'manistee', 'antrim', 'iosco', 'lake', 'livingston', 'delta', 'van buren', 'berrien', 'missaukee', 'macomb', 'iron', 'lenawee', 'ionia', 'arenac', 'calhoun', 'clinton', 'presque isle', 'charlevoix', 'otsego', 'genesee', 'lapeer', 'marquette', 'oakland']


In [35]:
red_counties_2012 = []
blue_counties_2012 = []
election_16 = Table.read_table("https://raw.githubusercontent.com/adnanhemani/data8-assets/master/data/merged_election_data.csv")
mi_voting_data = election_16.where(election_16.column("State") == "michigan")
mi_voting_data

Unnamed: 0,County Name,State,ObamaVotes.12,RomneyVotes.12,votes_dem.16,votes_gop.16,total_votes.16,per_dem.16,per_gop.16,diff.16,per_point_diff.16,bushVote.04,kerryVote.04,Total.Precincts.08,Precincts.Reporting.08,Obama.08,McCain.08,Other.08,name,x_coord,y_coord,state_abbr,total_population,white_population,white_population_error,black_population,black_population_error,total_households,average_household_size,average_family_size,fertility,enrolled_hs,enrolled_higher_ed,edu_less_than_9th,edu_hs_no_diploma,edu_hs_diploma,edu_college_no_degree,edu_college_bachelors,edu_college_graduate_professional,in_labor_force_yes,in_labor_force_no,public_transportation_to_work,working_at_home,occupation_management_business_science_arts,income_less_than_10000,income_10000_to_24999,income_25000_to_49999,income_50000_to_99999,income_100000_to_149999,income_150000_to_199999,income_more_than_200000
39,alcona,michigan,2462,3571,1732,4201,6179,0.280304,0.679883,1037,1111,3590,2871,12,12,2896,3404,84,Alcona,-83.2729,44.7122,MI,11238,10998,257,,,4608,2.43,2.94,77,559,309,337,1064,3758,2012,681,424,4137,5643,7,313,809,363,1229,1520,1194,202,202,52
45,alger,michigan,2212,2329,1663,2585,4513,0.368491,0.57279,2648,372,2316,2395,14,14,2472,2188,90,Alger,-86.4842,47.1604,MI,9604,8366,498,789.0,187.0,3688,2.24,2.73,69,545,255,358,661,3196,1382,917,330,3788,4376,47,224,915,296,800,1194,1062,237,237,47
47,allegan,michigan,19594,29023,17932,33812,55153,0.325132,0.613058,515,673,32952,19227,48,48,23526,29526,1025,Allegan,-86.25,42.565,MI,111385,103163,717,1433.0,317.0,42078,2.62,3.04,1725,6823,5093,3033,5061,28472,16513,9689,4460,56049,29190,92,1946,13907,2423,6495,12028,15327,4198,4198,762
59,alpena,michigan,6549,7298,4877,9090,14688,0.33204,0.618873,1674,666,7665,7406,18,18,7705,7125,255,Alpena,-83.1976,45.037,MI,29958,29241,819,,,13357,2.21,2.7,382,1644,1645,829,1822,7450,5409,1938,1370,14353,10115,41,665,3941,1119,3524,3876,3528,977,977,118
78,antrim,michigan,5107,7917,4448,8469,13572,0.327734,0.624005,1623,704,8379,5072,16,16,6079,7506,267,Antrim,-85.1769,45.006,MI,23975,23137,267,,,10043,2.33,2.75,250,1287,741,398,1419,6473,3863,2413,1631,11084,8458,62,654,2706,669,2141,3043,3008,807,807,192
87,arenac,michigan,3669,4054,2238,4704,7457,0.300121,0.630817,1034,830,4071,4076,20,20,4155,3807,166,Arenac,-83.7524,44.0455,MI,16487,15781,679,,,6686,2.33,2.83,112,951,521,672,1572,5041,2569,843,437,7091,6591,69,325,1654,570,1626,2241,1780,318,318,59
134,baraga,michigan,1574,1866,1156,2158,3486,0.331612,0.619048,2,671,1977,1660,8,8,1725,1846,73,Baraga,-88.342,46.7154,MI,8882,6671,714,599.0,299.0,3336,2.39,2.94,87,480,238,482,678,2799,1342,539,203,3841,3509,27,81,1037,326,620,1098,1019,201,201,38
144,barry,michigan,11496,16651,9109,19197,30265,0.300975,0.634297,10,840,18638,11312,25,25,13449,16431,685,Barry,-85.311,42.5978,MI,59576,58075,1117,,,22843,2.6,3.01,636,3644,3007,928,2883,16028,9836,4152,2494,30505,15866,14,762,7014,955,3586,6372,8510,2409,2409,416
156,bay,michigan,26797,23735,21641,28327,52932,0.408845,0.535158,2233,100,25443,31041,89,89,32589,23794,1044,Bay,-83.9427,43.7208,MI,108156,101781,650,1582.0,440.0,44345,2.41,2.91,1271,6056,6707,2739,6679,26758,17321,9178,4226,54158,32537,283,1573,13927,3528,7931,13545,13709,4022,4022,634
194,benzie,michigan,4683,5071,4108,5539,10221,0.401918,0.541923,393,149,5283,4383,13,13,5461,4687,171,Benzie,-86.2478,44.6304,MI,17705,17059,1009,,,7366,2.35,2.83,170,1032,753,232,1060,4302,2748,1922,1292,8754,5599,34,473,2441,372,1314,2510,2419,547,547,94


In [36]:
import re

blue_counties_2016 = []
red_counties_2016 = []

for k, county in enumerate(county_names):
    county_cleaned = re.sub("\.", "", county)
    county_cleaned = re.sub(" ", "", county_cleaned)
    county_cleaned = re.sub(".*-", "", county_cleaned)
    row = mi_voting_data.where(mi_voting_data[1] == county_cleaned)
    if (row.num_rows != 1):
        print("Error: " + county_cleaned)
    else:
        if (row["votes_dem.16"].item() > row["votes_gop.16"].item()):
            blue_counties_2016.append(michigan_data['features'][k])
        else:
            red_counties_2016.append(michigan_data['features'][k])
    

In [37]:
red_data_2016 = {"type": "FeatureCollection"}
red_data_2016['features'] = red_counties_2016

blue_data_2016 = {"type": "FeatureCollection"}
blue_data_2016['features'] = blue_counties_2016

with open('data/michigan-red-data-2016.json', 'w') as f:
    f.write(json.dumps(red_data_2016))
with open('data/michigan-blue-data-2016.json', 'w') as f:
    f.write(json.dumps(blue_data_2016))


In [39]:
import plotly.plotly as py
import plotly.graph_objs as graph_objs

mapbox_access_token = 'pk.eyJ1IjoiYWRuYW5oZW1hbmkiLCJhIjoiY2l5N3RnY3M4MDAxZTJxb3Y3anlsbnJ0ZSJ9.REcsaI8vU6ylkrpkjuMtqA'

data = graph_objs.Data([
    graph_objs.Scattermapbox(
        lat=['45.5017'],
        lon=['-73.5673'],
        mode='markers',
    )
])
layout = graph_objs.Layout(
    height=600,
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        layers=[
            dict(
                sourcetype = 'geojson',
                source = 'https://raw.githubusercontent.com/adnanhemani/data8-assets/master/data/michigan-red-data-2016.json',
                type = 'fill',
                color = 'rgba(163,22,19,0.8)'
            ),
            dict(
                sourcetype = 'geojson',
                source = 'https://raw.githubusercontent.com/adnanhemani/data8-assets/master/data/michigan-blue-data-2016.json',
                type = 'fill',
                color = 'rgba(40,0,113,0.8)'
            )
        ],
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=44.5,
            lon=-84.8
        ),
        pitch=0,
        zoom=4.5,
        style='light'
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='county-level-choropleths-python')
