# Visualizations

This notebooks walks through the process for generating choropleths of the United States, to geographically visualize campaign finance information. 


## Getting the Data

For these visualizations, we want to sets of data: state of candidate running for election, and donation amount; and then state of the donor, and donation amount.

In order to end up with these two tables, we first need to pull the appropriate data from the database. Then we can drop the fields we will not need, to lessen the size of the data. 


In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pandas as pd

In [None]:
#Define connection. 
#engine=create_engine('postgresql://username:password@host:port/databasename)
engine=create_engine('postgresql://')

#review table names
table_names=engine.table_names()
print(table_names)

In [None]:
df=pd.read_sql_query('SELECT * from individual_contribution_join_abbreviated', engine)

In [None]:
df.head()

In [None]:
df = df.drop("cmte_id", axis = 1)
df = df.drop("amndt_ind", axis = 1)
df = df.drop("rpt_tp", axis = 1)
df = df.drop("result", axis = 1)
df = df.drop("cand_pty_affliation", axis = 1)
df = df.drop("transaction_pgi", axis = 1)
df = df.drop("entity_tp", axis = 1)
df = df.drop("name", axis = 1)
df = df.drop("sub_id", axis = 1)

df.head()

In [None]:
df.to_csv('fecindividual.csv', sep = ",", index = False)

## Cleaning the Data

To get the data properly formatted, we will need substantial cleaning and wrangling of the data:
-	Filter to only 2014 campaign cycle donations
-	Identify the state of the candidate (based on the cand_id)
-	Sum all the donations together that are associated with the same state. 


In [2]:
df = pd.read_csv('fecindividual.csv')

In [3]:
df.head()

Unnamed: 0,cand_id,state,transaction_dt,transaction_amt
0,H4PA13124,PA,4132011.0,250.0
1,H4PA13124,NY,6302011.0,250.0
2,H4PA13124,NY,6302011.0,250.0
3,H4PA13124,PA,6242011.0,250.0
4,H4PA13124,PA,6242011.0,250.0


In [4]:
df['transaction_amt'] = df['transaction_amt'].astype('int')
df['cand_id'] = df['cand_id'].astype('str')
df['state'] = df['state'].astype('str')
df['transaction_dt'] = df['transaction_dt'].astype('str')

In [5]:
df['transaction_dt'] = df['transaction_dt'].str[3:7]
df.head()

Unnamed: 0,cand_id,state,transaction_dt,transaction_amt
0,H4PA13124,PA,2011,250
1,H4PA13124,NY,2011,250
2,H4PA13124,NY,2011,250
3,H4PA13124,PA,2011,250
4,H4PA13124,PA,2011,250


In [6]:
df.loc[df['transaction_dt'] == '2013', 'transaction_dt'] = '2014'
df.head()

Unnamed: 0,cand_id,state,transaction_dt,transaction_amt
0,H4PA13124,PA,2011,250
1,H4PA13124,NY,2011,250
2,H4PA13124,NY,2011,250
3,H4PA13124,PA,2011,250
4,H4PA13124,PA,2011,250


In [7]:
df = df[(df.transaction_dt == "2014")]
df = df[df['transaction_amt']> 0]
df.head()

Unnamed: 0,cand_id,state,transaction_dt,transaction_amt
991273,H0NY25078,NY,2014,1000
991274,H0NY25078,NY,2014,1000
991275,H0NY25078,NY,2014,1000
991276,H0NY25078,NY,2014,1000
991277,H0NY25078,NY,2014,1000


In [8]:
df.to_csv('donationsclean.csv', sep = ',', index = False)

## Format data for map of states showing how much money from out of state is used in their elections.

-   Identify rows of donations where the candidate's state does not match the donor's state. Drop rows where the states match.
-   Drop the donor state column.
-   Sum each donation by state of candidate to get sum of out of state money spent in each of those races
-   Generate choropleth

In [9]:
df = df.drop("transaction_dt", axis = 1)
df['cand_id'] = df['cand_id'].str[2:4]
df = df[(df.cand_id != df.state)]
df = df.drop("state", axis = 1)

In [10]:
dfgroup = df.groupby(['cand_id']).transaction_amt.sum()
dfgroup.head()

cand_id
AK      431484
AL      847748
AR    10198594
AZ     6410950
CA    16320598
Name: transaction_amt, dtype: int32

In [11]:
dfgroup.to_csv("DonationsByDonorState.csv", sep = ',', header = False)

In [12]:
dfgroup = pd.read_csv("DonationsByDonorState.csv")

In [13]:
dfgroup.columns = ['state', 'donations']

In [14]:
dfstates = pd.read_csv('HouseRepState.csv')
dfstates.head()

Unnamed: 0,﻿State,Number of House Seats from 2010,State Abbreviation
0,Alabama,7,AL
1,Alaska,1,AK
2,Arizona,9,AZ
3,Arkansas,4,AR
4,California,53,CA


In [15]:
dfstates.columns = ['state name', 'number of seats', 'state']

In [16]:
df_donor_state = dfgroup.merge(dfstates,on='state')
df_donor_state = df_donor_state.drop("state name", axis = 1)
df_donor_state.columns = ['state', 'donations', 'numberofseats']
df_donor_state = df_donor_state.drop("numberofseats", axis = 1)
df_donor_state.head()

Unnamed: 0,state,donations
0,AL,847748
1,AR,10198594
2,AZ,6410950
3,CA,16320598
4,CO,6807148


In [17]:
df_donor_state.to_csv("OutofStateDonationsSpentInState.csv", sep = ",", index = False)

In [18]:
dfdonor = pd.read_csv("DonationsByDonorStateFinal.csv")
dfdonor.head()

Unnamed: 0,state,donations
0,AL,847748
1,AR,10198594
2,AZ,6410950
3,CA,16320598
4,CO,6807148


In [19]:
init_notebook_mode(connected=True)

In [20]:
for col in dfdonor.columns:
    dfdonor[col] = dfdonor[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]


dfdonor['text'] = dfdonor['state'] + '<br>' +\
    'Amount Donated '+dfdonor['donations']

    
    
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = dfdonor['state'],
        z = dfdonor['donations'].astype(float),
        locationmode = 'USA-states',
        text = dfdonor['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Dollars Donated")
        ) ]

layout = dict(
        title = 'Amount Spent in 2014 Congressional Elections By Out Of State of Donors' + '<br>' + '(Hover for breakdown)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
iplot(fig)

## Format data for map of states showing states where donors sent money out of state.
-   Identify rows of donations where the candidate's state does not match the donor's state. Drop rows where the states match.
-   Drop the candidate state column.
-   Sum each donation by state of donor to get sum of donor amount in each state going to other states' races
-   Generate choropleth

In [21]:
df = pd.read_csv('donationsclean.csv')

In [22]:
df = df.drop("transaction_dt", axis = 1)
df['cand_id'] = df['cand_id'].str[2:4]
df = df[(df.cand_id != df.state)]
df = df.drop("cand_id", axis = 1)

In [23]:
dfgroup = df.groupby(['state']).transaction_amt.sum()
dfgroup.head()

state
AE       1000
AK     141573
AL    1459226
AP       1000
AR     798422
Name: transaction_amt, dtype: int64

In [24]:
dfgroup.to_csv("OutOfStateDonationsByDonor.csv", sep = ',', header = False)

In [25]:
dfgroup = pd.read_csv("OutOfStateDonationsByDonor.csv")

In [26]:
dfgroup.columns = ['state', 'donations']

In [27]:
dfstates = pd.read_csv('HouseRepState.csv')
dfstates.head()

Unnamed: 0,﻿State,Number of House Seats from 2010,State Abbreviation
0,Alabama,7,AL
1,Alaska,1,AK
2,Arizona,9,AZ
3,Arkansas,4,AR
4,California,53,CA


In [28]:
dfstates.columns = ['state name', 'number of seats', 'state']

In [29]:
df_donor_state = dfgroup.merge(dfstates,on='state')
df_donor_state = df_donor_state.drop("state name", axis = 1)
df_donor_state.columns = ['state', 'donations', 'numberofseats']
df_donor_state = df_donor_state.drop("numberofseats", axis = 1)
df_donor_state.head()

Unnamed: 0,state,donations
0,AK,141573
1,AL,1459226
2,AR,798422
3,AZ,2685695
4,CA,24108588


In [30]:
df_donor_state.to_csv("OutofStateDonationsFromState.csv", sep = ",", index = False)

In [31]:
dfdonor = pd.read_csv("OutofStateDonationsFromState.csv")
dfdonor.head()

Unnamed: 0,state,donations
0,AK,141573
1,AL,1459226
2,AR,798422
3,AZ,2685695
4,CA,24108588


In [32]:
init_notebook_mode(connected=True)

In [33]:
for col in dfdonor.columns:
    dfdonor[col] = dfdonor[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]


dfdonor['text'] = dfdonor['state'] + '<br>' +\
    'Amount Donated '+dfdonor['donations']

    
    
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = dfdonor['state'],
        z = dfdonor['donations'].astype(float),
        locationmode = 'USA-states',
        text = dfdonor['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Dollars Donated")
        ) ]

layout = dict(
        title = 'Donors who sent Money to Out of State 2014 Congressional Elections' + '<br>' + '(Hover for breakdown)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
iplot(fig)

## Format data for map average amount of money spent for each race, in each state

-   Label the cand_id with the state they are running for election in
-   Group all states together to get the amount spent (by individual donors) in each state
-   Normalize amount of donations for the number of races in each state

In [34]:
df = pd.read_csv("donationsclean.csv")
df.head()

Unnamed: 0,cand_id,state,transaction_dt,transaction_amt
0,H0NY25078,NY,2014,1000
1,H0NY25078,NY,2014,1000
2,H0NY25078,NY,2014,1000
3,H0NY25078,NY,2014,1000
4,H0NY25078,NY,2014,1000


In [35]:
df['cand_id'] = df['cand_id'].str[2:4]
df.head()

Unnamed: 0,cand_id,state,transaction_dt,transaction_amt
0,NY,NY,2014,1000
1,NY,NY,2014,1000
2,NY,NY,2014,1000
3,NY,NY,2014,1000
4,NY,NY,2014,1000


In [36]:
df = df.drop("state", axis = 1)

In [37]:
df.to_csv("DonationsByRaceState.csv", sep = ',', index = False)

In [38]:
df.head()

Unnamed: 0,cand_id,transaction_dt,transaction_amt
0,NY,2014,1000
1,NY,2014,1000
2,NY,2014,1000
3,NY,2014,1000
4,NY,2014,1000


In [39]:
df = df.drop("transaction_dt", axis = 1)

In [40]:
dfgroup = df.groupby(['cand_id']).transaction_amt.sum()
dfgroup.head()

cand_id
AK      733248
AL     4935335
AR    17697844
AZ    20393553
CA    77747745
Name: transaction_amt, dtype: int64

In [41]:
dfgroup.to_csv("DonationsByRaceState.csv", sep = ',', header = False)

In [42]:
dfgroup = pd.read_csv("DonationsByRaceState.csv", header = None)
dfgroup.head()

Unnamed: 0,0,1
0,AK,733248
1,AL,4935335
2,AR,17697844
3,AZ,20393553
4,CA,77747745


In [43]:
dfgroup.columns = ['state', 'donations']

In [44]:
dfstates = pd.read_csv('HouseRepState.csv')

In [45]:
dfstates.columns = ['state name', 'numberofseats', 'state']

In [46]:
df_race_state = dfgroup.merge(dfstates,on='state')
df_race_state.head()

Unnamed: 0,state,donations,state name,numberofseats
0,AK,733248,Alaska,1
1,AL,4935335,Alabama,7
2,AR,17697844,Arkansas,4
3,AZ,20393553,Arizona,9
4,CA,77747745,California,53


In [47]:
df_race_state['normalized'] = df_race_state['donations']/df_race_state['numberofseats']

In [48]:
df_race_state.head()

Unnamed: 0,state,donations,state name,numberofseats,normalized
0,AK,733248,Alaska,1,733248.0
1,AL,4935335,Alabama,7,705047.9
2,AR,17697844,Arkansas,4,4424461.0
3,AZ,20393553,Arizona,9,2265950.0
4,CA,77747745,California,53,1466939.0


In [49]:
df_race_state = df_race_state.drop("numberofseats", axis = 1)
df_race_state = df_race_state.drop("state name", axis = 1)
df_race_state = df_race_state.drop("donations", axis = 1)

In [50]:
df_race_state.to_csv("DonationsByRaceStateFinal.csv", sep = ',', index = False)

In [51]:
init_notebook_mode(connected=True)

In [52]:
df_state = pd.read_csv("DonationsByRaceStateFinal.csv")
df_state.head()

Unnamed: 0,state,normalized
0,AK,733248.0
1,AL,705047.9
2,AR,4424461.0
3,AZ,2265950.0
4,CA,1466939.0


In [53]:
for col in df_state.columns:
    df_state[col] = df_state[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]


df_state['text'] = df_state['state'] + '<br>' +\
    'Amount Donated '+df_state['normalized']

    
    
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df_state['state'],
        z = df_state['normalized'].astype(float),
        locationmode = 'USA-states',
        text = df_state['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Dollars Donated")
        ) ]

layout = dict(
        title = 'Average Amount Spent In Each State in 2014 Congressional Representatives Elections' + '<br>' + '(Hover for breakdown)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
iplot(fig)

## Generating the Chloropleths



Code is adapted from here: https://plot.ly/pandas/choropleth-maps/