In [1]:
import pandas as pd
import numpy as np
import json
import csv

Now we'll load in the data with Pandas read_csv. We'll also use the head() command to double check that we're getting what we want.

In [3]:
# Read the 2016 House general-election results CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="house_general_election_2016.csv")
# Preview the first five rows as the cell output to sanity-check the load.
df.head(n=5)

Unnamed: 0,fips,geo_name,individual_party,is_incumbent,is_winner,name,rank,reporting_pct,state,vote_pct,votes
0,1,1st District Election,republican,True,True,B. Byrne,1,100.0,Alabama,,
1,2,2nd District Election,republican,True,True,M. Roby,1,100.0,Alabama,54.6,134450.0
2,2,2nd District Election,democrat,False,False,N. Mathis,2,100.0,Alabama,45.4,111640.0
3,3,3rd District Election,republican,True,True,M. Rogers,1,100.0,Alabama,67.1,190724.0
4,3,3rd District Election,democrat,False,False,J. Smith,2,100.0,Alabama,32.9,93567.0


Notice that we have several rows (one per candidate) all corresponding to a single district! We need to change this so that we can properly bind the data to our district GeoJSON path data with D3. So we will iterate through the rows of the dataframe using `df.iterrows()`, collecting the elements from each row into one larger datapoint per district.

We will output a JSON file. The JSON will have the following format:

`{"cd114": [{state: _, district:_, results:{D:[{candidate:_,
                                                rank:_,
                                                vote_pct:_,
                                                votes:_,
                                                is_winner:_,
                                                is_incumbent:_},...],
                                           R:[...],
                                           I:[...]}}, ...]}`

In [4]:
# Collapse the one-row-per-candidate frame into one record per district.
# Rows belonging to the same (state, fips) pair are assumed to be contiguous in
# the CSV: a change in either value closes the record that is being built.
df_new = {"cd114": []}

next_row = {"state": None, "district": None, "results": {"D": [], "R": [], "I": []}}
# Maps the CSV party label to its key in "results"; any other label goes to "I".
party_keys = {"democrat": "D", "republican": "R"}

# Keep track of the current district and state. They are initialised from the
# first row inside the loop, so an empty frame simply yields an empty "cd114"
# list instead of raising StopIteration.
curr_state = None
curr_district = None
seen_first_row = False

# Loop through iterrows
for _row_label, row in df.iterrows():
    if not seen_first_row:
        curr_state = row["state"]
        curr_district = row["fips"]
        seen_first_row = True
    elif row["state"] != curr_state or row["fips"] != curr_district:
        # This row starts a new district: finalise the record collected so far
        # and begin a fresh one.
        next_row["state"] = curr_state
        next_row["district"] = curr_district
        df_new["cd114"].append(next_row)
        next_row = {"state": None, "district": None, "results": {"D": [], "R": [], "I": []}}
        curr_state = row["state"]
        curr_district = row["fips"]

    # Missing numeric values are written as the string "NaN" rather than a
    # float NaN, which json.dump would emit as a bare (non-standard) NaN token.
    vote_pct = "NaN" if np.isnan(row["vote_pct"]) else row["vote_pct"]
    votes = "NaN" if np.isnan(row["votes"]) else row["votes"]

    # Result specific to a candidate.
    candidate_result = {"candidate": row["name"],
                        "rank": row["rank"],
                        "vote_pct": vote_pct,
                        "votes": votes,
                        "is_winner": row["is_winner"],
                        "is_incumbent": row["is_incumbent"]}

    curr_party = row["individual_party"]
    next_row["results"][party_keys.get(curr_party, "I")].append(candidate_result)

# The loop only closes a record when the *next* district begins, so the last
# district is still pending here. curr_state / curr_district already describe
# it, which avoids DataFrame.get_value (removed in pandas 1.0) and the
# assumption that the index label of the last row is len(df) - 1.
# The str()/int() coercions are kept from the original final-record handling.
if seen_first_row:
    next_row["state"] = str(curr_state)
    next_row["district"] = int(curr_district)
    df_new["cd114"].append(next_row)

Now we just need to output our JSON to a file (in this case a .txt file was used).

In [5]:
# Write the per-district results to disk as JSON.
# ensure_ascii=False makes json.dump emit non-ASCII characters (e.g. accented
# candidate names) verbatim, so the file encoding is pinned to UTF-8 here;
# otherwise open() uses the platform's locale encoding, which can raise
# UnicodeEncodeError or produce a file the consumer decodes incorrectly.
with open("congressional_results_2016.txt", "w", encoding="utf-8") as f:
    json.dump(df_new, f, ensure_ascii=False)