In [1]:
import numpy as np
import pandas as pd

Original data source: FEC bulk contribution download (saved locally as `P00000001-ALL.csv`), from http://classic.fec.gov/disclosurep/PDownload.do

In [2]:
# Read the raw export lazily in 1000-row chunks instead of parsing it in one pass.
# index_col=False tells pandas not to use the first column as the index.
chunks = pd.read_csv(
    'P00000001-ALL.csv',
    index_col=False,
    iterator=True,
    chunksize=1000,
)

In [3]:
# Stitch every chunk into one frame; ignore_index=True discards the
# per-chunk indices and renumbers the rows 0..n-1.
df = pd.concat(
    chunks,
    ignore_index=True,
)

In [4]:
# Allow-list of postal abbreviations: the 50 states and DC.
# Used to keep only contributions from these jurisdictions.
states = (
    "AL AK AZ AR CA CO CT DC DE FL GA "
    "HI ID IL IN IA KS KY LA ME MD "
    "MA MI MN MS MO MT NE NV NH NJ "
    "NM NY NC ND OH OK OR PA RI SC "
    "SD TN TX UT VT VA WA WV WI WY"
).split()
# Keep only rows whose contributor state is in `states`; rows with a missing
# or unlisted state code are dropped. `isin` performs the membership test
# vectorised instead of calling a Python lambda once per row.
domestic_df = df[df.contbr_st.isin(states)]

In [5]:
# Keep only the columns used downstream and only rows with a strictly positive
# receipt amount (zero, negative and missing amounts are excluded).
# The trailing .copy() makes `df` an independent frame, so adding columns to it
# later does not raise SettingWithCopyWarning.
analysis_cols = ['cand_nm', 'contbr_st', 'contbr_occupation', 'contb_receipt_amt', 'contbr_zip']
df = domestic_df.loc[domestic_df.contb_receipt_amt > 0.0, analysis_cols].copy()

In [6]:
# Candidate name (spelled exactly as in the `cand_nm` column) -> party label.
# Used to add a political party field to each contribution row.
political_party = {
    # Republican candidates
    'Rubio, Marco': 'republican',
    'Santorum, Richard J.': 'republican',
    'Perry, James R. (Rick)': 'republican',
    'Carson, Benjamin S.': 'republican',
    "Cruz, Rafael Edward 'Ted'": 'republican',
    'Paul, Rand': 'republican',
    'Fiorina, Carly': 'republican',
    'Huckabee, Mike': 'republican',
    'Graham, Lindsey O.': 'republican',
    'Bush, Jeb': 'republican',
    'Trump, Donald J.': 'republican',
    'Jindal, Bobby': 'republican',
    'Christie, Christopher J.': 'republican',
    'Walker, Scott': 'republican',
    'Kasich, John R.': 'republican',
    'Gilmore, James S III': 'republican',
    # Pataki ran in the Republican primary; he was previously mislabelled 'democrat'.
    'Pataki, George E.': 'republican',
    # Democratic candidates
    'Clinton, Hillary Rodham': 'democrat',
    'Sanders, Bernard': 'democrat',
    'Lessig, Lawrence': 'democrat',
    "O'Malley, Martin Joseph": 'democrat',
    'Webb, James Henry Jr.': 'democrat',
    # Third-party / independent candidates
    'Johnson, Gary': 'third_party',
    'Stein, Jill': 'third_party',
    'McMullin, Evan': 'third_party',
}

In [7]:
# Look up each candidate's party; names missing from `political_party` become NaN.
# assign() returns a new frame rather than writing into `df` in place, so it does
# not raise SettingWithCopyWarning when `df` is a filtered slice of another frame.
df = df.assign(party=df.cand_nm.map(political_party))

In [8]:
# clean up zip codes
# Parse zip codes as numbers; values that cannot be parsed become NaN.
# The option is spelled 'coerce' -- the previous 'coerse' is not a valid value
# and is rejected with a ValueError by current pandas.
df['contbr_zip2'] = pd.to_numeric(df['contbr_zip'], errors='coerce')
# NOTE: dropna() without `subset` removes rows with a missing value in ANY column,
# not only rows whose zip failed to parse. The index is then renumbered 0..n-1.
df = df.dropna().reset_index(drop=True)

In [9]:
def _five_digit_zip(raw_zip):
    """Return the 5-digit ZIP code for `raw_zip` as a zero-padded string.

    `raw_zip` is anything `int()` accepts (e.g. 12345, 12345.0, "123456789").
    Values above 99999 are treated as 9-digit ZIP+4 codes and reduced to their
    first five digits with floor division, which stays correct even when the
    numeric value has lost a leading zero (12345678 -> "01234"). The result is
    always left-padded with zeros to five characters (1234 -> "01234").

    Raises whatever `int()` raises for a value it cannot convert.
    """
    zip_int = int(raw_zip)
    if zip_int > 99999:
        # Floor division: true division would yield a float such as 1234.5678,
        # whose string form truncates to "1234." instead of a ZIP code.
        zip_int //= 10000
    return str(zip_int).zfill(5)


# One 5-digit ZIP string per row, in row order. Iterating the column directly
# avoids a label lookup per row and does not depend on the index being 0..n-1.
new_zips = [_five_digit_zip(raw_zip) for raw_zip in df['contbr_zip']]

In [10]:
# Attach the normalised zip strings as a new column; `new_zips` must hold
# one entry per row of `df`, in row order.
df = df.assign(fiveD_zipcodes=new_zips)

In [11]:
# Write the cleaned frame to disk; index=False leaves the row index out of the file.
df.to_csv(
    path_or_buf='fec2016.csv',
    index=False,
)