# US Representative Voting Patterns and Funding Sources

Part 1: Pulling FEC funding data through API

In [1]:
import requests as rq # to pull data from FEC API
import pandas as pd # for data exploration
from keys import api_key_fec # holds secure API KEY
import sqlite3 as sq # to pull data from sqlite database
from fuzzywuzzy import fuzz # to fuzzy match names from candidate api to voteview member list
import time # for improved request pulls
from IPython.display import clear_output #for improved request pulls

***

## General Functions in All Data Pulls/Cleans

1. pull_list_of_dicts
   - reads through api request pulls that come in as [[{}]] into a dataframe
2. pull_sub_dict
   - modification of first function to pull out sub-dictionary, in the form of [[{{}}]] into a dataframe
3. page_count
   - reads the first page of an api json and relays the number of pages in the request

In [134]:
def pull_list_of_dicts(dataset):
    """Flatten API results shaped like ``[[{...}, ...], ...]`` into a DataFrame.

    Parameters
    ----------
    dataset : iterable of iterables of mappings
        Outer level is one entry per page/request; inner level is one mapping
        per record.

    Returns
    -------
    pandas.DataFrame
        One row per record, columns in first-seen key order. A key that is
        absent from a record becomes a missing value in that row, so values
        always stay aligned with the record they came from.
    """
    # Build one dict per row rather than one list per key: per-key lists drift
    # out of step as soon as a single record lacks (or adds) a key.
    records = [dict(record) for page in dataset for record in page]
    return pd.DataFrame(records)

In [3]:
# set up with params to pull the committee ids attached to each candidate, other keys may be specified if there is a need to iterate through other non-flat files
def pull_sub_dict(dataframe, filter_key='results', include_key='principal_committees'):
    """Collect nested records shaped like ``[{filter_key: [{include_key: [{...}]}]}]`` into a DataFrame.

    Parameters
    ----------
    dataframe : iterable of dict
        One parsed API response per entry (despite the name, not a DataFrame).
    filter_key : str
        Key in each response holding the list of result dicts.
    include_key : str
        Key in each result dict holding the list of sub-records to extract.

    Returns
    -------
    pandas.DataFrame
        One row per sub-record, columns in first-seen key order. Keys missing
        from a sub-record become missing values in that row.

    Raises
    ------
    KeyError
        If a response has no ``filter_key`` entry.
    """
    records = []
    # first loop cycles through each candidate's api request pull
    for cand_dicts in dataframe:
        # second loop cycles through the result dicts on that candidate's page
        for cand_results in cand_dicts[filter_key]:
            # results without include_key (or with a null value) contribute no rows
            for committee in cand_results.get(include_key) or []:
                # one dict per row keeps every value attached to its own record
                records.append(dict(committee))
    return pd.DataFrame(records)

In [4]:
def page_count(json, column='pagination', key='pages'):
    """Return the number stored under ``key`` inside ``json[column]``.

    Parameters
    ----------
    json : dict
        Parsed API response.
    column : str
        Name of the sub-dictionary to look in.
    key : str
        Entry of that sub-dictionary to read.

    Returns
    -------
    The stored value added to 0, or 0 when ``key`` is not present.

    Raises
    ------
    KeyError
        If ``json`` has no ``column`` entry.
    """
    section = json[column]
    if key not in section:
        return 0
    # adding to 0 mirrors the running-total form and rejects non-numeric values
    return 0 + section[key]

#### Reference Tables/Lists Needed
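*Both CSVs are written by the reference-table sections further down in this notebook (`committees.csv` and `congress.csv`); run those sections first if the files do not exist yet.*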

1. candidate_ids
2. committee_ids

In [5]:
# reference tables saved to ./datasets by the table-building sections of this notebook
congress = pd.read_csv('./datasets/congress.csv')
committee = pd.read_csv('./datasets/committees.csv')

# for all functions, use these plain lists of candidate ids or committee ids
candidate_ids = congress.loc[:, 'candidate_id'].values.tolist()
committee_ids = committee.loc[:, 'committee_id'].values.tolist()

_______________________________________________________________________________________________


### Creating a reference table of registered committees - Independent Committees, Super PACs, etc

**Conditions for filters**
1. Cycle - 2012
2. Designation - All
3. Organization Type - All
4. Committee Type - All except:
   - Delegate
   - Presidential
   - Senate
   - National Party Non-Federal Account

**Steps**
1. Set up blank list and for loop for API pagination 
2. Get request from API with above filters added to url
3. Extend all pages pulled into one list and convert to DataFrame



In [61]:
# pull first page of api request to confirm results are as desired and find the number of pages in request
first_page_url = f'https://api.open.fec.gov/v1/committees/?page=1&per_page=100&cycle=2012&designation=A&designation=J&designation=P&designation=U&designation=B&designation=D&organization_type=C&organization_type=L&organization_type=M&organization_type=T&organization_type=V&organization_type=W&committee_type=C&committee_type=E&committee_type=H&committee_type=I&committee_type=N&committee_type=O&committee_type=Q&committee_type=U&committee_type=V&committee_type=W&committee_type=X&committee_type=Y&sort=name&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}'
committees_rq = rq.get(first_page_url)

# HTTP status of the first-page request
print(committees_rq.status_code)

# the 'pagination' entry of the parsed response holds the page total used by the full pull
committees = committees_rq.json()
committees['pagination']

200


{'count': 3435, 'page': 1, 'pages': 35, 'per_page': 100}

In [62]:
def committee_dataframe(pages):
    """Download ``pages`` pages of the filtered 2012 committee listing into one DataFrame.

    Parameters
    ----------
    pages : int
        Number of result pages to request, starting from page 1.

    Returns
    -------
    pandas.DataFrame
        Every record from each page's ``'results'`` list, in page order.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status`` on the first non-success response.
    """
    page_payloads = []

    for page_number in range(1, pages + 1):
        response = rq.get(f'https://api.open.fec.gov/v1/committees/?page={page_number}&per_page=100&cycle=2012&designation=A&designation=J&designation=P&designation=U&designation=B&designation=D&organization_type=C&organization_type=L&organization_type=M&organization_type=T&organization_type=V&organization_type=W&committee_type=C&committee_type=E&committee_type=H&committee_type=I&committee_type=N&committee_type=O&committee_type=Q&committee_type=U&committee_type=V&committee_type=W&committee_type=X&committee_type=Y&sort=name&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

        # progress line overwrites the previous one in the cell output
        clear_output(wait=True)
        print(f'On page {page_number} out of {pages}!')
        response.raise_for_status()

        # keep the whole parsed page; results are flattened after the loop
        page_payloads.append(response.json())

        # pause between requests
        time.sleep(.5)

    # merge every page's results into one flat list of records
    records = [record for payload in page_payloads for record in payload['results']]
    return pd.DataFrame(records)

In [63]:
# take the page total from the first-page response rather than a copied-in number,
# so the pull stays complete if the count of matching committees changes
committees_df = committee_dataframe(page_count(committees))

On page 35 out of 35!


In [64]:
committees_df.head(2)

Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,committee_type_full,cycles,designation,designation_full,filing_frequency,first_f1_date,...,last_file_date,name,organization_type,organization_type_full,party,party_full,sponsor_candidate_ids,sponsor_candidate_list,state,treasurer_name
0,1199 SEIU UNITED HEALTHCARE WORKERS EAST,[],C00348540,Q,PAC - Qualified,"[2000, 2002, 2004, 2006, 2008, 2010, 2012, 201...",U,Unauthorized,Q,1999-09-01,...,2023-09-06,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,L,Labor Organization,,,,[],NY,"SCHAUB, HELEN"
1,1199 SEIU UNITED HEALTHCARE WORKERS EAST,[],C00344531,Q,PAC - Qualified,"[2000, 2002, 2004, 2006, 2008, 2010, 2012, 201...",U,Unauthorized,Q,1999-04-06,...,2023-07-31,1199 SEIU UNITED HEALTHCARE WORKERS EAST HOME ...,L,Labor Organization,,,,[],NY,"SCHAUB, HELEN"


In [74]:
committees_df.shape

(3435, 7)

#### Committee Referential Table Clean-Up

1. Dropping unnecessary columns (blank/mostly null or not relevant to analysis)
2. Renaming remaining columns
3. Checking for any nulls, exploring basic description of data before export
4. Saving as csv
5. Create list of committee ids for funding API requests

In [65]:
# dropping columns that are blank/mostly null or not relevant to the analysis
committee_columns_to_drop = [
    'candidate_ids', 'first_f1_date', 'first_file_date', 'last_f1_date', 'last_file_date',
    'sponsor_candidate_ids', 'sponsor_candidate_list', 'filing_frequency', 'party', 'party_full',
    'committee_type', 'organization_type', 'designation', 'cycles', 'treasurer_name',
]
committees_df.drop(columns=committee_columns_to_drop, inplace=True)

In [66]:
committees_df.columns

Index(['affiliated_committee_name', 'committee_id', 'committee_type_full',
       'designation_full', 'name', 'organization_type_full', 'state'],
      dtype='object')

In [67]:
# renaming column names by mapping old name -> new name, so a label can never land on
# the wrong column if the column order coming back from the API changes
committees_df = committees_df.rename(columns={
    'committee_type_full': 'committee_type',
    'designation_full': 'designation_type',
    'organization_type_full': 'organization_type',
})

In [68]:
committees_df.head(1)

Unnamed: 0,affiliated_committee_name,committee_id,committee_type,designation_type,name,organization_type,state
0,1199 SEIU UNITED HEALTHCARE WORKERS EAST,C00348540,PAC - Qualified,Unauthorized,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,Labor Organization,NY


In [69]:
committees_df.shape

(3435, 7)

In [70]:
committees_df.to_csv('./datasets/committees.csv',sep=',',index=False)

***

### Match Names from Members and Candidates

**Note that this method assumes that both datasets have matching districts and states. After the initial runthrough 9/433 members in the voteview 'c113m' dataframe had incorrect districts and were manually revised**

*This uses the candidate master flat file instead of api as it includes additional total raised information not as easily accessible through request version. Additional details from the API are merged afterwards*

1. Link members of the 113th congress to their candidate information from the FEC
   - use district and state as exact matches and names as fuzzy matches (to filter out unelected candidates in same races as members)
2. Add congress id to candidate table as a foreign key
3. Filter candidate table down to only member-matching rows

In [44]:
# column labels for the pipe-delimited candidate file, which is read without a header row
cn_columns = [
    'candidate_id', 'name', 'ici', 'pty_cd', 'party', 'ttl_receipts', 'trans_from_comm',
    'ttl_disb', 'trans_to_comm', 'coh_bop', 'coh_cop', 'cand_contrib', 'cand_loans',
    'other_loans', 'cand_loan_repay', 'other_loan_repay', 'debts_owed_by', 'ttl_indiv_contrib',
    'state', 'district', 'spec_election', 'prim_election', 'run_election', 'gen_election',
    'gen_election_precent', 'other_pol_cmte_contrib', 'pol_pty_contrib', 'cvg_end_dt',
    'indiv_refunds', 'cmte_refunds',
]
cn = pd.read_csv('./datasets/cn2.txt', sep='|', names=cn_columns)

# member list of the 113th congress
members = pd.read_csv('./datasets/c113m.csv')

In [45]:
# candidate file: treat a missing district as 0, then store districts as integers
cn['district'] = cn['district'].fillna(0).astype(int)

# member file: store districts as integers so both tables merge on the same dtype
members['district'] = members['district'].astype(int)

In [46]:
# make all letters lowercase in both tables' name columns for future fuzzy matching
for names_frame in (cn, members):
    names_frame['name'] = names_frame['name'].str.lower()

In [47]:
# left-join every member to all candidates filed in the same state and district
# ** this still includes all challengers who lost!
cn_match = members.merge(cn, how='left', on=['state', 'district'])
cn_match.shape

(2585, 37)

In [48]:
# apply fuzzy matching on the merged dataframe to compare the name_x column (the congress members, left side of the merge) to name_y (the FEC candidates, right side)
# uses token_set_ratio (not token_sort_ratio): it scores on the word tokens the two names share, so extra middle names or suffixes on one side do not pull the score down
cn_match['matching_ratio'] = cn_match.apply(lambda x: fuzz.token_set_ratio(x['name_x'], x['name_y']), axis=1)

In [49]:
# for each congress member, find the highest matching ratio among that member's candidate rows
best_ratio = cn_match.groupby('congress_id')['matching_ratio'].transform('max')

# keep every row that reaches its member's best ratio (tied rows are all kept)
msk = cn_match['matching_ratio'] == best_ratio

# columns carried into the result; 'state' and 'district' are listed twice, so both appear twice in `out`
matched_columns = [
    'matching_ratio', 'chamber', 'icpsr', 'district', 'state', 'party_x', 'name_x',
    'NOMINATE_dim1', 'NOMINATE_dim2', 'candidate_id', 'name_y', 'ici', 'pty_cd', 'party_y',
    'ttl_receipts', 'trans_from_comm', 'ttl_disb', 'trans_to_comm', 'coh_bop', 'coh_cop',
    'cand_contrib', 'cand_loans', 'other_loans', 'cand_loan_repay', 'other_loan_repay',
    'debts_owed_by', 'ttl_indiv_contrib', 'state', 'district', 'spec_election',
    'prim_election', 'run_election', 'gen_election', 'gen_election_precent',
    'other_pol_cmte_contrib', 'pol_pty_contrib', 'cvg_end_dt', 'indiv_refunds', 'cmte_refunds',
]
out = cn_match.loc[msk, matched_columns]

In [50]:
# manually check results are accurate
# incorrect from first runthrough: YOUNG, Donald Edwin; SANCHEZ, Linda T; CARNEY, John C. Jr.; DAINES, Steve; GRIMM, Michael G.; CRAMER, Kevin; NOEM, Kristi; WELCH, Peter; LUMMIS, Cynthia M. 
# 98% accurate - will manually fix the missing 9 member info
out



Unnamed: 0,matching_ratio,chamber,icpsr,district,state,party_x,name_x,NOMINATE_dim1,NOMINATE_dim2,candidate_id,...,spec_election,prim_election,run_election,gen_election,gen_election_precent,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds
1,83,House,20300,1,AL,200,"bonner, jr., josiah robins (jo)",0.367,0.513,H2AL01077,...,,W,,W,100.0,523034.40,0.00,12/31/2012,1000.0,500.00
9,87,House,20301,3,AL,200,"rogers, mike dennis",0.363,0.455,H2AL03032,...,,W,,W,62.0,588500.00,2700.00,12/31/2012,1300.0,0.00
11,69,House,21102,7,AL,100,"sewell, terri",-0.396,0.398,H0AL07086,...,,W,,W,76.0,695969.43,1000.00,12/31/2012,2500.0,2500.00
17,100,House,21192,2,AL,200,"roby, martha",0.362,0.658,H0AL02087,...,,W,,W,65.0,522182.68,0.00,12/31/2012,4003.8,6884.62
27,100,House,21193,5,AL,200,"brooks, mo",0.652,-0.417,H0AL05163,...,,W,,W,64.0,459948.35,2500.00,12/31/2012,100.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2561,100,House,21190,8,WI,200,"ribble, reid",0.603,-0.333,H0WI08075,...,,W,,W,55.0,1098737.99,4787.56,12/31/2012,2650.0,1000.00
2563,100,House,21370,2,WI,100,"pocan, mark",-0.531,-0.552,H2WI02124,...,,W,,W,68.0,497098.42,2500.00,12/31/2012,1510.0,1000.00
2572,100,House,29769,3,WI,100,"kind, ron",-0.260,-0.080,H6WI03099,...,,W,,W,64.0,1291983.68,94.50,12/31/2012,0.0,4500.00
2578,100,House,29939,1,WI,200,"ryan, paul d.",0.556,-0.250,H8WI01024,...,,W,,W,54.0,1247900.00,5000.00,12/31/2012,7725.0,1000.00


In [51]:
#second runthrough pulled steven daines senatorial race id as a second match - dropping here
# reassign rather than drop in place (`out` was sliced from cn_match), and ignore a missing
# label so re-running this cell after the row is gone does not raise KeyError
out = out.drop(index=1430, errors='ignore')

In [52]:
# clean up reference table so it only includes necessary columns
reference_columns_to_drop = [
    'matching_ratio', 'chamber', 'name_x', 'pty_cd', 'party_y', 'other_loans',
    'other_loan_repay', 'cand_loan_repay', 'debts_owed_by', 'spec_election', 'prim_election',
    'run_election', 'gen_election', 'gen_election_precent', 'cvg_end_dt', 'indiv_refunds',
    'cmte_refunds',
]

# `out` itself is left unchanged; `congress` is the trimmed copy
congress = out.drop(columns=reference_columns_to_drop)

In [55]:
congress.columns

Index(['icpsr', 'district', 'state', 'party_x', 'NOMINATE_dim1',
       'NOMINATE_dim2', 'candidate_id', 'name_y', 'ici', 'ttl_receipts',
       'trans_from_comm', 'ttl_disb', 'trans_to_comm', 'coh_bop', 'coh_cop',
       'cand_contrib', 'cand_loans', 'ttl_indiv_contrib', 'state', 'district',
       'other_pol_cmte_contrib', 'pol_pty_contrib'],
      dtype='object')

In [56]:
# rename by old name -> new name. The previous positional list relabelled the second
# 'state'/'district' pair as 'district'/'state', i.e. with the two labels swapped.
congress = congress.rename(columns={
    'party_x': 'party',
    'name_y': 'name',
    'trans_from_comm': 'comm_receipts',
    'trans_to_comm': 'comm_disb',
    'coh_bop': 'start_cash',
    'coh_cop': 'end_cash',
    'ttl_indiv_contrib': 'ind_contrib',
    'other_pol_cmte_contrib': 'other_comm_contrib',
    'pol_pty_contrib': 'pty_contrib',
})

# 'state' and 'district' were selected twice when `out` was built; keep only the first
# copy of each so the saved csv has unique column names
congress = congress.loc[:, ~congress.columns.duplicated()]

In [57]:
congress.to_csv('./datasets/congress.csv',sep=',',index=False)

### Creating a reference table of registered candidates by connected committee

**Filters for Candidate Search API**
1. Cycle - 2012
2. Election Year - 2012
3. Office - H
4. Year - 2012
5. Party - DEM, REP (elected officials only from these two parties)

Principal Committee ID and information will be merged with member table

In [6]:
# to get a dataframe of all committees principally connected to candidates

def candidates_dataframe(candidate_id_list, candidate_num):
    """Request each candidate's 2012 search record and return their principal committees.

    Parameters
    ----------
    candidate_id_list : iterable of str
        FEC candidate ids, requested one at a time.
    candidate_num : int
        Total shown in the progress message only; it does not limit the loop.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pull_sub_dict`` builds from the collected responses with its
        default keys.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status`` on the first non-success response.
    """
    responses_json = []

    # one request per candidate id; `position` is the 1-based progress counter
    for position, candidate_id in enumerate(candidate_id_list, start=1):
        response = rq.get(f'https://api.open.fec.gov/v1/candidates/search/?page=1&per_page=100&candidate_id={candidate_id}&cycle=2012&sort=name&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

        # progress line overwrites the previous one in the cell output
        clear_output(wait=True)
        print(f'Pulling candidate {candidate_id}, on {position} out of {candidate_num}!')
        response.raise_for_status()

        # uncomment to debug ---> did each id request have the correct keys?
        # print(response.json().keys())
        responses_json.append(response.json())

        # pause between requests
        time.sleep(.5)

    return pull_sub_dict(responses_json)

In [7]:
# test pull
test_list = ['H0OH08029', 'H8CA05035']

test_df = candidates_dataframe(test_list, 2)
test_df

Pulling candidate H8CA05035, on 2 out of 2!


Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,committee_type_full,cycles,designation,designation_full,filing_frequency,first_f1_date,first_file_date,last_f1_date,last_file_date,name,organization_type,organization_type_full,party,party_full,state,treasurer_name
0,FREEDOM PROJECT; THE,[H0OH08029],C00237198,Q,PAC - Qualified,"[1990, 1992, 1994, 1996, 1998, 2000, 2002, 200...",U,Unauthorized,T,1989-06-26,1989-06-26,2016-12-09,2017-02-07,FRIENDS OF JOHN BOEHNER,,,REP,REPUBLICAN PARTY,VA,"LISKER, LISA"
1,NANCY PELOSI VICTORY FUND,[H8CA05035],C00213512,H,House,"[1986, 1988, 1990, 1992, 1994, 1996, 1998, 200...",P,Principal campaign committee,Q,1987-02-18,1987-02-18,2023-04-28,2023-08-15,NANCY PELOSI FOR CONGRESS,,,DEM,DEMOCRATIC PARTY,DC,"SWIG, STEVEN"


In [102]:
# progress total comes from the id list itself instead of a hard-coded 433
candidates_df = candidates_dataframe(candidate_ids, len(candidate_ids))

Pulling candidate H8WY00148, on 433 out of 433!


In [103]:
# returns a string type of candidate id

def one_string_in_list(df, column):
    """Replace each list in ``df[column]`` with its first element, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the same object is returned.
    column : str
        Column whose cells hold lists (or tuples) of values.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself. Lists/tuples become their first element, empty ones
        become ``None``, and any other value is left as it is.
    """
    def _first_item(value):
        # Only unwrap real sequences: indexing an already-unwrapped string
        # would cut it down to its first character when the cell is re-run.
        if isinstance(value, (list, tuple)):
            return value[0] if value else None
        return value

    # whole-column assignment avoids chained indexing and does not depend on
    # the frame having a 0..n-1 index
    df[column] = df[column].map(_first_item)
    return df

In [104]:
one_string_in_list(candidates_df, 'candidate_ids')

Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,committee_type_full,cycles,designation,designation_full,filing_frequency,first_f1_date,first_file_date,last_f1_date,last_file_date,name,organization_type,organization_type_full,party,party_full,state,treasurer_name
0,GULF COAST VICTORY FUND,H2AL01077,C00375220,H,House,"[2002, 2004, 2006, 2008, 2010, 2012, 2014]",P,Principal campaign committee,T,2002-03-25,2002-03-25,2012-12-28,2014-08-01,JO BONNER FOR CONGRESS COMMITTEE,,,REP,REPUBLICAN PARTY,AL,"LILES, MARY BURKE"
1,NONE,H2AL03032,C00367862,H,House,"[2002, 2004, 2006, 2008, 2010, 2012, 2014, 201...",P,Principal campaign committee,Q,2001-07-20,2001-07-20,2023-07-13,2023-07-14,MIKE ROGERS FOR CONGRESS,,,REP,REPUBLICAN PARTY,AL,"JACKSON, WILLIAM"
2,NONE,H0AL07086,C00458976,H,House,"[2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024]",P,Principal campaign committee,Q,2009-02-10,2009-02-10,2018-10-29,2023-07-14,TERRI SEWELL FOR CONGRESS,,,DEM,DEMOCRATIC PARTY,AL,"ANGERHOLZER, LINDSAY F."
3,TEAM ROBY VICTORY FUND,H0AL02087,C00462143,H,House,"[2010, 2012, 2014, 2016, 2018, 2020, 2022]",P,Principal campaign committee,T,2009-05-20,2009-05-20,2018-12-14,2021-05-16,MARTHA ROBY FOR CONGRESS,,,REP,REPUBLICAN PARTY,AL,"SLAWSON, LEAH"
4,NONE,H0AL05163,C00464149,S,Senate,"[2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024]",P,Principal campaign committee,T,2009-07-09,2009-07-09,2021-03-21,2023-02-09,MO BROOKS FOR SENATE,,,REP,REPUBLICAN PARTY,AL,"BROOKS, MARTHA"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,RIBBLE RESPONSIBLE GOVERNMENT COMMITTEE,H0WI08075,C00463620,H,House,"[2010, 2012, 2014, 2016, 2018]",P,Principal campaign committee,T,2009-07-07,2009-07-07,2014-02-04,2017-02-24,RIBBLE FOR CONGRESS,,,REP,REPUBLICAN PARTY,WI,"MURPHY, TODD M MR."
538,MINNESOTA -WISCONSIN VICTORY FUND,H2WI02124,C00502179,H,House,"[2012, 2014, 2016, 2018, 2020, 2022, 2024]",P,Principal campaign committee,Q,2011-09-15,2011-09-15,2020-07-23,2023-07-15,MARK POCAN FOR CONGRESS,,,DEM,DEMOCRATIC PARTY,WI,"POPE, SONDY"
539,BADGER VICTORY FUND 2020,H6WI03099,C00312017,H,House,"[1996, 1998, 2000, 2002, 2004, 2006, 2008, 201...",P,Principal campaign committee,Q,1996-01-30,1996-01-30,2021-06-30,2023-07-15,KIND FOR CONGRESS COMMITTEE,,,DEM,DEMOCRATIC PARTY,WI,"SMITH, BRENT"
540,PROSPERITY ACTION INC,H8WI01024,C00330894,Q,PAC - Qualified,"[1998, 2000, 2002, 2004, 2006, 2008, 2010, 201...",U,Unauthorized,Q,1997-10-28,1997-10-28,2019-10-01,2023-07-06,"RYAN FOR CONGRESS, INC.",,,REP,REPUBLICAN PARTY,WI,"MAIR, PAUL"


In [106]:
candidates_df

Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,committee_type_full,cycles,designation,designation_full,filing_frequency,first_f1_date,first_file_date,last_f1_date,last_file_date,name,organization_type,organization_type_full,party,party_full,state,treasurer_name
0,GULF COAST VICTORY FUND,H2AL01077,C00375220,H,House,"[2002, 2004, 2006, 2008, 2010, 2012, 2014]",P,Principal campaign committee,T,2002-03-25,2002-03-25,2012-12-28,2014-08-01,JO BONNER FOR CONGRESS COMMITTEE,,,REP,REPUBLICAN PARTY,AL,"LILES, MARY BURKE"
1,NONE,H2AL03032,C00367862,H,House,"[2002, 2004, 2006, 2008, 2010, 2012, 2014, 201...",P,Principal campaign committee,Q,2001-07-20,2001-07-20,2023-07-13,2023-07-14,MIKE ROGERS FOR CONGRESS,,,REP,REPUBLICAN PARTY,AL,"JACKSON, WILLIAM"
2,NONE,H0AL07086,C00458976,H,House,"[2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024]",P,Principal campaign committee,Q,2009-02-10,2009-02-10,2018-10-29,2023-07-14,TERRI SEWELL FOR CONGRESS,,,DEM,DEMOCRATIC PARTY,AL,"ANGERHOLZER, LINDSAY F."
3,TEAM ROBY VICTORY FUND,H0AL02087,C00462143,H,House,"[2010, 2012, 2014, 2016, 2018, 2020, 2022]",P,Principal campaign committee,T,2009-05-20,2009-05-20,2018-12-14,2021-05-16,MARTHA ROBY FOR CONGRESS,,,REP,REPUBLICAN PARTY,AL,"SLAWSON, LEAH"
4,NONE,H0AL05163,C00464149,S,Senate,"[2010, 2012, 2014, 2016, 2018, 2020, 2022, 2024]",P,Principal campaign committee,T,2009-07-09,2009-07-09,2021-03-21,2023-02-09,MO BROOKS FOR SENATE,,,REP,REPUBLICAN PARTY,AL,"BROOKS, MARTHA"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,RIBBLE RESPONSIBLE GOVERNMENT COMMITTEE,H0WI08075,C00463620,H,House,"[2010, 2012, 2014, 2016, 2018]",P,Principal campaign committee,T,2009-07-07,2009-07-07,2014-02-04,2017-02-24,RIBBLE FOR CONGRESS,,,REP,REPUBLICAN PARTY,WI,"MURPHY, TODD M MR."
538,MINNESOTA -WISCONSIN VICTORY FUND,H2WI02124,C00502179,H,House,"[2012, 2014, 2016, 2018, 2020, 2022, 2024]",P,Principal campaign committee,Q,2011-09-15,2011-09-15,2020-07-23,2023-07-15,MARK POCAN FOR CONGRESS,,,DEM,DEMOCRATIC PARTY,WI,"POPE, SONDY"
539,BADGER VICTORY FUND 2020,H6WI03099,C00312017,H,House,"[1996, 1998, 2000, 2002, 2004, 2006, 2008, 201...",P,Principal campaign committee,Q,1996-01-30,1996-01-30,2021-06-30,2023-07-15,KIND FOR CONGRESS COMMITTEE,,,DEM,DEMOCRATIC PARTY,WI,"SMITH, BRENT"
540,PROSPERITY ACTION INC,H8WI01024,C00330894,Q,PAC - Qualified,"[1998, 2000, 2002, 2004, 2006, 2008, 2010, 201...",U,Unauthorized,Q,1997-10-28,1997-10-28,2019-10-01,2023-07-06,"RYAN FOR CONGRESS, INC.",,,REP,REPUBLICAN PARTY,WI,"MAIR, PAUL"


In [113]:
candidates_df['committee_type'].value_counts()

H    465
Q     26
N     22
Name: committee_type, dtype: int64

In [111]:
committees_to_keep = ['H', 'Q', 'N']

In [112]:
# keep only rows whose committee_type is listed in committees_to_keep; a boolean row mask
# replaces the column-by-column apply, and .copy() gives later in-place edits their own frame
candidates_df = candidates_df.loc[candidates_df['committee_type'].isin(committees_to_keep)].copy()

In [114]:
# columns not needed in the candidate-committee reference table
candidate_columns_to_drop = [
    'committee_type_full', 'cycles', 'designation_full', 'filing_frequency', 'first_f1_date',
    'first_file_date', 'last_f1_date', 'last_file_date', 'organization_type_full', 'party',
    'party_full', 'state', 'treasurer_name',
]
candidates_df.drop(columns=candidate_columns_to_drop, inplace=True)

In [115]:
# rename by old name -> new name so labels cannot shift onto the wrong column if the
# set or order of columns returned by the API changes
candidates_df = candidates_df.rename(columns={
    'affiliated_committee_name': 'affiliated_org',
    'candidate_ids': 'candidate_id',
    'organization_type': 'org_type',
})

In [116]:
candidates_df.columns

Index(['affiliated_org', 'candidate_id', 'committee_id', 'committee_type',
       'designation', 'name', 'org_type'],
      dtype='object')

In [117]:
candidates_df.shape

(513, 7)

In [118]:
candidates_df

Unnamed: 0,affiliated_org,candidate_id,committee_id,committee_type,designation,name,org_type
0,GULF COAST VICTORY FUND,H2AL01077,C00375220,H,P,JO BONNER FOR CONGRESS COMMITTEE,
1,NONE,H2AL03032,C00367862,H,P,MIKE ROGERS FOR CONGRESS,
2,NONE,H0AL07086,C00458976,H,P,TERRI SEWELL FOR CONGRESS,
3,TEAM ROBY VICTORY FUND,H0AL02087,C00462143,H,P,MARTHA ROBY FOR CONGRESS,
5,,H2AL06035,C00260547,H,P,BACHUS FOR CONGRESS COMMITTEE,
...,...,...,...,...,...,...,...
536,DUFFY VICTORY FUND,H0WI07051,C00464339,H,P,DUFFY FOR WISCONSIN,
537,RIBBLE RESPONSIBLE GOVERNMENT COMMITTEE,H0WI08075,C00463620,H,P,RIBBLE FOR CONGRESS,
538,MINNESOTA -WISCONSIN VICTORY FUND,H2WI02124,C00502179,H,P,MARK POCAN FOR CONGRESS,
539,BADGER VICTORY FUND 2020,H6WI03099,C00312017,H,P,KIND FOR CONGRESS COMMITTEE,


In [119]:
candidates_df.to_csv('./datasets/candidate_committees.csv',sep=',',index=False)

____________________________________________________________________________________________________________________


### API Funding Requests

**Conditions for Schedule A Filters**
1. Contributor Type - Committee
2. Two Year Transaction Period - 2012 (end of election cycle)
3. Recipient Committee Type - House, Senate

**Conditions for Schedule B Filters**
1. Cycle - 2012

**Conditions for Schedule E Filters**
1. Cycle - 2012
2. Election Full - True

#### Schedule A - Candidate Receipts

**Steps**
1. Pull list of candidate committee ids
   - Every candidate has a distinct committee connected to their election campaign which is necessary to pull donation receipts

In [None]:
# function to pull variables for disbursement date and index needed for main disbursement api pull

def pagination_pull():
    """Request the first page of Schedule B disbursements for committee C00053553.

    Results are sorted by descending disbursement date, 100 per page, for the
    2012 two-year transaction period.

    Returns
    -------
    requests.Response
        The raw response; its parsed ``'pagination'`` entry carries the
        ``'last_indexes'`` values read below.
    """
    first_request = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/?committee_id=C00053553&spender_committee_designation=A&spender_committee_designation=J&spender_committee_designation=P&spender_committee_designation=B&spender_committee_designation=D&spender_committee_org_type=C&spender_committee_org_type=L&spender_committee_org_type=M&spender_committee_org_type=T&spender_committee_org_type=V&spender_committee_org_type=W&spender_committee_type=C&spender_committee_type=D&spender_committee_type=E&spender_committee_type=H&spender_committee_type=I&spender_committee_type=N&spender_committee_type=O&spender_committee_type=P&spender_committee_type=Q&spender_committee_type=S&spender_committee_type=U&spender_committee_type=V&spender_committee_type=W&two_year_transaction_period=2012&per_page=100&sort=-disbursement_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}')
    return first_request

# the response was previously never returned, which left nra_rq undefined
nra_rq = pagination_pull()
print(nra_rq.status_code)
nra_json = nra_rq.json()

nra_page_test = nra_json['pagination']
nra_index_test = nra_page_test['last_indexes']

# show both cursor values (a cell only displays its final expression)
nra_index_test['last_index'], nra_index_test['last_disbursement_date']

In [None]:
# loop through all candidate ids to get a master list of all receipts by congress person

def receipts_master_pull(cand_id_list):  # add list of FEC candidate ids
    """Pull every result page of the schedule_e/by_candidate endpoint for each candidate.

    Requests use cycle 2012 with ``election_full=true``.

    Parameters
    ----------
    cand_id_list : iterable of str
        FEC candidate ids.

    Returns
    -------
    list of list of dict
        One inner list per downloaded page (its ``'results'``), in request
        order; the ``[[{}]]`` shape accepted by ``pull_list_of_dicts``.

    Raises
    ------
    requests.HTTPError
        If a candidate's first-page request returned an error status.
    """
    cand_id_list = list(cand_id_list)
    total = len(cand_id_list)
    receipts_all_pages = []
    count = 0

    for ids in cand_id_list:
        # the first page tells us how many pages this candidate has
        receipts_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page=1&per_page=100&cycle=2012&election_full=true&candidate_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')
        time.sleep(.5)

        try:
            pgone_rq = receipts_rq.json()
            qty_pages = page_count(pgone_rq)

        except KeyError:
            # parsed fine but has no pagination block: report it, abort on an HTTP
            # error, otherwise skip so a stale page count is never reused
            print(f'API Request for candidate {ids} could not be found. Here\'s what was pulled:')
            print(pgone_rq)
            receipts_rq.raise_for_status()
            continue

        except ValueError:
            # body was not valid JSON (JSON decode errors are ValueError subclasses)
            print(receipts_rq.status_code)
            receipts_rq.raise_for_status()
            continue

        count += 1

        # with length of rq pull, loop through each page for this candidate and append to the list
        for i in range(1, qty_pages + 1):
            if i == 1:
                # page 1 is already downloaded; no need to request it again
                page_json = pgone_rq
            else:
                all_receipts_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page={i}&per_page=100&cycle=2012&election_full=true&candidate_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

                # upgraded FEC api key can pull a max of 120 calls a minute
                time.sleep(.5)

                try:
                    page_json = all_receipts_rq.json()
                except ValueError:
                    print(all_receipts_rq.status_code)
                    continue

            receipts_all_pages.append(page_json['results'])

        # check status during pull
        clear_output(wait=True)
        print(f'Pulling expenditures from {ids}, on number {count} out of {total}!')
        receipts_rq.raise_for_status()

    return receipts_all_pages

In [None]:
boehner_json

In [None]:
boehner_list = boehner_json['results']
boehner_list

In [None]:
boehner_dict1 = boehner_list[0]

In [None]:
boehner_dict1

In [None]:
# create while loop for pagination capture

# cursor values set by the first page request, count set to track number of pages requested
last_contribution_receipt_date = '2012-09-21'
last_index = '4102220121167751830'
count = 1

while True:
    # send BOTH cursor values back under the names Schedule A returns them with;
    # without them every request returns the same page
    boehner_rq_loop = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_a/?contributor_id=C00053553&contributor_type=committee&two_year_transaction_period=2012&recipient_committee_type=H&recipient_committee_type=S&per_page=100&sort=-contribution_receipt_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}&last_index={last_index}&last_contribution_receipt_date={last_contribution_receipt_date}')

    # check status during while loop
    print(f'Requesting page {count}')
    boehner_rq_loop.raise_for_status()

    # convert request into json
    boehner_json_loop = boehner_rq_loop.json()

    # add this page's records (not the response's top-level keys) to boehner_list
    boehner_results_loop = boehner_json_loop['results']
    boehner_list.extend(boehner_results_loop)

    # move through json to pull the cursor for the next page
    boehner_page_loop = boehner_json_loop['pagination']
    boehner_index_loop = boehner_page_loop['last_indexes']

    # stop explicitly on an empty page or a missing cursor, rather than
    # waiting for a TypeError from indexing None
    if not boehner_results_loop or not boehner_index_loop:
        print('No more valid pages to loop through')
        break

    # advance the cursor so the next request returns the following page
    last_index = boehner_index_loop['last_index']
    last_contribution_receipt_date = boehner_index_loop['last_contribution_receipt_date']

    # add to counter and pause between requests
    count += 1
    time.sleep(.5)



#### Schedule B - Disbursements by Committee

In [126]:
# loop through all committee ids to get a master list of all committee disbursements

def disbursements_master_pull(committee_id_list):  # add list of FEC committee ids
    """Pull every page of the 2012-cycle Schedule B "by recipient id" endpoint for each committee id.

    For each id the first page is requested, `page_count` reads how many pages
    exist, and the `results` list of every page is appended to the output (the
    first page is reused rather than requested a second time).

    Ids whose first response cannot be decoded as JSON, or does not have the
    expected pagination/results layout, are reported and skipped; a summary of
    skipped ids is printed at the end because `clear_output` wipes the
    per-id messages as the pull progresses.

    Parameters
    ----------
    committee_id_list : iterable of str
        Ids interpolated into the `recipient_id` query parameter.

    Returns
    -------
    list of list of dict
        One inner list per page pulled, in request order.
    """
    def page_url(committee_id, page):
        # single source for the request URL so page 1 and later pages cannot drift apart
        return f'https://api.open.fec.gov/v1/schedules/schedule_b/by_recipient_id/?page={page}&per_page=100&cycle=2012&recipient_id={committee_id}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}'

    disbursements_all_pages = []
    skipped_ids = []
    count = 0

    # progress total is taken from the input instead of a hard-coded number
    try:
        total = len(committee_id_list)
    except TypeError:
        total = '?'

    # cycle through the first page of each rq to find the length of each disbursement list by committee
    for ids in committee_id_list:
        disbursements_rq = rq.get(page_url(ids, 1))
        time.sleep(.5)

        # uncomment to debug
        # print(disbursements_rq)

        # a body that is not JSON surfaces as a ValueError subclass from .json()
        try:
            pgone_rq = disbursements_rq.json()
        except ValueError:
            print('JSON Decoder Error')
            skipped_ids.append(ids)
            continue

        # JSON that lacks the pagination/results layout: skip this id instead of
        # carrying on with the previous id's page count
        try:
            qty_pages = page_count(pgone_rq)
            first_page_results = pgone_rq['results']
        except (KeyError, TypeError):
            print(f'API Request for committee {ids} could not be found. Here\'s what was pulled:')
            print(pgone_rq)
            skipped_ids.append(ids)
            continue

        count += 1

        # page 1 is already in hand, so only pages 2..n need another request
        if qty_pages >= 1:
            disbursements_all_pages.append(first_page_results)

        for i in range(2, qty_pages + 1):
            all_disbursements_rq = rq.get(page_url(ids, i))

            # upgraded FEC api key can pull a max of 120 calls a minute
            time.sleep(.5)

            try:
                disbursements_all_pages.append(all_disbursements_rq.json()['results'])
            except (ValueError, KeyError):
                print(all_disbursements_rq.status_code)

        # check status during pull
        clear_output(wait=True)
        print(f'Pulling disbursements from {ids}, on number {count} out of {total}!')

    # printed after the loop so it survives clear_output
    if skipped_ids:
        print(f'Skipped {len(skipped_ids)} id(s) with unusable responses: {skipped_ids}')

    return disbursements_all_pages

In [127]:
# run the master pull over every id in committee_ids
# each request inside the function is followed by a 0.5 s pause, so this cell is slow
disbursements = disbursements_master_pull(committee_ids)

Pulling disbursements from C00235036, on number 3430 out of 3435!


In [None]:
# #uncomment to troubleshoot errors in api requests


# problem_id = 'H4GA06087'

# problem_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/by_recipient_id/?page=1&per_page=100&cycle=2012&recipient_id={problem_id}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

# print(problem_rq.status_code)
# problem_json = problem_rq.json()

# problem_json

In [128]:
disbursements

[[{'committee_id': 'C00004036',
   'committee_name': 'SEIU COPE (SERVICE EMPLOYEES INTERNATIONAL UNION COMMITTEE ON POLITICAL EDUCATION)',
   'count': 1,
   'cycle': 2012,
   'memo_count': 0,
   'memo_total': 0.0,
   'recipient_id': 'C00348540',
   'recipient_name': "1199 SERVICE EMPLOYEES INT'L UNION FEDERAL POLITICAL ACTION FUND",
   'total': 125000.0}],
 [{'committee_id': 'C00348540',
   'committee_name': '1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDERAL POLITICAL ACTION FUND',
   'count': 1,
   'cycle': 2012,
   'memo_count': 0,
   'memo_total': 0.0,
   'recipient_id': 'C00344531',
   'recipient_name': '1199 32BJ/144 SERVICE EMPLOYEES INTERNATIONAL UNION HOME CARE POLITICAL ACTION FUND',
   'total': 3546.0}],
 [{'committee_id': 'C00330720',
   'committee_name': 'TRUST PAC TEAM REPUBLICANS FOR UTILIZING SENSIBLE TACTICS',
   'count': 1,
   'cycle': 2012,
   'memo_count': 0,
   'memo_total': 0.0,
   'recipient_id': 'C00040279',
   'recipient_name': 'ABBOTT LABORATORIES EMPLOYEE POLIT

In [135]:
# flatten the list of per-page lists of result dicts into one DataFrame (one column per key)
disbursements_df = pull_list_of_dicts(disbursements)
disbursements_df.head()

Unnamed: 0,committee_id,committee_name,count,cycle,memo_count,memo_total,recipient_id,recipient_name,total
0,C00004036,SEIU COPE (SERVICE EMPLOYEES INTERNATIONAL UNI...,1,2012,0,0.0,C00348540,1199 SERVICE EMPLOYEES INT'L UNION FEDERAL POL...,125000.0
1,C00348540,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,1,2012,0,0.0,C00344531,1199 32BJ/144 SERVICE EMPLOYEES INTERNATIONAL ...,3546.0
2,C00330720,TRUST PAC TEAM REPUBLICANS FOR UTILIZING SENSI...,1,2012,0,0.0,C00040279,ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION ...,1000.0
3,C00432401,BEN NELSON 2012,3,2012,0,0.0,C00040279,ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION ...,7000.0
4,C00467233,FISCAL RESPONSIBILITY PAC,1,2012,0,0.0,C00040279,ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION ...,500.0


In [136]:
# trim the frame in place by removing the cycle and memo columns
# (in place: running this cell twice on the same frame raises KeyError)
disbursements_df.drop(
    columns=[
        'cycle',
        'memo_count',
        'memo_total',
    ],
    inplace=True,
)

In [137]:
disbursements_df.shape

(2711, 6)

In [138]:
# write the trimmed frame as a comma-separated file (the default separator), without the index column
disbursements_df.to_csv('./datasets/disbursements.csv', index=False)

#### Schedule B - Draft Coding Run

In [82]:
# single-committee trial of the schedule b endpoint --> NRA : C00053553
# the URL is written as adjacent literals (joined by Python into one string) so each filter group is readable
nra_rq = rq.get(
    'https://api.open.fec.gov/v1/schedules/schedule_b/'
    '?committee_id=C00053553'
    '&spender_committee_designation=A&spender_committee_designation=J&spender_committee_designation=P'
    '&spender_committee_designation=B&spender_committee_designation=D'
    '&spender_committee_org_type=C&spender_committee_org_type=L&spender_committee_org_type=M'
    '&spender_committee_org_type=T&spender_committee_org_type=V&spender_committee_org_type=W'
    '&spender_committee_type=C&spender_committee_type=D&spender_committee_type=E&spender_committee_type=H'
    '&spender_committee_type=I&spender_committee_type=N&spender_committee_type=O&spender_committee_type=P'
    '&spender_committee_type=Q&spender_committee_type=S&spender_committee_type=U&spender_committee_type=V'
    '&spender_committee_type=W'
    '&two_year_transaction_period=2012&per_page=100&sort=-disbursement_date'
    '&sort_hide_null=false&sort_null_only=false'
    f'&api_key={api_key_fec}'
)

# show the HTTP status, then keep the parsed body for the cells below
print(nra_rq.status_code)
nra_json = nra_rq.json()

200


In [None]:
# create while loop for pagination capture
# NOTE(review): the same loop appears again in a later cell of this section; one of the two can probably be removed

# nra_list is not defined by any earlier cell, so build it here from a fresh copy of
# the first page (a copy also keeps nra_json['results'] untouched on re-runs)
nra_list = list(nra_json['results'])

# variables set by first page request, count set to track number of pages requested
last_disbursement_date = '2012-10-31'
last_index = '4040220131185979472'
count = 1

while True:
    # the query is passed as params so requests does the encoding; list values are
    # sent as repeated keys, matching the original hand-written query string
    nra_rq_loop = rq.get(
        'https://api.open.fec.gov/v1/schedules/schedule_b/',
        params={
            'committee_id': 'C00053553',
            'spender_committee_designation': ['A', 'J', 'P', 'B', 'D'],
            'spender_committee_org_type': ['C', 'L', 'M', 'T', 'V', 'W'],
            'spender_committee_type': ['C', 'D', 'E', 'H', 'I', 'N', 'O', 'P', 'Q', 'S', 'U', 'V', 'W'],
            'two_year_transaction_period': '2012',
            'per_page': '100',
            'sort': '-disbursement_date',
            'sort_hide_null': 'false',
            'sort_null_only': 'false',
            'api_key': api_key_fec,
            'last_index': last_index,
            'last_disbursement_date': last_disbursement_date,
        },
    )

    # check status during while loop
    print(f'Requesting page {count}')
    nra_rq_loop.raise_for_status()

    # convert request into json file
    nra_json_loop = nra_rq_loop.json()

    # keep the records themselves (list(json) would only give the top-level keys)
    nra_list_loop = nra_json_loop['results']
    nra_list.extend(nra_list_loop)

    # the cursor for the next page; it is empty once the last page has been served
    nra_index_loop = nra_json_loop['pagination']['last_indexes']
    if not nra_list_loop or not nra_index_loop:
        print('No more valid pages to loop through')
        break

    # move the cursor forward for the next request
    last_index = nra_index_loop['last_index']
    last_disbursement_date = nra_index_loop['last_disbursement_date']

    # add to counter
    count += 1

    # stay under the API rate limit, same pause as the master pull functions
    time.sleep(.5)

In [None]:
# walk the first-page response down to its pagination cursor values
nra_page_test = nra_json['pagination']
nra_index_test = nra_page_test['last_indexes']
nra_index_test['last_index']  # evaluated but not shown: a cell only displays its last expression
nra_index_test['last_disbursement_date']

In [83]:
# records from the first-page response
# this binds the same list object held in nra_json (no copy), so in-place changes show up in both
nra_list = nra_json['results']
nra_list

[{'amendment_indicator': 'A',
  'amendment_indicator_desc': 'ADD',
  'back_reference_schedule_id': None,
  'back_reference_transaction_id': None,
  'beneficiary_committee_name': None,
  'candidate_first_name': None,
  'candidate_id': None,
  'candidate_last_name': None,
  'candidate_middle_name': None,
  'candidate_name': None,
  'candidate_office': None,
  'candidate_office_description': None,
  'candidate_office_district': None,
  'candidate_office_state': None,
  'candidate_office_state_full': None,
  'candidate_prefix': None,
  'candidate_suffix': None,
  'category_code': '001',
  'category_code_full': 'Administrative/Salary/Overhead Expenses ',
  'comm_dt': None,
  'committee': {'affiliated_committee_name': 'THE NATIONAL RIFLE ASSOCIATION OF AMERICA',
   'candidate_ids': [],
   'city': 'FAIRFAX',
   'committee_id': 'C00053553',
   'committee_type': 'Q',
   'committee_type_full': 'PAC - Qualified',
   'cycle': 2012,
   'cycles': [1976,
    1978,
    1980,
    1982,
    1984,
    19

In [86]:
# create while loop for pagination capture

# start from a fresh copy of the first page so re-running this cell cannot stack
# duplicate records (and nra_json['results'] itself is left untouched)
nra_list = list(nra_json['results'])

# variables set by first page request, count set to track number of pages requested
last_disbursement_date = '2012-10-31'
last_index = '4040220131185979472'
count = 1

while True:
    # the query is passed as params so requests does the encoding; list values are
    # sent as repeated keys, matching the original hand-written query string
    nra_rq_loop = rq.get(
        'https://api.open.fec.gov/v1/schedules/schedule_b/',
        params={
            'committee_id': 'C00053553',
            'spender_committee_designation': ['A', 'J', 'P', 'B', 'D'],
            'spender_committee_org_type': ['C', 'L', 'M', 'T', 'V', 'W'],
            'spender_committee_type': ['C', 'D', 'E', 'H', 'I', 'N', 'O', 'P', 'Q', 'S', 'U', 'V', 'W'],
            'two_year_transaction_period': '2012',
            'per_page': '100',
            'sort': '-disbursement_date',
            'sort_hide_null': 'false',
            'sort_null_only': 'false',
            'api_key': api_key_fec,
            'last_index': last_index,
            'last_disbursement_date': last_disbursement_date,
        },
    )

    # check status during while loop
    print(f'Requesting page {count}')
    nra_rq_loop.raise_for_status()

    # convert request into json file
    nra_json_loop = nra_rq_loop.json()

    # keep the records themselves (list(json) would only give the top-level keys)
    nra_list_loop = nra_json_loop['results']
    nra_list.extend(nra_list_loop)

    # the cursor for the next page; it is empty once the last page has been served
    nra_index_loop = nra_json_loop['pagination']['last_indexes']
    if not nra_list_loop or not nra_index_loop:
        print('No more valid pages to loop through')
        break

    # move the cursor forward for the next request
    last_index = nra_index_loop['last_index']
    last_disbursement_date = nra_index_loop['last_disbursement_date']

    # add to counter
    count += 1

    # stay under the API rate limit, same pause as the master pull functions
    time.sleep(.5)

Requesting page 1
Requesting page 2
Requesting page 3
Requesting page 4
Requesting page 5
Requesting page 6
Requesting page 7
Requesting page 8
Requesting page 9
Requesting page 10
Requesting page 11
Requesting page 12
Requesting page 13
Requesting page 14
Requesting page 15
Requesting page 16
Requesting page 17
Requesting page 18
Requesting page 19
Requesting page 20
Requesting page 21
Requesting page 22
Requesting page 23
Requesting page 24
Requesting page 25
Requesting page 26
Requesting page 27
Requesting page 28
Requesting page 29
No more valid pages to loop through


In [87]:
nra_list

[{'amendment_indicator': 'A',
  'amendment_indicator_desc': 'ADD',
  'back_reference_schedule_id': None,
  'back_reference_transaction_id': None,
  'beneficiary_committee_name': None,
  'candidate_first_name': None,
  'candidate_id': None,
  'candidate_last_name': None,
  'candidate_middle_name': None,
  'candidate_name': None,
  'candidate_office': None,
  'candidate_office_description': None,
  'candidate_office_district': None,
  'candidate_office_state': None,
  'candidate_office_state_full': None,
  'candidate_prefix': None,
  'candidate_suffix': None,
  'category_code': '001',
  'category_code_full': 'Administrative/Salary/Overhead Expenses ',
  'comm_dt': None,
  'committee': {'affiliated_committee_name': 'THE NATIONAL RIFLE ASSOCIATION OF AMERICA',
   'candidate_ids': [],
   'city': 'FAIRFAX',
   'committee_id': 'C00053553',
   'committee_type': 'Q',
   'committee_type_full': 'PAC - Qualified',
   'cycle': 2012,
   'cycles': [1976,
    1978,
    1980,
    1982,
    1984,
    19

In [None]:
# keep only the first 100 entries of nra_list (one page at per_page=100) and delete the rest in place
# NOTE(review): this also throws away whatever the pagination loop added after the first page — confirm that is intended
del nra_list[100:]

In [None]:
[type(d) for d in nra_list]

In [None]:
# copy every record without its nested 'committee' entry so each row is flat
nra_no_committee = [
    {field: record[field] for field in record if field != 'committee'}
    for record in nra_list
]

In [None]:
# one row per record dict, one column per remaining key
nra_df = pd.DataFrame(nra_no_committee)

In [None]:
nra_df

In [None]:
nra_df.shape

In [None]:
nra_df.info()

In [None]:
# dropping null/unnecessary columns (1st round)
# done in place: running this cell twice on the same frame raises KeyError
nra_df.drop(
    columns=[
        'back_reference_schedule_id',
        'back_reference_transaction_id',
        'candidate_first_name',
        'candidate_last_name',
        'candidate_middle_name',
        'candidate_suffix',
        'comm_dt',
        'conduit_committee_city',
        'conduit_committee_name',
        'conduit_committee_state',
        'conduit_committee_street1',
        'conduit_committee_street2',
        'conduit_committee_zip',
        'memo_code',
        'memo_code_full',
        'national_committee_nonfederal_account',
        'original_sub_id',
        'payee_employer',
        'payee_first_name',
        'payee_last_name',
        'payee_middle_name',
        'payee_occupation',
        'payee_prefix',
        'payee_suffix',
        'ref_disp_excess_flg',
    ],
    inplace=True,
)

In [None]:
# lift the column display cap (a session-wide pandas option) so every column of the frame renders
pd.options.display.max_columns = None
nra_df.head(100)

In [None]:
# dropping additional unnecessary columns (2nd round)
# done in place: running this cell twice on the same frame raises KeyError
nra_df.drop(
    columns=[
        'amendment_indicator_desc',
        'candidate_office_state_full',
        'candidate_prefix',
        'category_code',
        'entity_type_desc',
        'fec_election_type_desc',
        'fec_election_year',
        'file_number',
        'filing_form',
        'image_number',
        'line_number',
        'link_id',
        'load_date',
        'memoed_subtotal',
        'pdf_url',
        'report_type',
        'report_year',
        'schedule_type',
        'schedule_type_full',
        'semi_annual_bundled_refund',
        'spender_committee_designation',
        'sub_id',
        'two_year_transaction_period',
    ],
    inplace=True,
)

In [None]:
nra_df.head(100)

In [None]:
# dropping additional unnecessary columns (3rd round)
# done in place: running this cell twice on the same frame raises KeyError
nra_df.drop(
    columns=[
        'candidate_office_description',
        'disbursement_type',
        'disbursement_type_description',
        'recipient_committee',
        'election_type',
        'election_type_full',
        'recipient_state',
        'recipient_zip',
    ],
    inplace=True,
)

In [None]:
nra_df.columns

In [None]:
# dropping additional unnecessary columns (4th round)
# done in place: running this cell twice on the same frame raises KeyError
nra_df.drop(
    columns=[
        'beneficiary_committee_name',
        'candidate_office_district',
        'candidate_office_state',
        'line_number_label',
        'memo_text',
        'recipient_city',
    ],
    inplace=True,
)

In [None]:
nra_df.shape

In [None]:
nra_df.columns

In [None]:
nra_df['candidate_name'].value_counts()

#### Schedule E - Expenditures

In [90]:
# loop through all candidate ids to get a master list of all expenditures made on behalf of elected congress members

def expenditures_master_pull(cand_id_list):  # add list of FEC candidate ids
    """Pull every page of the 2012-cycle Schedule E "by candidate" endpoint for each candidate id.

    For each id the first page is requested, `page_count` reads how many pages
    exist, and the `results` list of every page is appended to the output (the
    first page is reused rather than requested a second time).

    Ids whose first response cannot be decoded as JSON, or does not have the
    expected pagination/results layout, are reported and skipped; a summary of
    skipped ids is printed at the end because `clear_output` wipes the
    per-id messages as the pull progresses.

    Parameters
    ----------
    cand_id_list : iterable of str
        Ids interpolated into the `candidate_id` query parameter.

    Returns
    -------
    list of list of dict
        One inner list per page pulled, in request order.
    """
    def page_url(candidate_id, page):
        # single source for the request URL so page 1 and later pages cannot drift apart
        return f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page={page}&per_page=100&cycle=2012&election_full=true&candidate_id={candidate_id}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}'

    expenditures_all_pages = []
    skipped_ids = []
    count = 0

    # progress total is taken from the input instead of a hard-coded number
    try:
        total = len(cand_id_list)
    except TypeError:
        total = '?'

    # cycle through the first page of each rq to find the length of each expenditure list by candidate
    for ids in cand_id_list:
        expenditures_rq = rq.get(page_url(ids, 1))
        time.sleep(.5)

        # decoding sits inside the try: a body that is not JSON surfaces as a
        # ValueError subclass from .json()
        try:
            pgone_rq = expenditures_rq.json()
        except ValueError:
            print(expenditures_rq.status_code)
            skipped_ids.append(ids)
            continue

        # uncomment to debug
        # print(pgone_rq.keys())

        # JSON that lacks the pagination/results layout: skip this id instead of
        # carrying on with the previous id's page count
        try:
            qty_pages = page_count(pgone_rq)
            first_page_results = pgone_rq['results']
        except (KeyError, TypeError):
            print(f'API Request for candidate {ids} could not be found. Here\'s what was pulled:')
            print(pgone_rq)
            skipped_ids.append(ids)
            continue

        count += 1

        # page 1 is already in hand, so only pages 2..n need another request
        if qty_pages >= 1:
            expenditures_all_pages.append(first_page_results)

        for i in range(2, qty_pages + 1):
            all_expenditures_rq = rq.get(page_url(ids, i))

            # upgraded FEC api key can pull a max of 120 calls a minute
            time.sleep(.5)

            try:
                expenditures_all_pages.append(all_expenditures_rq.json()['results'])
            except (ValueError, KeyError):
                print(all_expenditures_rq.status_code)

        # check status during pull
        clear_output(wait=True)
        print(f'Pulling expenditures from {ids}, on number {count} out of {total}!')

    # printed after the loop so it survives clear_output
    if skipped_ids:
        print(f'Skipped {len(skipped_ids)} id(s) with unusable responses: {skipped_ids}')

    return expenditures_all_pages

In [91]:
# run the master pull over every id in candidate_ids (each request is followed by a 0.5 s pause)
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt, so the
# `expenditures` used further down presumably came from an earlier completed run — confirm
expenditures = expenditures_master_pull(candidate_ids)

Pulling expenditures from H0CA32101, on number 39 out of 433!


KeyboardInterrupt: 

In [65]:
# #uncomment to troubleshoot errors in api requests


# problem_id = 'H4GA06087'

# problem_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page=1&per_page=100&cycle=2012&election_full=true&candidate_id={problem_id}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

# print(problem_rq.status_code)
# problem_json = problem_rq.json()

# problem_json

200


{'api_version': '1.0',
 'pagination': {'count': 1, 'page': 1, 'pages': 1, 'per_page': 100},
 'results': [{'candidate_id': 'H4GA06087',
   'candidate_name': 'PRICE, THOMAS EDMUNDS',
   'committee_id': 'C00343137',
   'committee_name': 'POLITICAL ACTION COMMITTEE OF THE AMERICAN ASSOCIATION OF ORTHOPAEDIC SURGEONS',
   'count': 1,
   'cycle': 2012,
   'support_oppose_indicator': 'S',
   'total': 20000.0}]}

In [139]:
# converting the list of lists of dictionaries into a dataframe
# pull_list_of_dicts already returns a DataFrame, so no second pd.DataFrame(...) wrap is needed
expend_df = pull_list_of_dicts(expenditures)
# legacy name kept in case other cells still reference it; it is a DataFrame, not a dict
expend_dict = expend_df
expend_df.head(2)

Unnamed: 0,candidate_id,candidate_name,committee_id,committee_name,count,cycle,support_oppose_indicator,total
0,H2AL01077,"BONNER, JOSIAH ROBIAS",C00502849,CAMPAIGN FOR PRIMARY ACCOUNTABILITY INC,13,2012,O,123679.84
1,H0AL02087,"ROBY, MARTHA",C00000935,DEMOCRATIC CONGRESSIONAL CAMPAIGN COMMITTEE,3,2012,O,11676.41


In [140]:
# write the expenditures frame as a comma-separated file (the default separator), without the index column
expend_df.to_csv('./datasets/expenditures.csv', index=False)