# US Representative Voting Patterns and Funding Sources

*Exploratory analysis of how funding influences voting habits in Congress*

Part 1: Pulling FEC funding data through API

In [1]:
import requests as rq # to pull data from FEC API
import pandas as pd # for data exploration
from keys import api_key_fec # holds secure API KEY
import sqlite3 as sq # to pull data from sqlite database
from fuzzywuzzy import fuzz # to fuzzy match names from candidate api to voteview member list
import time # for improved request pulls
from IPython.display import clear_output #for improved request pulls

***

## General Functions in All Data Pulls/Cleans

1. pull_list_of_dicts
   - reads through api request pulls that come in as [[{}]] into a dataframe
2. page_count
   - reads the first page of an api json and relays the number of pages in the request

In [2]:
def pull_list_of_dicts(list_of_dicts):
    new_dictionary = {}
    for list in list_of_dicts:
        for list2 in list:
            for key, value in list2.items():
                if key in new_dictionary:
                    new_dictionary[key].append(value)
                else:
                    new_dictionary[key] = [value]
    return pd.DataFrame(new_dictionary)

In [3]:
def page_count(json, column='pagination', key='pages'):
    page_len = 0
    for keys, values in json[column].items():
        if keys == key:
            page_len += values
    return page_len

#### Reference Tables/Lists Needed
*data is created in top sections of this notebook*

1. candidate_ids
2. committee_ids

In [6]:
# for all functions, use lists of candidate ids or committee ids here: 
congress = pd.read_csv('./datasets/congress.csv')
candidate_ids = congress['candidate_id'].values.tolist()

committee = pd.read_csv('./datasets/committees.csv')
committee_ids = committee['committee_id'].values.tolist()

_______________________________________________________________________________________________


### Creating a reference table of registered committees - Independent Committees, Super PACs, etc

**Conditions for filters**
1. Cycle - 2012
2. Designation - All
3. Organization Type - All
4. Committee Type - All except:
   - Delegate
   - Presidential
   - Senate
   - National Party Non-Federal Account

**Steps**
1. Set up blank list and for loop for API pagination 
2. Get request from API with above filters added to url
3. Extend all pages pulled into one list and convert to DataFrame



In [61]:
# pull first page of api request to confirm results are as desired and find the number of pages in request
committees_rq = rq.get(f'https://api.open.fec.gov/v1/committees/?page=1&per_page=100&cycle=2012&designation=A&designation=J&designation=P&designation=U&designation=B&designation=D&organization_type=C&organization_type=L&organization_type=M&organization_type=T&organization_type=V&organization_type=W&committee_type=C&committee_type=E&committee_type=H&committee_type=I&committee_type=N&committee_type=O&committee_type=Q&committee_type=U&committee_type=V&committee_type=W&committee_type=X&committee_type=Y&sort=name&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

print(committees_rq.status_code)
committees = committees_rq.json()
committees['pagination']

200


{'count': 3435, 'page': 1, 'pages': 35, 'per_page': 100}

In [62]:
def committee_dataframe(pages):
    committees_all_pages = []
    count = 0
    
    for i in range(1, pages+1):
        committees_rq = rq.get(f'https://api.open.fec.gov/v1/committees/?page={i}&per_page=100&cycle=2012&designation=A&designation=J&designation=P&designation=U&designation=B&designation=D&organization_type=C&organization_type=L&organization_type=M&organization_type=T&organization_type=V&organization_type=W&committee_type=C&committee_type=E&committee_type=H&committee_type=I&committee_type=N&committee_type=O&committee_type=Q&committee_type=U&committee_type=V&committee_type=W&committee_type=X&committee_type=Y&sort=name&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

        # output confirmation during pull request
        clear_output(wait=True)
        count += 1
        print(f'On page {count} out of {pages}!')
        committees_rq.raise_for_status()

        # push data into open list
        committees = committees_rq.json()
        committees_all_pages.append(committees)

        time.sleep(.5)

    #merge all results into one DF
    blank_list = []
    
    for i in range(0, pages):
        blank_list.extend(committees_all_pages[i]['results'] )  

    return pd.DataFrame(blank_list)

In [63]:
committees_df = committee_dataframe(35)

On page 35 out of 35!


In [64]:
committees_df.head(2)

Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,committee_type_full,cycles,designation,designation_full,filing_frequency,first_f1_date,...,last_file_date,name,organization_type,organization_type_full,party,party_full,sponsor_candidate_ids,sponsor_candidate_list,state,treasurer_name
0,1199 SEIU UNITED HEALTHCARE WORKERS EAST,[],C00348540,Q,PAC - Qualified,"[2000, 2002, 2004, 2006, 2008, 2010, 2012, 201...",U,Unauthorized,Q,1999-09-01,...,2023-09-06,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,L,Labor Organization,,,,[],NY,"SCHAUB, HELEN"
1,1199 SEIU UNITED HEALTHCARE WORKERS EAST,[],C00344531,Q,PAC - Qualified,"[2000, 2002, 2004, 2006, 2008, 2010, 2012, 201...",U,Unauthorized,Q,1999-04-06,...,2023-07-31,1199 SEIU UNITED HEALTHCARE WORKERS EAST HOME ...,L,Labor Organization,,,,[],NY,"SCHAUB, HELEN"


In [74]:
committees_df.shape

(3435, 7)

#### Committee Referential Table Clean-Up

1. Dropping unnecessary columns (blank/mostly null or not relevant to analysis)
2. Renaming remaining columns
3. Checking for any nulls, exploring basic description of data before export
4. Saving as csv
5. Create list of committee ids for funding API requests

In [65]:
# dropping columns: 

committees_df.drop(columns=['candidate_ids','first_f1_date','first_file_date','last_f1_date','last_file_date','sponsor_candidate_ids','sponsor_candidate_list','filing_frequency','party','party_full','committee_type','organization_type','designation','cycles','treasurer_name'],inplace=True)

In [66]:
committees_df.columns

Index(['affiliated_committee_name', 'committee_id', 'committee_type_full',
       'designation_full', 'name', 'organization_type_full', 'state'],
      dtype='object')

In [67]:
# renaming column names 
committees_df.columns = ['affiliated_committee_name', 'committee_id', 'committee_type', 'designation_type', 'name', 'organization_type', 'state']

In [68]:
committees_df.head(1)

Unnamed: 0,affiliated_committee_name,committee_id,committee_type,designation_type,name,organization_type,state
0,1199 SEIU UNITED HEALTHCARE WORKERS EAST,C00348540,PAC - Qualified,Unauthorized,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,Labor Organization,NY


In [69]:
committees_df.shape

(3435, 7)

In [70]:
committees_df.to_csv('./datasets/committees.csv',sep=',',index=False)

***

### Match Names from Members and Candidates

**Note that this method assumes that both datasets have matching districts and states. After the initial runthrough 9/433 members in the voteview 'c113m' dataframe had incorrect districts and were manually revised**

*This uses the candidate master flat file instead of api as it includes additional total raised information not as easily accessible through request version. Additional details from the API are merged afterwards*

1. Link members of the 113th congress to their candidate information from the FEC
   - use district and state as exact matches and names as fuzzy matches (to filter out unelected candidates in same races as members)
2. Add congress id to candidate table as a foreign key
3. Filter candidate table down to only member-matching rows

In [8]:
cn = pd.read_csv('./datasets/cn2.txt', sep='|', names=['candidate_id', 'name', 'ici', 'pty_cd', 'party', 'ttl_receipts', 'trans_from_comm', 'ttl_disb', 'trans_to_comm', 'coh_bop', 'coh_cop', 'cand_contrib', 'cand_loans', 'other_loans', 'cand_loan_repay', 'other_loan_repay', 'debts_owed_by', 'ttl_indiv_contrib', 'state', 'district', 'spec_election', 'prim_election', 'run_election', 'gen_election', 'gen_election_precent', 'other_pol_cmte_contrib', 'pol_pty_contrib', 'cvg_end_dt', 'indiv_refunds', 'cmte_refunds'])
members = pd.read_csv('./datasets/c113m.csv')

In [11]:
#change null values to 0, then convert data type to integer 
cn['district'] = cn['district'].fillna(0)
cn['district'] = cn['district'].astype(int)

# convert data type to integer
members['district'] = members['district'].astype(int)

In [12]:
# make all letters lowercase in both table names for future fuzzy matching
cn['name'] = cn['name'].str.lower()
members['name'] = members['name'].str.lower()

In [13]:
# creates dataframe that links all candidates to members who match on district and state ** this still includes all challengers who lost!
cn_match = pd.merge(members, cn, how='left', on=['state', 'district'])
cn_match.shape

(2585, 37)

In [14]:
# apply fuzzy matching on new dataframe to compare the name_x column (the candidates) to name_y (the congress members)
# use token_sort_ratio as it will sort and compare each section of a string (to better match first and last names)
cn_match['matching_ratio'] = cn_match.apply(lambda x: fuzz.token_set_ratio(x['name_x'], x['name_y']), axis=1)

In [16]:
# pull the highest matching ratio for each congress member, and then create a new dataframe with results
msk = cn_match.groupby(['congress_id'])['matching_ratio'].transform('max') == cn_match['matching_ratio']
out = cn_match.loc[msk, ['matching_ratio', 'chamber', 'icpsr', 'district', 'state', 'party_x', 'name_x', 'NOMINATE_dim1', 'NOMINATE_dim2', 'candidate_id', 'name_y', 'ici', 'pty_cd', 'party_y', 'ttl_receipts', 'trans_from_comm', 'ttl_disb', 'trans_to_comm', 'coh_bop', 'coh_cop', 'cand_contrib', 'cand_loans', 'other_loans', 'cand_loan_repay', 'other_loan_repay', 'debts_owed_by', 'ttl_indiv_contrib', 'state', 'district', 'spec_election', 'prim_election', 'run_election', 'gen_election', 'gen_election_precent', 'other_pol_cmte_contrib', 'pol_pty_contrib', 'cvg_end_dt', 'indiv_refunds', 'cmte_refunds']]

In [17]:
# manually check results are accurate
# incorrect from first runthrough: YOUNG, Donald Edwin; SANCHEZ, Linda T; CARNEY, John C. Jr.; DAINES, Steve; GRIMM, Michael G.; CRAMER, Kevin; NOEM, Kristi; WELCH, Peter; LUMMIS, Cynthia M. 
# 98% accurate - will manually fix the missing 9 member info
out



Unnamed: 0,matching_ratio,chamber,icpsr,district,state,party_x,name_x,NOMINATE_dim1,NOMINATE_dim2,candidate_id,name_y,ici,pty_cd,party_y,ttl_receipts,trans_from_comm,ttl_disb,trans_to_comm,coh_bop,coh_cop,cand_contrib,cand_loans,other_loans,cand_loan_repay,other_loan_repay,debts_owed_by,ttl_indiv_contrib,state.1,district.1,spec_election,prim_election,run_election,gen_election,gen_election_precent,other_pol_cmte_contrib,pol_pty_contrib,cvg_end_dt,indiv_refunds,cmte_refunds
1,83,House,20300,1,AL,200,"bonner, jr., josiah robins (jo)",0.367,0.513,H2AL01077,"bonner, josiah robias",I,2,REP,1140566.63,42871.65,1263168.28,0.0,279810.87,157209.22,0.0,0.0,0.0,0.0,0.0,0.0,564463.0,AL,1,,W,,W,100.0,523034.4,0.0,12/31/2012,1000.0,500.0
9,87,House,20301,3,AL,200,"rogers, mike dennis",0.363,0.455,H2AL03032,"rogers, michael dennis",I,2,REP,1069891.19,0.0,982102.34,270000.0,204259.44,292048.29,0.0,0.0,0.0,0.0,0.0,0.0,474560.95,AL,3,,W,,W,62.0,588500.0,2700.0,12/31/2012,1300.0,0.0
11,69,House,21102,7,AL,100,"sewell, terri",-0.396,0.398,H0AL07086,"sewell, terrycina andrea",I,1,DEM,1205399.57,950.0,863803.98,125000.0,36018.28,377613.87,0.0,0.0,0.0,0.0,0.0,0.0,504236.47,AL,7,,W,,W,76.0,695969.43,1000.0,12/31/2012,2500.0,2500.0
17,100,House,21192,2,AL,200,"roby, martha",0.362,0.658,H0AL02087,"roby, martha",I,2,REP,1035717.36,32921.59,862502.48,0.0,13281.47,186496.35,0.0,0.0,0.0,0.0,0.0,0.0,480059.23,AL,2,,W,,W,65.0,522182.68,0.0,12/31/2012,4003.8,6884.62
27,100,House,21193,5,AL,200,"brooks, mo",0.652,-0.417,H0AL05163,"brooks, mo",I,2,REP,904753.18,14673.52,455514.82,0.0,50420.23,499658.59,0.0,0.0,0.0,0.0,0.0,0.0,426014.35,AL,5,,W,,W,64.0,459948.35,2500.0,12/31/2012,100.0,0.0
31,100,House,29301,6,AL,200,"bachus, spencer t., iii",0.387,0.228,H2AL06035,"bachus, spencer t iii",I,2,REP,2725003.77,0.0,2904500.84,0.0,439959.36,260462.29,0.0,0.0,0.0,0.0,0.0,15000.0,1128152.4,AL,6,,W,,W,71.0,1485107.9,0.0,12/31/2012,5750.0,12500.0
35,100,House,29701,4,AL,200,"aderholt, robert",0.386,0.561,H6AL04098,"aderholt, robert brown",I,2,REP,1247169.37,0.0,1140896.88,0.0,46609.33,152881.82,0.0,0.0,100000.0,0.0,150000.0,0.0,534734.58,AL,4,,W,,W,73.0,611000.0,0.0,12/31/2012,947.0,6000.0
44,92,House,14066,0,AK,200,"young, donald edwin",0.283,0.022,H6AK00045,"young, donald e",I,2,REP,1003531.63,0.0,665974.39,0.0,170742.22,508299.46,0.0,0.0,0.0,0.0,0.0,0.0,624494.86,AK,0,,W,,W,64.0,374040.13,477.2,12/31/2012,3900.0,1000.0
59,100,House,20304,8,AZ,200,"franks, trent",0.749,0.087,H4AZ04024,"franks, trent",I,2,REP,378998.03,0.0,399105.03,0.0,15657.54,-4450.06,0.0,2000.0,0.0,45000.0,0.0,300586.4,176256.75,AZ,8,,W,,W,62.0,195578.94,0.0,12/31/2012,0.0,0.0
64,97,House,20305,3,AZ,100,"grijalva, raúl m.",-0.598,-0.256,H2AZ07070,"grijalva, raul m",I,1,DEM,908543.77,0.0,930949.81,0.0,30271.77,7865.73,0.0,0.0,0.0,0.0,0.0,25544.7,572022.71,AZ,3,,W,,W,58.0,325477.97,5.38,12/31/2012,5900.0,776.82


In [18]:
#second runthrough pulled steven daines senatorial race id as a second match - dropping here
out.drop(1430, axis=0, inplace=True)

In [19]:
# clean up reference table so it only includes necessary columns 

congress = out.drop(columns=['matching_ratio', 'district', 'state', 'chamber', 'name_x', 'pty_cd', 'party_y', 'other_loans', 'other_loan_repay', 'cand_loan_repay', 'debts_owed_by', 'spec_election', 'prim_election', 'run_election', 'gen_election', 'gen_election_precent', 'cvg_end_dt', 'indiv_refunds', 'cmte_refunds'])

In [20]:
congress.head(2)

Unnamed: 0,icpsr,party_x,NOMINATE_dim1,NOMINATE_dim2,candidate_id,name_y,ici,ttl_receipts,trans_from_comm,ttl_disb,trans_to_comm,coh_bop,coh_cop,cand_contrib,cand_loans,ttl_indiv_contrib,other_pol_cmte_contrib,pol_pty_contrib
1,20300,200,0.367,0.513,H2AL01077,"bonner, josiah robias",I,1140566.63,42871.65,1263168.28,0.0,279810.87,157209.22,0.0,0.0,564463.0,523034.4,0.0
9,20301,200,0.363,0.455,H2AL03032,"rogers, michael dennis",I,1069891.19,0.0,982102.34,270000.0,204259.44,292048.29,0.0,0.0,474560.95,588500.0,2700.0


In [22]:
congress.columns = ['icpsr', 'party', 'NOMINATE_dim1', 'NOMINATE_dim2', 'candidate_id', 'name', 'ici', 'ttl_receipts', 'comm_receipts', 'ttl_disb', 'comm_disb', 'start_cash', 'end_cash', 'cand_contrib', 'cand_loans', 'ind_contrib', 'other_comm_contrib', 'pty_contrib']

In [72]:
congress.to_csv('./datasets/congress.csv',sep=',',index=False)

### Creating a reference table of registered candidates by connected committee

**Filters for Candidate Search API**
1. Cycle - 2012
2. Election Year - 2012
3. Office - H
4. Year - 2012
5. Party - DEM, REP (elected officials only from these two parties)

In [37]:
# for loop to pull all pages of requests


def candidates_dataframe(candidate_id_list, candidate_num):
    candidates_all_pages = []
    count = 0

    for ids in candidate_id_list:
        candidates_rq = rq.get(f'https://api.open.fec.gov/v1/candidates/search/?page=1&per_page=100&candidate_id={ids}&cycle=2012&sort=name&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

        # uncomment to debug ---> did each id request go through?
        # print(candidates_rq.status_code)
        
        # output confirmation during pull request
        clear_output(wait=True)
        count += 1
        print(f'Pulling candidate {ids}, on {count} out of {candidate_num}!')
        candidates_rq.raise_for_status()

        # push data into open list
        candidates = candidates_rq.json()

        # uncomment to debug  ---> did each id request have the correct keys?
        # print(candidates.keys())
        
        candidates_all_pages.append(candidates)

        time.sleep(.5)

    # #merge all results into one DF
    # blank_list = []
    
    # for i in range(0, candidate_num): 
    #     blank_list.extend(candidates_all_pages[i]['results']) 
        
    # return pd.DataFrame(blank_list)

In [None]:
candidates_df = candidates_dataframe(candidate_ids, 433)

Pulling candidate H2TX16185, on 378 out of 433!


In [27]:
candidates_df.shape

(433, 25)

In [28]:
candidates_df.drop(columns=['active_through', 'cycles', 'candidate_inactive', 'district_number', 'election_districts', 'election_years', 'federal_funds_flag', 'first_file_date', 'inactive_election_years', 'incumbent_challenge', 'last_f2_date', 'last_file_date', 'load_date', 'party', 'office'],inplace=True)

In [29]:
candidates_df.columns

Index(['candidate_id', 'candidate_status', 'district', 'has_raised_funds',
       'incumbent_challenge_full', 'name', 'office_full', 'party_full',
       'principal_committees', 'state'],
      dtype='object')

In [30]:
candidates_df.columns = ['candidate_id', 'status', 'district', 'has_raised_funds', 'incumbent_challenge', 'name', 'office', 'party', 'committees', 'state']

In [31]:
candidates_df.columns

Index(['candidate_id', 'status', 'district', 'has_raised_funds',
       'incumbent_challenge', 'name', 'office', 'party', 'committees',
       'state'],
      dtype='object')

In [22]:
# candidates_df = candidates_df.apply(lambda row: row[candidates_df['office'].isin(['House'])])

In [23]:
# candidates_df.drop_duplicates(subset='name', inplace=True)

In [32]:
candidates_df.shape

(433, 10)

In [36]:
candidates_df['committees']

0      [{'affiliated_committee_name': 'GULF COAST VIC...
1      [{'affiliated_committee_name': 'NONE', 'candid...
2      [{'affiliated_committee_name': 'NONE', 'candid...
3      [{'affiliated_committee_name': 'TEAM ROBY VICT...
4      [{'affiliated_committee_name': 'NONE', 'candid...
                             ...                        
428    [{'affiliated_committee_name': 'RIBBLE RESPONS...
429    [{'affiliated_committee_name': 'MINNESOTA -WIS...
430    [{'affiliated_committee_name': 'BADGER VICTORY...
431    [{'affiliated_committee_name': 'PROSPERITY ACT...
432    [{'affiliated_committee_name': 'FINANCIAL INNO...
Name: committees, Length: 433, dtype: object

In [117]:
# loop through candidate dataframe to get table of candidates and their associated committee

def candidate_committee_dataframe(df):
    can_com_df = []
    for i in range(0, candidate_num): 
        blank_list.append(candidates_all_pages[i]['results']) 
        
    return pd.DataFrame(blank_list)

[{'affiliated_committee_name': None,
  'candidate_ids': ['H2MT00039'],
  'committee_id': 'C00496802',
  'committee_type': 'H',
  'committee_type_full': 'House',
  'cycles': [2012, 2014, 2016, 2018],
  'designation': 'P',
  'designation_full': 'Principal campaign committee',
  'filing_frequency': 'A',
  'first_f1_date': '2011-05-25',
  'first_file_date': '2011-05-25',
  'last_f1_date': '2011-09-07',
  'last_file_date': '2018-04-26',
  'name': 'FRIENDS OF JOHN ABARR',
  'organization_type': None,
  'organization_type_full': None,
  'party': 'REP',
  'party_full': 'REPUBLICAN PARTY',
  'state': 'MT',
  'treasurer_name': 'JOHN ALLEN ABARR'}]

In [96]:
candidates_df.to_csv('./datasets/candidates.csv',sep=',',index=False)

____________________________________________________________________________________________________________________


### API Funding Requests

**Conditions for Schedule A Filters**
1. Contributor Type - Committee
2. Two Year Transaction Period - 2012 (end of election cycle)
3. Recipient Committee Type - House, Senate

**Conditions for Schedule B Filters**
1. Cycle - 2012

**Conditions for Schedule E Filters**
1. Cycle - 2012
2. Election Full - True

#### Schedule A - Candidate Receipts

**Steps**
1. Pull list of candidate committee ids
   - Every candidate has a distinct committee connected to their election campaign which is necessary to pull donation receipts

In [None]:
# function to pull variables for disbursement date and index needed for main disbursement api pull 

def pagination_pull():
    first_request = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/?committee_id=C00053553&spender_committee_designation=A&spender_committee_designation=J&spender_committee_designation=P&spender_committee_designation=B&spender_committee_designation=D&spender_committee_org_type=C&spender_committee_org_type=L&spender_committee_org_type=M&spender_committee_org_type=T&spender_committee_org_type=V&spender_committee_org_type=W&spender_committee_type=C&spender_committee_type=D&spender_committee_type=E&spender_committee_type=H&spender_committee_type=I&spender_committee_type=N&spender_committee_type=O&spender_committee_type=P&spender_committee_type=Q&spender_committee_type=S&spender_committee_type=U&spender_committee_type=V&spender_committee_type=W&two_year_transaction_period=2012&per_page=100&sort=-disbursement_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}')

print(nra_rq.status_code)
nra_json = nra_rq.json()

nra_page_test = nra_json['pagination']
nra_index_test = nra_page_test['last_indexes']
nra_index_test['last_index']
nra_index_test['last_disbursement_date']

In [None]:
# loop through all candidate ids to get a master list of all receipts by congress person

def receipts_master_pull(cand_id_list):  # add list of FEC candidate ids
    receipts_all_pages = []
    count = 0
    
    # cycle through the first page of each rq to find the length of each expenditure list by candidate
    for ids in cand_id_list:
        receipts_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page=1&per_page=100&cycle=2012&election_full=true&candidate_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')
        pgone_rq = expenditures_rq.json()
        time.sleep(.5)
        
        # uncomment to debug 
        # print(pgone_rq.keys())   
        
        try:
            qty_pages = page_count(pgone_rq)
            count += 1

        except KeyError:
            print(f'API Request for candidate {ids} could not be found. Here\'s what was pulled:')
            print(pgone_rq)

        except JSONDecodeError:
            print(expenditures_rq.status_code)
            print(pgone_rq.keys())
            
        # with length of rq pull, loop through each page for every candidate and append to a blank list
        for i in range(1, qty_pages+1):
            all_expenditures_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page={i}&per_page=100&cycle=2012&election_full=true&candidate_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')
            
            # upgraded FEC api key can pull a max of 120 calls a minute
            time.sleep(.5)
            
            try:
                all_expenditures_json = all_expenditures_rq.json()
                results = all_expenditures_json['results']
                expenditures_all_pages.append(results)

            except JSONDecodeError:
                print(all_expenditures_rq.status_code)
            
        # check status during pull
        clear_output(wait=True)
        print(f'Pulling expenditures from {ids}, on number {count} out of 433!')
        expenditures_rq.raise_for_status()
        
    
    return expenditures_all_pages

In [None]:
boehner_json

In [None]:
boehner_list = boehner_json['results']
boehner_list

In [None]:
boehner_dict1 = boehner_list[0]

In [None]:
boehner_dict1

In [None]:
# create while loop for pagination capture

# variables set by first page request, count set to track number of pages requested
last_contribution_receipt_date = '2012-09-21'
last_index = '4102220121167751830'
count = 1

while True:
    try:
        # api request that inserts api key variable, as well as the last index and disbursement date from the first page request
        boehner_rq_loop = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_a/?contributor_id=C00053553&contributor_type=committee&two_year_transaction_period=2012&recipient_committee_type=H&recipient_committee_type=S&per_page=100&sort=-contribution_receipt_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}&last_disbursement_date={last_contribution_receipt_date}')

        # check status during while loop
        print(f'Requesting page {count}')
        boehner_rq_loop.raise_for_status()

        # convert request into json file
        boehner_json_loop = boehner_rq_loop.json()

        # move through json to pull new last index and disbursement dates
        boehner_page_loop = boehner_json_loop['pagination']
        boehner_index_loop = boehner_page_loop['last_indexes']
        boehner_index = boehner_index_loop['last_index']
        boehner_contribution_date = boehner_index_loop['last_contribution_receipt_date']
       
        # convert loop into a list and append to boehner_list
        boehner_list_loop = list(boehner_json_loop)
        boehner_list.append(boehner_list_loop)
        
        # add to counter
        count += 1
        
    except TypeError:
        print('No more valid pages to loop through')
        break



#### Schedule B - Committee Funds by Committee

In [84]:
# loop through all committee ids to get a master list of all committee disbursements

def receipts_master_pull(committee_id_list):  # add list of FEC candidate ids
    receipts_all_pages = []
    count = 0
    
    # cycle through the first page of each rq to find the length of each expenditure list by candidate
    for ids in committee_id_list:
        receipts_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/by_recipient_id/?page=1&per_page=100&cycle=2012&recipient_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

        # uncomment to debug
        print(receipts_rq)
        
        pgone_rq = receipts_rq.json()

        # uncomment to debug
        print(pgone_rq.keys())
        
        time.sleep(.5)
            
        # uncomment to debug 
        # print(pgone_rq.keys())   
        
        try:
            qty_pages = page_count(pgone_rq)
            count += 1

        except KeyError:
            print(f'API Request for committee {ids} could not be found. Here\'s what was pulled:')
            print(pgone_rq)
   
        # with length of rq pull, loop through each page for every candidate and append to a blank list
        for i in range(1, qty_pages+1):
            all_receipts_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/by_recipient_id/?page={i}&per_page=100&cycle=2012&recipient_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')
            
            # upgraded FEC api key can pull a max of 120 calls a minute
            time.sleep(.5)
            
            try:
                all_receipts_json = all_receipts_rq.json()
                results = all_receipts_json['results']
                receipts_all_pages.append(results)

            except JSONDecodeError:
                print(all_receipts_rq.status_code)
            
        # check status during pull
        clear_output(wait=True)
        print(f'Pulling receipts from {ids}, on number {count} out of 3435!')
        receipts_rq.raise_for_status()
        
    
    return receipts_all_pages

In [83]:
receipts = receipts_master_pull(committee_ids)

Pulling receipts from C00235036, on number 3435 out of 3485!


In [None]:
# #uncomment to troubleshoot errors in api requests


# problem_id = 'H4GA06087'

# problem_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/by_recipient_id/?page={i}&per_page=100&cycle=2012&recipient_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

# print(problem_rq.status_code)
# problem_json = problem_rq.json()

# problem_json

In [88]:
receipts

[[{'committee_id': 'C00004036',
   'committee_name': 'SEIU COPE (SERVICE EMPLOYEES INTERNATIONAL UNION COMMITTEE ON POLITICAL EDUCATION)',
   'count': 1,
   'cycle': 2012,
   'memo_count': 0,
   'memo_total': 0.0,
   'recipient_id': 'C00348540',
   'recipient_name': "1199 SERVICE EMPLOYEES INT'L UNION FEDERAL POLITICAL ACTION FUND",
   'total': 125000.0}],
 [{'committee_id': 'C00348540',
   'committee_name': '1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDERAL POLITICAL ACTION FUND',
   'count': 1,
   'cycle': 2012,
   'memo_count': 0,
   'memo_total': 0.0,
   'recipient_id': 'C00344531',
   'recipient_name': '1199 32BJ/144 SERVICE EMPLOYEES INTERNATIONAL UNION HOME CARE POLITICAL ACTION FUND',
   'total': 3546.0}],
 [{'committee_id': 'C00467233',
   'committee_name': 'FISCAL RESPONSIBILITY PAC',
   'count': 1,
   'cycle': 2012,
   'memo_count': 0,
   'memo_total': 0.0,
   'recipient_id': 'C00040279',
   'recipient_name': 'ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION COMMITTEE',
   'tota

In [91]:
receipts_df = pull_list_of_dicts(receipts)
receipts_df.head()

Unnamed: 0,committee_id,committee_name,count,cycle,memo_count,memo_total,recipient_id,recipient_name,total
0,C00004036,SEIU COPE (SERVICE EMPLOYEES INTERNATIONAL UNI...,1,2012,0,0.0,C00348540,1199 SERVICE EMPLOYEES INT'L UNION FEDERAL POL...,125000.0
1,C00348540,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,1,2012,0,0.0,C00344531,1199 32BJ/144 SERVICE EMPLOYEES INTERNATIONAL ...,3546.0
2,C00467233,FISCAL RESPONSIBILITY PAC,1,2012,0,0.0,C00040279,ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION ...,500.0
3,C00432401,BEN NELSON 2012,3,2012,0,0.0,C00040279,ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION ...,7000.0
4,C00330720,TRUST PAC TEAM REPUBLICANS FOR UTILIZING SENSI...,1,2012,0,0.0,C00040279,ABBOTT LABORATORIES EMPLOYEE POLITICAL ACTION ...,1000.0


In [92]:
receipts_df.shape

(2712, 9)

In [93]:
receipts_df.to_csv('./datasets/receipts.csv',sep=',',index=False)

#### Schedule B - Draft Coding Run

In [82]:
# testing schedule b pull request for one committee id --> NRA : C00053553

nra_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/?committee_id=C00053553&spender_committee_designation=A&spender_committee_designation=J&spender_committee_designation=P&spender_committee_designation=B&spender_committee_designation=D&spender_committee_org_type=C&spender_committee_org_type=L&spender_committee_org_type=M&spender_committee_org_type=T&spender_committee_org_type=V&spender_committee_org_type=W&spender_committee_type=C&spender_committee_type=D&spender_committee_type=E&spender_committee_type=H&spender_committee_type=I&spender_committee_type=N&spender_committee_type=O&spender_committee_type=P&spender_committee_type=Q&spender_committee_type=S&spender_committee_type=U&spender_committee_type=V&spender_committee_type=W&two_year_transaction_period=2012&per_page=100&sort=-disbursement_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}')

print(nra_rq.status_code)
nra_json = nra_rq.json()

200


In [None]:
# create while loop for pagination capture

# variables set by first page request, count set to track number of pages requested
last_disbursement_date = '2012-10-31'
last_index = '4040220131185979472'
count = 1

while True:
    try:
        # api request that inserts api key variable, as well as the last index and disbursement date from the first page request
        nra_rq_loop = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/?committee_id=C00053553&spender_committee_designation=A&spender_committee_designation=J&spender_committee_designation=P&spender_committee_designation=B&spender_committee_designation=D&spender_committee_org_type=C&spender_committee_org_type=L&spender_committee_org_type=M&spender_committee_org_type=T&spender_committee_org_type=V&spender_committee_org_type=W&spender_committee_type=C&spender_committee_type=D&spender_committee_type=E&spender_committee_type=H&spender_committee_type=I&spender_committee_type=N&spender_committee_type=O&spender_committee_type=P&spender_committee_type=Q&spender_committee_type=S&spender_committee_type=U&spender_committee_type=V&spender_committee_type=W&two_year_transaction_period=2012&per_page=100&sort=-disbursement_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}&last_index={last_index}&last_disbursement_date={last_disbursement_date}')

        # check status during while loop
        print(f'Requesting page {count}')
        nra_rq_loop.raise_for_status()

        # convert request into json file
        nra_json_loop = nra_rq_loop.json()

        # move through json to pull new last index and disbursement dates
        nra_page_loop = nra_json_loop['pagination']
        nra_index_loop = nra_page_loop['last_indexes']
        last_index = nra_index_loop['last_index']
        last_disbursement_date = nra_index_loop['last_disbursement_date']
       
        # convert loop into a list and append to nra_list
        nra_list_loop = list(nra_json_loop)
        nra_list.append(nra_list_loop)
        
        # add to counter
        count += 1
        
    except TypeError:
        print('No more valid pages to loop through')
        break

In [None]:
nra_page_test = nra_json['pagination']
nra_index_test = nra_page_test['last_indexes']
nra_index_test['last_index']
nra_index_test['last_disbursement_date']

In [83]:
nra_list = nra_json['results']
nra_list

[{'amendment_indicator': 'A',
  'amendment_indicator_desc': 'ADD',
  'back_reference_schedule_id': None,
  'back_reference_transaction_id': None,
  'beneficiary_committee_name': None,
  'candidate_first_name': None,
  'candidate_id': None,
  'candidate_last_name': None,
  'candidate_middle_name': None,
  'candidate_name': None,
  'candidate_office': None,
  'candidate_office_description': None,
  'candidate_office_district': None,
  'candidate_office_state': None,
  'candidate_office_state_full': None,
  'candidate_prefix': None,
  'candidate_suffix': None,
  'category_code': '001',
  'category_code_full': 'Administrative/Salary/Overhead Expenses ',
  'comm_dt': None,
  'committee': {'affiliated_committee_name': 'THE NATIONAL RIFLE ASSOCIATION OF AMERICA',
   'candidate_ids': [],
   'city': 'FAIRFAX',
   'committee_id': 'C00053553',
   'committee_type': 'Q',
   'committee_type_full': 'PAC - Qualified',
   'cycle': 2012,
   'cycles': [1976,
    1978,
    1980,
    1982,
    1984,
    19

In [86]:
# create while loop for pagination capture

# variables set by first page request, count set to track number of pages requested
last_disbursement_date = '2012-10-31'
last_index = '4040220131185979472'
count = 1

while True:
    try:
        # api request that inserts api key variable, as well as the last index and disbursement date from the first page request
        nra_rq_loop = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_b/?committee_id=C00053553&spender_committee_designation=A&spender_committee_designation=J&spender_committee_designation=P&spender_committee_designation=B&spender_committee_designation=D&spender_committee_org_type=C&spender_committee_org_type=L&spender_committee_org_type=M&spender_committee_org_type=T&spender_committee_org_type=V&spender_committee_org_type=W&spender_committee_type=C&spender_committee_type=D&spender_committee_type=E&spender_committee_type=H&spender_committee_type=I&spender_committee_type=N&spender_committee_type=O&spender_committee_type=P&spender_committee_type=Q&spender_committee_type=S&spender_committee_type=U&spender_committee_type=V&spender_committee_type=W&two_year_transaction_period=2012&per_page=100&sort=-disbursement_date&sort_hide_null=false&sort_null_only=false&api_key={api_key_fec}&last_index={last_index}&last_disbursement_date={last_disbursement_date}')

        # check status during while loop
        print(f'Requesting page {count}')
        nra_rq_loop.raise_for_status()

        # convert request into json file
        nra_json_loop = nra_rq_loop.json()

        # move through json to pull new last index and disbursement dates
        nra_page_loop = nra_json_loop['pagination']
        nra_index_loop = nra_page_loop['last_indexes']
        last_index = nra_index_loop['last_index']
        last_disbursement_date = nra_index_loop['last_disbursement_date']
       
        # convert loop into a list and append to nra_list
        nra_list_loop = list(nra_json_loop)
        nra_list.append(nra_list_loop)
        
        # add to counter
        count += 1
        
    except TypeError:
        print('No more valid pages to loop through')
        break

Requesting page 1
Requesting page 2
Requesting page 3
Requesting page 4
Requesting page 5
Requesting page 6
Requesting page 7
Requesting page 8
Requesting page 9
Requesting page 10
Requesting page 11
Requesting page 12
Requesting page 13
Requesting page 14
Requesting page 15
Requesting page 16
Requesting page 17
Requesting page 18
Requesting page 19
Requesting page 20
Requesting page 21
Requesting page 22
Requesting page 23
Requesting page 24
Requesting page 25
Requesting page 26
Requesting page 27
Requesting page 28
Requesting page 29
No more valid pages to loop through


In [87]:
nra_list

[{'amendment_indicator': 'A',
  'amendment_indicator_desc': 'ADD',
  'back_reference_schedule_id': None,
  'back_reference_transaction_id': None,
  'beneficiary_committee_name': None,
  'candidate_first_name': None,
  'candidate_id': None,
  'candidate_last_name': None,
  'candidate_middle_name': None,
  'candidate_name': None,
  'candidate_office': None,
  'candidate_office_description': None,
  'candidate_office_district': None,
  'candidate_office_state': None,
  'candidate_office_state_full': None,
  'candidate_prefix': None,
  'candidate_suffix': None,
  'category_code': '001',
  'category_code_full': 'Administrative/Salary/Overhead Expenses ',
  'comm_dt': None,
  'committee': {'affiliated_committee_name': 'THE NATIONAL RIFLE ASSOCIATION OF AMERICA',
   'candidate_ids': [],
   'city': 'FAIRFAX',
   'committee_id': 'C00053553',
   'committee_type': 'Q',
   'committee_type_full': 'PAC - Qualified',
   'cycle': 2012,
   'cycles': [1976,
    1978,
    1980,
    1982,
    1984,
    19

In [None]:
del nra_list[100:]

In [None]:
[type(d) for d in nra_list]

In [None]:
# delete empty list items
nra_no_committee = [{k: v for k, v in d.items() if k != 'committee'} for d in nra_list]

In [None]:
nra_df = pd.DataFrame(nra_no_committee)

In [None]:
nra_df

In [None]:
nra_df.shape

In [None]:
nra_df.info()

In [None]:
#dropping null/unnecessary columns (1st round)
nra_df.drop(columns=['back_reference_schedule_id', 'back_reference_transaction_id', 'candidate_first_name', 'candidate_last_name', 'candidate_middle_name', 'candidate_suffix', 'comm_dt', 'conduit_committee_city', 'conduit_committee_name', 'conduit_committee_state', 'conduit_committee_street1', 'conduit_committee_street2', 'conduit_committee_zip', 'memo_code', 'memo_code_full', 'national_committee_nonfederal_account', 'original_sub_id', 'payee_employer', 'payee_first_name', 'payee_last_name', 'payee_middle_name', 'payee_occupation', 'payee_prefix', 'payee_suffix', 'ref_disp_excess_flg'], inplace=True)

In [None]:
pd.set_option('display.max_columns', None)
nra_df.head(100)

In [None]:
#dropping additional unnecessary columns (2nd round)
nra_df.drop(columns=['amendment_indicator_desc', 'candidate_office_state_full', 'candidate_prefix', 'category_code', 'entity_type_desc', 'fec_election_type_desc', 'fec_election_year', 'file_number', 'filing_form', 'image_number', 'line_number', 'link_id', 'load_date', 'memoed_subtotal', 'pdf_url', 'report_type', 'report_year', 'schedule_type', 'schedule_type_full', 'semi_annual_bundled_refund', 'spender_committee_designation', 'sub_id', 'two_year_transaction_period'], inplace=True)

In [None]:
nra_df.head(100)

In [None]:
#dropping additional unnecessary columns (3rd round)
nra_df.drop(columns=['candidate_office_description', 'disbursement_type', 'disbursement_type_description', 'recipient_committee', 'election_type', 'election_type_full', 'recipient_state', 'recipient_zip'], inplace=True)

In [None]:
nra_df.columns

In [None]:
#dropping additional unnecessary columns (4th round)
nra_df.drop(columns=['beneficiary_committee_name', 'candidate_office_district', 'candidate_office_state', 'line_number_label', 'memo_text', 'recipient_city'], inplace=True)

In [None]:
nra_df.shape

In [None]:
nra_df.columns

In [None]:
nra_df['candidate_name'].value_counts()

#### Schedule E - Expenditures

In [110]:
# loop through all candidate ids to get a master list of all expenditures made on behalf of elected congress members

def expenditures_master_pull(cand_id_list):  # add list of FEC candidate ids
    expenditures_all_pages = []
    count = 0
    
    # cycle through the first page of each rq to find the length of each expenditure list by candidate
    for ids in cand_id_list:
        expenditures_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page=1&per_page=100&cycle=2012&election_full=true&candidate_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')
        pgone_rq = expenditures_rq.json()
        time.sleep(.5)
        
        # uncomment to debug 
        # print(pgone_rq.keys())   
        
        try:
            qty_pages = page_count(pgone_rq)
            count += 1

        except KeyError:
            print(f'API Request for candidate {ids} could not be found. Here\'s what was pulled:')
            print(pgone_rq)

        except JSONDecodeError:
            print(expenditures_rq.status_code)
            print(pgone_rq.keys())
            
        # with length of rq pull, loop through each page for every candidate and append to a blank list
        for i in range(1, qty_pages+1):
            all_expenditures_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page={i}&per_page=100&cycle=2012&election_full=true&candidate_id={ids}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')
            
            # upgraded FEC api key can pull a max of 120 calls a minute
            time.sleep(.5)
            
            try:
                all_expenditures_json = all_expenditures_rq.json()
                results = all_expenditures_json['results']
                expenditures_all_pages.append(results)

            except JSONDecodeError:
                print(all_expenditures_rq.status_code)
            
        # check status during pull
        clear_output(wait=True)
        print(f'Pulling expenditures from {ids}, on number {count} out of 433!')
        expenditures_rq.raise_for_status()
        
    
    return expenditures_all_pages

In [111]:
expenditures = expenditures_master_pull(candidate_ids)

Pulling expenditures from H8WY00148, on number 433 out of 433!


In [65]:
# #uncomment to troubleshoot errors in api requests


# problem_id = 'H4GA06087'

# problem_rq = rq.get(f'https://api.open.fec.gov/v1/schedules/schedule_e/by_candidate/?page=1&per_page=100&cycle=2012&election_full=true&candidate_id={problem_id}&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key={api_key_fec}')

# print(problem_rq.status_code)
# problem_json = problem_rq.json()

# problem_json

200


{'api_version': '1.0',
 'pagination': {'count': 1, 'page': 1, 'pages': 1, 'per_page': 100},
 'results': [{'candidate_id': 'H4GA06087',
   'candidate_name': 'PRICE, THOMAS EDMUNDS',
   'committee_id': 'C00343137',
   'committee_name': 'POLITICAL ACTION COMMITTEE OF THE AMERICAN ASSOCIATION OF ORTHOPAEDIC SURGEONS',
   'count': 1,
   'cycle': 2012,
   'support_oppose_indicator': 'S',
   'total': 20000.0}]}

In [139]:
# converting the list of lists of dictionaries into a dataframe 
expend_dict = pull_list_of_dicts(expenditures)
expend_df = pd.DataFrame(expend_dict)
expend_df.head(2)

Unnamed: 0,candidate_id,candidate_name,committee_id,committee_name,count,cycle,support_oppose_indicator,total
0,H2AL01077,"BONNER, JOSIAH ROBIAS",C00502849,CAMPAIGN FOR PRIMARY ACCOUNTABILITY INC,13,2012,O,123679.84
1,H0AL02087,"ROBY, MARTHA",C00000935,DEMOCRATIC CONGRESSIONAL CAMPAIGN COMMITTEE,3,2012,O,11676.41


In [140]:
expend_df.to_csv('./datasets/expenditures.csv',sep=',',index=False)