## Maine Senate overview script

This file pulls in individual contributions to all Senate committees, as well as independent expenditures _and_ party-coordinated expenditures made to support or oppose the candidates.

The data fuels a dashboard that contains an overview of the race so far. It also provides the information to reconcile itemized contribution data with campaign totals available from the FEC.

All of the files are written to a repository at data.world, where SQL scripts combine them to fuel Tableau Public dashboards.

In [2]:
import numpy as np
import pandas as pd
import requests
import config
import os
import datadotworld as dw
import time
from pandas.io.json import json_normalize

## Schedule A API guide: https://api.open.fec.gov/developers/#/receipts/get_schedules_schedule_a_

In [3]:
### INDIVIDUAL DONATIONS TO MAINE CAMPAIGNS ###
## CANDIDATE SEARCH ##
# The API key is read from the local config module
fec_key = config.fec_key

# Search scope: candidates from Maine for the 2020 election year.
# The query carries no office filter.
cycle = '2020'
cand_state = 'ME'
parameters = dict(election_year=cycle, state=cand_state, api_key=fec_key)

# Request the candidate search results and decode the JSON body
candidate_search_url = 'https://api.open.fec.gov/v1/candidates/search'
r_cands = requests.get(candidate_search_url, params=parameters).json()

In [4]:
## Build the candidate id - committee id lookup table
# Candidate-level fields kept from the search results
candidate_fields = ['candidate_id', 'name', 'party_full',
                    'incumbent_challenge_full', 'office_full', 'first_file_date']
cand = json_normalize(data=r_cands['results'])[candidate_fields]

# One row per entry of each candidate's 'principal_committees' record
committee_fields = ['candidate_ids', 'committee_id', 'name']
comm = json_normalize(data=r_cands['results'],
                      record_path='principal_committees')[committee_fields]
# Keep only the first entry of candidate_ids so it can serve as the join key
comm['candidate_ids'] = comm['candidate_ids'].str[0]

# Join candidates to their committees; both sides carry a 'name' column,
# so the merge suffixes them as name_x (candidate) and name_y (committee)
cand = cand.merge(comm, left_on='candidate_id', right_on='candidate_ids')

# Give the suffixed columns descriptive names and drop the redundant join key
colnm = {'name_x': 'candidate_name', 'name_y': 'committee_name'}
cand = cand.rename(columns=colnm)
cand = cand.drop(columns='candidate_ids')

cand

# Publishing to data.world is currently disabled:
# with dw.open_remote_file('darrenfishell/2020-election-repo','candidate_committee_lookup.csv') as w:
#     cand.to_csv(w,index=False)

Unnamed: 0,candidate_id,candidate_name,party_full,incumbent_challenge_full,office_full,first_file_date,committee_id,committee_name
0,H0ME01119,"ALLEN, JAY THOMAS",REPUBLICAN PARTY,Challenger,House,2019-08-10,C00715532,JAY ALLEN FOR MAINE
1,H0ME02075,"BENNETT, ADRIENNE",REPUBLICAN PARTY,Challenger,House,2019-10-08,C00722108,ADRIENNE FOR CONGRESS
2,H0ME02067,"BRAKEY, ERIC",REPUBLICAN PARTY,Challenger,House,2019-07-29,C00713958,BRAKEY FOR CONGRESS
3,S0ME00061,"BUNKER, MICHAEL P MR",DEMOCRATIC PARTY,Challenger,Senate,2018-11-12,C00710087,MICHAEL BUNKER FOR MAINE
4,S6ME00159,"COLLINS, SUSAN M.",REPUBLICAN PARTY,Incumbent,Senate,1996-02-22,C00314575,COLLINS FOR SENATOR
5,H0ME02083,"CRAFTS, DALE",REPUBLICAN PARTY,Challenger,House,2019-10-10,C00722454,DALE CRAFTS FOR CONGRESS
6,S0ME00111,"GIDEON, SARA",DEMOCRATIC PARTY,Challenger,Senate,2019-06-24,C00709899,SARA GIDEON FOR MAINE
7,H8ME02185,"GOLDEN, JARED",DEMOCRATIC PARTY,Incumbent,House,2017-08-24,C00653816,COMMITTEE TO ELECT JARED GOLDEN
8,H0ME02091,"HIATT, JOHN DAVID",REPUBLICAN PARTY,Challenger,House,2019-12-02,C00729251,JOHN HIATT FOR CONGRESS
9,S0ME00087,"KIDMAN, BRE MX.",DEMOCRATIC PARTY,Challenger,Senate,2019-04-20,C00703413,FRIENDS OF BRE KIDMAN


<Response [405]>

In [67]:
start = time.time()
## COLLECT CONTRIBUTION RECORDS FOR EVERY COMMITTEE ##
# For each committee id in the candidate lookup this cell gathers
#   * the committee's totals record (collected in udfs), and
#   * every page of itemized individual Schedule A receipts (collected in idfs).
idfs = []
udfs = []

comm_ids = cand['committee_id']

schedule_a_url = 'https://api.open.fec.gov/v1/schedules/schedule_a/'

# Iterate over the ids themselves. The earlier counter-based loop ran
# len(comm_ids)-1 times, so the last committee in the list was never queried.
for committee_id in comm_ids:

    # Totals query for this committee (source of the unitemized figures)
    unitemdict = {
        'api_key': fec_key,
        'cycle': cycle,
        'per_page': '100',
        'committee_id': committee_id,
    }
    u_r = requests.get('https://api.open.fec.gov/v1/committee/' + committee_id + '/totals',
                       params=unitemdict).json()
    udfs.append(json_normalize(u_r['results']))

    # Itemized query, rebuilt per committee so the paging cursor always starts empty
    itemdict = {
        'per_page': '100',
        'sort': 'contribution_receipt_date',
        'api_key': fec_key,
        'is_individual': 'true',
        'two_year_transaction_period': cycle,
        'last_index': [],
        'last_contribution_receipt_date': [],
        'committee_id': committee_id,
    }

    # First itemized payload for this committee
    r = requests.get(schedule_a_url, params=itemdict).json()

    # Keep paging for as long as the response supplies a cursor
    while r['pagination']['last_indexes'] is not None:

        # Store the results of this payload
        idfs.append(json_normalize(r['results']))

        # Carry the cursor returned by this page into the next request
        last_index = pd.to_numeric(r['pagination']['last_indexes']['last_index'])
        last_date = r['pagination']['last_indexes']['last_contribution_receipt_date']
        itemdict.update([('last_index', last_index),
                         ('last_contribution_receipt_date', last_date)])

        r = requests.get(schedule_a_url, params=itemdict).json()

# Concatenate all pages and drop rows repeated across pages
itemdf = pd.concat(idfs, sort=False, ignore_index=True)
itemdf = itemdf.drop_duplicates(subset='transaction_id')

udf = pd.concat(udfs, sort=False, ignore_index=True)
udf = udf.drop_duplicates()

end = time.time()

In [68]:
# Elapsed wall-clock time, in seconds, between the start and end timestamps
end - start

288.0130841732025

In [21]:
# Collectors for itemized pages (idfs) and committee totals (udfs)
idfs = []
udfs = []

schedule_a_url = 'https://api.open.fec.gov/v1/schedules/schedule_a/'

# Loop collects each committee's totals, then its itemized contributions.
# Names are paired with ids positionally, and no counter named `id` is used,
# so the builtin id() is no longer shadowed.
for position, (committee_id, candidate_name) in enumerate(
        zip(cand['committee_id'], cand['candidate_name'])):

    print(str(position) + ' ' + candidate_name)

    # UNITEMIZED (totals) query for this committee
    unitemdict = {
        'api_key': fec_key,
        'cycle': cycle,
        'per_page': '100',
        'committee_id': committee_id,
    }
    u_r = requests.get('https://api.open.fec.gov/v1/committee/' + committee_id + '/totals',
                       params=unitemdict).json()
    udfs.append(json_normalize(u_r['results']))

    # ITEMIZED query. It is rebuilt BEFORE the first request of every committee.
    # Previously the committee id and cursor were only reset inside the paging
    # loop, after the first request had already been sent, so every committee
    # after the first was queried with the previous committee's id and cursor.
    itemdict = {
        'per_page': '100',
        'sort': 'contribution_receipt_date',
        'api_key': fec_key,
        'is_individual': 'true',
        'two_year_transaction_period': cycle,
        'last_index': [],
        'last_contribution_receipt_date': [],
        'committee_id': committee_id,
    }

    # First itemized payload for this committee
    r = requests.get(schedule_a_url, params=itemdict).json()

    # Keep paging for as long as the response supplies a cursor
    while r['pagination']['last_indexes'] is not None:

        # Store the results of this payload
        idfs.append(json_normalize(r['results']))

        # Carry the cursor returned by this page into the next request
        last_index = pd.to_numeric(r['pagination']['last_indexes']['last_index'])
        last_date = r['pagination']['last_indexes']['last_contribution_receipt_date']
        itemdict.update([('last_index', last_index),
                         ('last_contribution_receipt_date', last_date)])

        r = requests.get(schedule_a_url, params=itemdict).json()

# Concatenate all pages and drop rows repeated across pages
itemdf = pd.concat(idfs, sort=False, ignore_index=True)
itemdf = itemdf.drop_duplicates(subset='transaction_id')

udf = pd.concat(udfs, sort=False, ignore_index=True)
udf = udf.drop_duplicates()

# Transformation and cleaning steps

# Keep the first five characters of itemized ZIP codes
itemdf['contributor_zip'] = itemdf['contributor_zip'].str[:5]

# Empty frame with the itemized columns, so unitemized rows conform to that layout
cols = itemdf.columns.values.tolist()
unitemdf = pd.DataFrame(columns=cols)

## Copy the totals fields into the matching itemized columns ##
unitemdf[['committee.name'
        ,'committee.party_full'
        ,'committee_id'
        ,'contribution_receipt_amount'
        ,'contribution_receipt_date'
        ,'fec_election_type_desc']] = udf[['committee_name'
                                        ,'party_full'
                                        ,'committee_id'
                                        ,'individual_unitemized_contributions'
                                        ,'coverage_end_date'
                                        ,'last_report_type_full']]

# Label unitemized rows and mark them as individual contributions
unitemdf['contributor_name'] = 'Unitemized individual contributions'
unitemdf['entity_type'] = 'IND'

# Union itemized, unitemized and committee contributions.
# comm_df is not created in this cell; it must already exist when this runs.
ind_df = pd.concat([itemdf, unitemdf, comm_df], sort=False, ignore_index=True)
# Keep only the date part that precedes the 'T' separator
ind_df['contribution_receipt_date'] = ind_df['contribution_receipt_date'].str.split('T', expand=True)[0]

print(str(len(ind_df)))

print(ind_df.head())

0 ALLEN, JAY THOMAS


KeyError: 'results'

{'error': {'code': 'OVER_RATE_LIMIT',
  'message': 'You have exceeded your rate limit of 40 calls per hour for the DEMO_KEY, 1000 calls per hour for a personal key, or 120 calls per minute for an upgraded key. You can either try again later, sign up for a personal key at https://api.data.gov/signup/, or email apiinfo@fec.gov to upgrade your key.'}}

In [75]:
# Debug helper: print a running 0-based position for each committee id in cand
for i, x in enumerate(cand['committee_id']):
    print(i)
# Leave i equal to the number of ids visited, as a hand-incremented counter would
i = len(cand['committee_id'])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17


In [57]:
## COLLECT COMMITTEE-SOURCED RECEIPTS FOR EVERY COMMITTEE ##
# Collector for the Schedule A pages returned for each committee
idfs = []

schedule_a_url = 'https://api.open.fec.gov/v1/schedules/schedule_a/'

# Iterate over the ids themselves. The earlier counter-based loop ran
# len(comm_ids)-1 times, so the last committee in the list was never queried.
for committee_id in comm_ids:

    # Query rebuilt per committee so the paging cursor always starts empty
    itemdict = {
        'per_page': '100',
        'sort': 'contribution_receipt_date',
        'api_key': fec_key,
        # Committees and is_individual:false
        'contributor_type': 'committee',
        'is_individual': 'false',
        'two_year_transaction_period': cycle,
        'last_index': [],
        'last_contribution_receipt_date': [],
        'committee_id': committee_id,
    }

    # First payload for this committee
    r = requests.get(schedule_a_url, params=itemdict).json()

    # Keep paging for as long as the response supplies a cursor
    while r['pagination']['last_indexes'] is not None:

        # Store the results of this payload
        idfs.append(json_normalize(r['results']))

        # Carry the cursor returned by this page into the next request
        last_index = pd.to_numeric(r['pagination']['last_indexes']['last_index'])
        last_date = r['pagination']['last_indexes']['last_contribution_receipt_date']
        itemdict.update([('last_index', last_index),
                         ('last_contribution_receipt_date', last_date)])

        r = requests.get(schedule_a_url, params=itemdict).json()

# Concatenate all pages and drop rows repeated across pages
comm_df = pd.concat(idfs, sort=False, ignore_index=True)
comm_df = comm_df.drop_duplicates(subset='transaction_id')

{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'contributor_type': 'committee', 'is_individual': 'false', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_receipt_date': [], 'committee_id': 'C00715532'}
{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'contributor_type': 'committee', 'is_individual': 'false', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_receipt_date': [], 'committee_id': 'C00722108'}
{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'contributor_type': 'committee', 'is_individual': 'false', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_receipt_date': [], 'committee_id': 'C00713958'}
{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'co

In [85]:
# Show the column names of the committee receipts table as a plain list
comm_df.columns.tolist()

['election_type_full',
 'conduit_committee_street1',
 'contributor_last_name',
 'candidate_name',
 'amendment_indicator_desc',
 'contributor_street_1',
 'is_individual',
 'contributor_employer',
 'conduit_committee_id',
 'conduit_committee_city',
 'candidate_prefix',
 'conduit_committee_name',
 'contributor_street_2',
 'candidate_middle_name',
 'back_reference_schedule_name',
 'pdf_url',
 'receipt_type_full',
 'memo_code',
 'contributor_state',
 'memo_code_full',
 'memoed_subtotal',
 'sub_id',
 'committee_id',
 'two_year_transaction_period',
 'election_type',
 'candidate_first_name',
 'national_committee_nonfederal_account',
 'donor_committee_name',
 'line_number_label',
 'schedule_type',
 'line_number',
 'candidate_id',
 'candidate_suffix',
 'contributor',
 'candidate_office_state_full',
 'conduit_committee_state',
 'committee_name',
 'contribution_receipt_date',
 'receipt_type_desc',
 'fec_election_type_desc',
 'image_number',
 'fec_election_year',
 'contributor_middle_name',
 'file_

In [6]:
# ITEMIZED DATA CLEANING
# Trim itemized contributor ZIP codes to their first five characters
itemdf['contributor_zip'] = itemdf['contributor_zip'].str.slice(stop=5)
# NOTE: comm_df['contributor_zip'] is not trimmed in this cell

# Empty frame whose columns mirror the itemized table
cols = list(itemdf.columns)
unitemdf = pd.DataFrame(columns=cols)

# Totals column -> itemized column it is copied into (order matters:
# sources and targets are paired position by position)
column_map = {
    'committee_name': 'committee.name',
    'party_full': 'committee.party_full',
    'committee_id': 'committee_id',
    'individual_unitemized_contributions': 'contribution_receipt_amount',
    'coverage_end_date': 'contribution_receipt_date',
    'last_report_type_full': 'fec_election_type_desc',
}
unitemdf[list(column_map.values())] = udf[list(column_map.keys())]

# Mark these rows as unitemized contributions from individuals
unitemdf['contributor_name'] = 'Unitemized individual contributions'
unitemdf['entity_type'] = 'IND'

# Stack itemized, unitemized and committee contributions into one table
frames = [itemdf, unitemdf, comm_df]
ind_df = pd.concat(frames, sort=False, ignore_index=True)

# Keep only the part of each receipt date that precedes the 'T' separator
date_parts = ind_df['contribution_receipt_date'].str.split('T', expand=True)
ind_df['contribution_receipt_date'] = date_parts[0]

In [23]:
## WRITE OUT INDIVIDUAL DONATION FILES ##
# Each file is only pushed to the data.world project when the freshly
# collected data is larger than what the project currently holds.

# Contributions: write when the new table has more rows than the stored one
results = dw.query('darrenfishell/2020-election-repo', 'SELECT * FROM individual_congressional_contributions')
if len(results.dataframe) < len(ind_df):
    with dw.open_remote_file('darrenfishell/2020-election-repo','individual-congressional-contributions.csv') as w:
        ind_df.to_csv(w,index=False)

# Financial summaries: write when total receipts exceed the stored total.
# Both sides are sums; the earlier check compared the stored sum against
# len(udf['receipts']) (a row count), so the file was effectively never updated.
results = dw.query('darrenfishell/2020-election-repo', 'SELECT * FROM congress_financial_summaries')
if sum(results.dataframe['receipts']) < sum(udf['receipts']):
    with dw.open_remote_file('darrenfishell/2020-election-repo','congress_financial_summaries.csv') as w:
        udf.to_csv(w,index=False)

In [17]:
# Total of the 'receipts' column across all rows of the committee totals table
sum(udf['receipts'])

11626577.79

In [6]:
### INDEPENDENT EXPENDITURE RETRIEVAL ###
## Schedule E records are requested per candidate id ##
cand_ids = cand['candidate_id']

# Collector for the pages returned for each candidate
edfs = []

schedule_e_url = 'https://api.open.fec.gov/v1/schedules/schedule_e/'

# Iterate over the ids themselves. The earlier counter-based loop ran
# len(cand_ids)-1 times, so the last candidate in the list was never queried.
for candidate_id in cand_ids:

    # Query rebuilt per candidate so the paging cursor always starts empty
    iedict = {
        'per_page': '100',
        'api_key': fec_key,
        'cycle': cycle,
        'last_index': [],
        'last_expenditure_date': [],
        'candidate_id': candidate_id,
    }

    # First payload for this candidate
    ier = requests.get(schedule_e_url, params=iedict).json()

    # Keep paging for as long as the response supplies a cursor
    while ier['pagination']['last_indexes'] is not None:

        # Store the results of this payload
        edfs.append(json_normalize(ier['results']))

        # Carry the cursor returned by this page into the next request
        last_index = ier['pagination']['last_indexes']['last_index']
        last_date = ier['pagination']['last_indexes']['last_expenditure_date']
        iedict.update([('last_index', last_index),
                       ('last_expenditure_date', last_date)])

        ier = requests.get(schedule_e_url, params=iedict).json()

# Concatenate all pages and drop rows repeated across pages
edf = pd.concat(edfs, sort=False, ignore_index=True)
edf = edf.drop_duplicates(subset='transaction_id')

# Keep five-character ZIP codes and the date part before the 'T' separator
edf['committee.zip'] = edf['committee.zip'].str[:5]
edf['expenditure_date'] = edf['expenditure_date'].str.split('T', expand=True)[0]

# Write to data.world only when the new table has more rows than the stored one
results = dw.query('darrenfishell/2020-election-repo', 'SELECT * FROM congress_independent_expenditures')
if len(results.dataframe) < len(edf):
    with dw.open_remote_file('darrenfishell/2020-election-repo','congress-independent-expenditures.csv') as w:
        edf.to_csv(w,index=False)

In [10]:
## COORDINATED POLITICAL SPENDING RETRIEVAL ##
# Schedule F records are requested per candidate id, using page-number paging.
# Collector for the pages returned for each candidate
pdfs = []

schedule_f_url = 'https://api.open.fec.gov/v1/schedules/schedule_f/'

# Iterate over the ids themselves. The earlier counter-based loop ran
# len(cand_ids)-1 times, so the last candidate in the list was never queried.
for candidate_id in cand_ids:

    # Query rebuilt per candidate so paging always restarts at page 1.
    # The page number lives only in pdict; the earlier separate counter was
    # never reset between candidates, so later candidates skipped pages.
    pdict = {
        'per_page': '100',
        'api_key': fec_key,
        'two_year_transaction_period': cycle,
        'page': 1,
        'candidate_id': candidate_id,
    }

    # First payload for this candidate
    p_r = requests.get(schedule_f_url, params=pdict).json()

    # Continue while the page reported by the response is within the page count
    while p_r['pagination']['page'] <= p_r['pagination']['pages']:

        # Store the results of this payload
        pdfs.append(json_normalize(p_r['results']))

        # Advance to the next page and request it
        pdict['page'] += 1
        p_r = requests.get(schedule_f_url, params=pdict).json()

# Concatenate all pages, drop repeated rows, keep five-character ZIP codes
pdf = pd.concat(pdfs, sort=False, ignore_index=True)
pdf = pdf.drop_duplicates(subset='transaction_id')
pdf['committee.zip'] = pdf['committee.zip'].str[:5]

# Write to data.world only when the new table has more rows than the stored one
results = dw.query('darrenfishell/2020-election-repo', 'SELECT * FROM congress_party_coordinated_expenditures')

if len(results.dataframe) < len(pdf):
    with dw.open_remote_file('darrenfishell/2020-election-repo','congress-party-coordinated-expenditures.csv') as w:
        pdf.to_csv(w,index=False)

{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'two_year_transaction_period': '2020', 'page': 1, 'candidate_id': 'H0ME01119'}
{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'two_year_transaction_period': '2020', 'page': 1, 'candidate_id': 'H0ME02075'}
{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'two_year_transaction_period': '2020', 'page': 1, 'candidate_id': 'H0ME02067'}
{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'two_year_transaction_period': '2020', 'page': 1, 'candidate_id': 'S0ME00061'}
{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'two_year_transaction_period': '2020', 'page': 1, 'candidate_id': 'S6ME00159'}
{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'two_year_transaction_period': '2020', 'page': 1, 'candidate_id': 'H0ME02083'}
{'per_page': '100', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', '