## Maine Senate overview script

This file pulls in individual contributions to all Senate committees as well as independent expenditures to support or oppose the candidates _and_ party-coordinated expenditures to support or oppose the candidates.

The data fuels a dashboard that contains an overview of the race so far. It also provides the information to reconcile itemized contribution data with campaign totals available from the FEC.

For Susan Collins, there is potentially significant variance between these sources and the cause is not yet clear.

In [2]:
import numpy as np
import pandas as pd
import requests
import config
import os
import json
import pygsheets
from pandas.io.json import json_normalize
import time

##Schedule_a API guide: https://api.open.fec.gov/developers/#/receipts/get_schedules_schedule_a_˜

In [97]:
### INDIVIDUAL DONATIONS TO MAINE SENATE CAMPAIGNS ###

## SENATE COMMITTEE SEARCH ##
#Set search for all 2020 (two-year transaction period) Maine Senate candidates
cand_state = 'ME'
period='2020'
parameters = {'election_year':period
            ,'state':cand_state
            ,'office':'S'
            ,'api_key':config.api_key}

#Requests candidate info
r_cands = requests.get('https://api.open.fec.gov/v1/candidates/search',params=parameters).json()

#Locates and sets Committee ID from 'principal_committees' sub-array
#Output list of IDs
comm_ids=json_normalize(data=r_cands['results'],record_path='principal_committees')['committee_id'].tolist()

## FOR LOOP TO COLLECT CONTRIBUTION RECORDS ##

#Initialize dataframe collector for itemized contribs
cycle='2020'
idfs=[]
udfs=[]
commid=0

#Initialize query dictionary
itemdict = {
    'per_page':'100'
    ,'sort':'contribution_receipt_date'
    ,'api_key':config.api_key
    ,'is_individual':'true'
    ,'two_year_transaction_period':cycle
    ,'last_index':[]
    ,'last_contribution_receipt_date':[]
    ,'committee_id':comm_ids[commid]
}

#Dict for unitemized contributions
unitemdict = {
'api_key':config.api_key
,'cycle':cycle
,'per_page':'100'
,'committee_id':comm_ids[commid]
}

#Page through results for each committee id
for x in comm_ids):
    
    u_r = requests.get('https://api.open.fec.gov/v1/committee/'+comm_ids[commid]+'/totals',params=unitemdict).json()
    udf = json_normalize(u_r['results'])
    udfs.append(udf)
    
    #Get first itemized payload for a candidate
    itemr = requests.get('https://api.open.fec.gov/v1/schedules/schedule_a/',params=itemdict).json()
    
    #Print itemdict to validate
    print(itemdict)
    
    #Last page variables
    while itemr['pagination']['last_indexes'] is not None:
        
        #Store results of payload
        idf = json_normalize(itemr['results'])
        idfs.append(idf)
                
        #Assign last_index and date values, update itemdict
        last_index=pd.to_numeric(itemr['pagination']['last_indexes']['last_index'])
        last_date=itemr['pagination']['last_indexes']['last_contribution_receipt_date']
        #Update dictionary with new indices
        itemdict.update([('last_index',last_index)
                        ,('last_contribution_receipt_date',last_date)])

        #Get next payload with updated dict
        itemr = requests.get('https://api.open.fec.gov/v1/schedules/schedule_a/',params=itemdict).json()
    
    commid+=1
    
    #Update dictionary with next candidate in list and reset last indices
    itemdict.update([('committee_id',comm_ids[commid])
                     ,('last_index',[])
                     ,('last_contribution_receipt_date',[])])
    
    unitemdict.update([('committee_id',comm_ids[commid])])
    
# Concatenate all dfs
itemdf=pd.concat(idfs,sort=False,ignore_index=True)
itemdf=itemdf.drop_duplicates(subset='transaction_id')
udf=pd.concat(udfs,sort=False,ignore_index=True)
udf=udf.drop_duplicates()


#ITEMIZED DATA CLEANING#
itemdf['contributor_zip'] = itemdf['contributor_zip'].str[:5]

#Table slimming
cols = [
    'committee.name'
    ,'committee.party_full'
    ,'contribution_receipt_amount'
    ,'contribution_receipt_date'
    ,'entity_type'
    ,'contributor_city'
    ,'contributor_state'
    ,'contributor_street_1'
    ,'contributor_zip'
    ,'contributor_employer'
    ,'contributor_name'
    ,'contributor_occupation'
    ,'fec_election_type_desc'
    ,'load_date'
    ,'pdf_url'
    ,'transaction_id'
]

itemdf=itemdf[cols]
itemdf


#Create DataFrame with columns to match itemized table
unitemdf=[]
unitemdf = pd.DataFrame(columns=cols)

## Select data for unitemized df ##
unitemdf[['committee.name'
        ,'committee.party_full'
        ,'contribution_receipt_amount'
        ,'contribution_receipt_date'
        ,'fec_election_type_desc']] = udf[['committee_name'
                                        ,'party_full'
                                        ,'individual_unitemized_contributions'
                                        ,'coverage_end_date'
                                        ,'last_report_type_full']]

#Label as unitemized
unitemdf['contributor_name'] = 'Unitemized individual contributions'
unitemdf['entity_type'] = 'IND'

#Union Itemized and Unitemized contributions
ind_df = pd.concat([itemdf,unitemdf],sort=False,ignore_index=True)

#Write itemized individual results to local CSV
cwd = os.getcwd()
ind_df.to_csv(cwd+'/data/individual-senate-contributions.csv')

#Write out summary file from totals endpoint
udf.to_csv(cwd+'/senate-financial-summary.csv')

# #QA - compare itemized values to totals
# qadf = itemdf.groupby(['committee.name']).agg({'contribution_receipt_amount':['sum']})
# qadf

# totaldf = udf[['committee_name'
#              ,'individual_itemized_contributions'
#              ,'individual_unitemized_contributions']]

# qadf = qadf.merge(totaldf, left_on='committee.name',right_on='committee_name')

{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'is_individual': 'true', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_receipt_date': [], 'committee_id': 'C00710087'}
{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'is_individual': 'true', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_receipt_date': [], 'committee_id': 'C00314575'}
{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'is_individual': 'true', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_receipt_date': [], 'committee_id': 'C00709899'}
{'per_page': '100', 'sort': 'contribution_receipt_date', 'api_key': 'egxSs7endLz5xMuoprm5zfVZCeoyeZbO5D6HFzJz', 'is_individual': 'true', 'two_year_transaction_period': '2020', 'last_index': [], 'last_contribution_rece

IndexError: list index out of range

In [135]:
## SENATE CANDIDATE ID SEARCH ##
#Set search for all 2020 (two-year transaction period) Maine Senate candidates
cand_params = {'election_year':period
            ,'state':cand_state
            ,'office':'S'
            ,'api_key':config.api_key}

#Requests candidate info
cand_r = requests.get('https://api.open.fec.gov/v1/candidates/search',params=cand_params).json()

#Locates and sets Committee ID from 'principal_committees' sub-array
#Output list of IDs
cand_ids=json_normalize(data=cand_r['results'],record_path='principal_committees')['candidate_id'].tolist()

### INDEPENDENT EXPENDITURE RETRIEVAL ###
#Reset committee ID
candid=0

iedict = {
    'per_page':'100'
    ,'api_key':config.api_key
    ,'is_individual':'true'
    ,'two_year_transaction_period':cycle
    ,'last_index':[]
    ,'last_disbursement_date':[]
    ,'candidate_id':cand_ids[candid]
}

#Page through results for each committee id
for x in comm_ids:
    
    #Get first itemized payload for a candidate
    ier = requests.get('https://api.open.fec.gov/v1/schedules/schedule_e/',params=iedict).json()
    
    #Print itemdict to validate
    print(iedict)
    
    #Last page variables
    while ier['pagination']['last_indexes'] is not None:
        
        #Store results of payload
        edf = json_normalize(ier['results'])
        edfs.append(edf)
                
        #Assign last_index and date values, update itemdict
        last_index=pd.to_numeric(ier['pagination']['last_indexes']['last_index'])
        last_date=ier['pagination']['last_indexes']['last_disbursement_date']
        #Update dictionary with new indices
        iedict.update([('last_index',last_index)
                        ,('last_contribution_receipt_date',last_date)])

        #Get next payload with updated dict
        ier = requests.get('https://api.open.fec.gov/v1/schedules/schedule_e/',params=itemdict).json()
    
    commid+=1
    
    #Update dictionary with next candidate in list and reset last indices
    iedict.update([('committee_id',comm_ids[commid])
                     ,('last_index',[])
                     ,('last_disbursement_date',[])])

edf=pd.concat(edfs,sort=False,ignore_index=True)
edf=edf.drop_duplicates()

edf

KeyError: 'candidate_id'

In [149]:
## SENATE CANDIDATE ID SEARCH ##
cand_ids=json_normalize(data=r_cands['results'],record_path='principal_committees')['committee_id'].tolist()
cand_ids

#Output list of IDs
# cand_ids=json_normalize(data=cand_r['results']['candidate_id'].tolist())

['C00710087',
 'C00314575',
 'C00709899',
 'C00703413',
 'C00706739',
 'C00683193',
 'C00723411',
 'C00709279',
 'C00712323',
 'C00694364']

In [40]:
# #Google Credentials
# gc = pygsheets.authorize(service_file=cwd+'/../me-congress-2020-creds.json')

# #Select sheet and worksheet
# sh = gc.open('maine-senate-2020')
# # sh = gc.open_by_key('1AKrgHT9NLpoddV16B7_M_0PEjJmMQAGtXJUnLCTDHjA')
# wks = sh[0]

# #Clear sheet before load
# wks.clear(start='A1',fields='*')

# #Write contribs dataframe to sheet
# wks.set_dataframe(df_cull,(1,1))

HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/12bHf1qEtKtGGje0a3lBJJ8r-IvYcs3yl5jxbP6jNMCo/values/Individual%20itemized%21A3334%3AP6667?valueInputOption=USER_ENTERED&alt=json returned "Range ('Individual itemized'!A3334:P6667) exceeds grid limits. Max rows: 3333, max columns: 120">