In [1]:
import json
import re

from crpapi import CRP

In [2]:
def create_crp_object(json_file='open_secrets_config.json',json_key='open_secrets_info'):
    """Build a CRP client from an API key stored in a JSON config file.

    Args:
        json_file: Path of the JSON file to read.
        json_key: Top-level key whose value is a mapping containing 'api_key'.

    Returns:
        The object produced by calling ``CRP`` with the configured API key.
    """
    with open(json_file) as config_handle:
        config = json.load(config_handle)

    api_key = config[json_key]['api_key']
    return CRP(api_key)

In [3]:
# Build the shared API client used by the cells below (reads the default config file).
crp = create_crp_object()

In [7]:
# get all basic state legislator information
# Two-letter codes for the 50 states plus DC; one lookup is made per code.
states = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI', 'ID', 'IL',
    'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT',
    'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
    'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
]

# Keep the raw result of each per-state lookup, in the same order as `states`.
legislators = []
for state in states:
    legislators += [crp.candidates.get(state)]

In [12]:
import pandas as pd

In [44]:
# Start from an empty frame whose columns are the attribute names of the first
# record in the first state's result.
first_record_attributes = legislators[0][0]['@attributes']
df_legislators = pd.DataFrame(columns=first_record_attributes.keys())

In [45]:
# add basic legislator data into dataframe
# Collect every record first and add the rows in one step: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
legislator_records = []
for state in legislators:
    # Iterating a dict yields only its keys, so a state result that is itself a
    # dict would contribute no rows; treat it as a single record instead.
    # NOTE(review): presumably the API returns a bare dict when a state has exactly
    # one legislator — confirm.
    candidates = [state] if isinstance(state, dict) else state
    for candidate in candidates:
        if isinstance(candidate, dict) and '@attributes' in candidate:
            legislator_records.append(candidate['@attributes'])

df_legislators = pd.concat(
    [df_legislators, pd.DataFrame(legislator_records)],
    ignore_index=True,
)

In [49]:
# Show the cid of the first row as a quick sanity check.
df_legislators.cid[0]

'N00035380'

In [51]:
# get memPFDprofiles for each legislator in df_legislators
mem_pfd_profiles = []
for cid in df_legislators.cid:
    try:
        mem_pfd_profiles.append(crp.candidates.pfd(cid))
    except Exception as exc:
        # Catch Exception rather than everything, so a kernel interrupt (Ctrl-C)
        # still stops this long loop, and report why the lookup failed.
        print('error with legislator id {}: {!r}'.format(cid, exc))
            

error with legislator id N00035187
error with legislator id N00009918


In [77]:
# Preview the first legislator row.
# NOTE(review): the recorded output has an 'Unnamed: 0' column that no cell above
# creates — presumably df_legislators was reloaded from a CSV in a cell that is no
# longer in the notebook; confirm before relying on Restart & Run All.
df_legislators.head(1)

Unnamed: 0,cid,firstlast,lastname,party,office,gender,first_elected,exit_code,comments,phone,...,website,webform,congress_office,bioguide_id,votesmart_id,feccandid,twitter_id,youtube_url,facebook_id,birthdate
0,N00035380,Bradley Byrne,BYRNE,R,AL01,M,2013,20,Lost Senate primary,202-225-4931,...,https://byrne.house.gov,https://byrne.house.gov/contact/email-me,119 Cannon House Office Building,B001289,27584,H4AL01123,RepByrne,,RepByrne,1955-02-16


In [79]:
# get candContrib for each legislator in df_legislators
import re

cand_contrib = []
sen_years = ['2012', '2014', '2016', '2018', '2020']
rep_years = ['2018', '2020']
last_cid = None
last_year = None
# Walk cid and office together so each row's office is read without a positional lookup.
for cid, office in zip(df_legislators['cid'], df_legislators['office']):
    # An office code containing "S1" or "S2" selects the longer list of cycles.
    years = sen_years if re.search("S[12]", office) else rep_years

    for year in years:
        try:
            cand_contrib.append(crp.candidates.contrib(cid,cycle=year))
        except Exception as exc:
            # Catch Exception rather than everything, so a kernel interrupt still
            # stops the loop, and report why the request failed.
            print('error with legislator id {}, year {}: {!r}'.format(cid, year, exc))

        last_year = year

    last_cid = cid

error with legislator id N00035380, year 2020
error with legislator id N00024817, year 2012
error with legislator id N00024817, year 2014
error with legislator id N00024817, year 2016
error with legislator id N00035774, year 2012
error with legislator id N00044298, year 2018
error with legislator id N00036915, year 2012
error with legislator id N00036915, year 2014
error with legislator id N00030829, year 2020
error with legislator id N00028133, year 2018
error with legislator id N00028133, year 2020
error with legislator id N00040876, year 2018
error with legislator id N00040876, year 2020
error with legislator id N00027510, year 2018
error with legislator id N00027510, year 2020
error with legislator id N00030608, year 2012
error with legislator id N00030608, year 2014
error with legislator id N00030608, year 2016
error with legislator id N00030608, year 2018
error with legislator id N00030608, year 2020
error with legislator id N00030780, year 2012
error with legislator id N00030780

In [87]:
# NOTE(review): scratch experiment — this overwrites the 'cid' attribute of the first
# record in the last cand_contrib entry with the placeholder 'foo', mutating the
# fetched data in place. Remove or revert before relying on cand_contrib.
cand_contrib[-1][0]['@attributes']['cid'] = 'foo'
cand_contrib[-1][0]['@attributes']

{'org_name': 'DISH Network',
 'total': '13500',
 'pacs': '0',
 'indivs': '13500',
 'cid': 'foo'}

In [89]:
# Preview the first legislator row again before querying its summary.
df_legislators.head(1)

Unnamed: 0,cid,firstlast,lastname,party,office,gender,first_elected,exit_code,comments,phone,...,website,webform,congress_office,bioguide_id,votesmart_id,feccandid,twitter_id,youtube_url,facebook_id,birthdate
0,N00035380,Bradley Byrne,BYRNE,R,AL01,M,2013,20,Lost Senate primary,202-225-4931,...,https://byrne.house.gov,https://byrne.house.gov/contact/email-me,119 Cannon House Office Building,B001289,27584,H4AL01123,RepByrne,,RepByrne,1955-02-16


In [90]:
# Fetch the 2018-cycle summary for the first row's cid to inspect the response layout.
crp.candidates.summary(df_legislators.iloc[0]['cid'],cycle='2018')

{'cand_name': 'Byrne, Bradley',
 'cid': 'N00035380',
 'cycle': '2018',
 'state': 'AL',
 'party': 'R',
 'chamber': 'H',
 'first_elected': '2013',
 'next_election': '2018',
 'total': '1460040.82',
 'spent': '831634.13',
 'cash_on_hand': '1074725.02',
 'debt': '0',
 'origin': 'Center for Responsive Politics',
 'source': 'https://www.opensecrets.org/members-of-congress/summary?cid=N00035380&cycle=2018',
 'last_updated': '12/31/2018'}

In [103]:
# define method to get generalized data for each legislator in df_legislators
def _tag_first_record(result, cid, year):
    """Best-effort: stamp cid and cycle onto result[0]['@attributes'].

    Results that do not have that layout (for example a plain dict) are left untouched.
    """
    try:
        attributes = result[0]['@attributes']
    except (KeyError, IndexError, TypeError):
        return
    if isinstance(attributes, dict):
        attributes['cid'] = cid
        attributes['cycle'] = year


def get_crp_cand_method_data(df,meth,current_calls = 0, sen_years=['2012','2014','2016','2018','2020'],rep_years=['2018','2020'],last_cid=None,last_year=None):
    """Call ``crp.candidates.<meth>(cid, cycle=year)`` for every row of ``df``.

    Rows are processed in order. A row whose 'office' value contains "S1" or "S2"
    is queried for each entry of ``sen_years``; every other row is queried for
    each entry of ``rep_years``. Fetching stops once ``current_calls`` reaches the
    per-method limit defined in ``max_calls_dict``.

    Args:
        df: DataFrame with 'cid' and 'office' columns.
        meth: Name of the method to call on the global ``crp.candidates``; must be
            a key of ``max_calls_dict``.
        current_calls: Number of calls already counted against the limit. Only
            calls that return without raising increment it.
        sen_years: Cycles queried for rows matched as Senate offices. Not mutated.
        rep_years: Cycles queried for all other rows. Not mutated.
        last_cid: If given, resume from the row with this cid. If it is not in
            ``df``, a message is printed and processing starts from the first row.
        last_year: If given together with a ``last_cid`` found in ``df``, that
            row is resumed from this cycle onwards; the cycle itself is fetched
            again. It is ignored when it is not one of the row's cycles.

    Returns:
        A tuple ``(cand_list, last_cid, last_year)``. ``cand_list`` holds the
        successful results in call order. ``last_cid`` and ``last_year`` identify
        the last (cid, cycle) pair that was attempted, or the values passed in if
        no call was attempted, and can be passed back in to resume.

    Raises:
        KeyError: If ``meth`` is not a key of ``max_calls_dict``.
    """
    max_calls_dict = {'pfd': 2000, 'summary': 2000, 'contrib': 200, 'industries': 200, 'contrib_by_ind': 2000, 'sector': 200}
    max_calls = max_calls_dict[meth]
    fetch = getattr(crp.candidates, meth)

    cand_list = []

    # Work out where to resume: drop the rows before last_cid and remember which
    # cycle to restart that row from.
    resume_year = None
    if last_cid is not None and len(df) > 0:
        cids = df['cid'].to_list()
        if last_cid in cids:
            df = df.iloc[cids.index(last_cid):]
            resume_year = last_year
        else:  # just start from beginning if last_cid not in df
            print('last_cid {} not in df. starting from beginning...'.format(last_cid))
            last_cid = df.iloc[0]['cid']
            # last_year belonged to a cid we could not find, so it does not apply here.
            last_year = None

    is_first_row = True
    for cid, office in zip(df['cid'], df['office']):
        years = sen_years if re.search("S[12]", office) else rep_years

        # Only the resumed row is trimmed; every later row gets its full cycle list.
        if is_first_row and resume_year is not None and resume_year in years:
            years = years[years.index(resume_year):]
        is_first_row = False

        for year in years:
            # >= rather than == so a counter that starts above the limit still stops.
            if current_calls >= max_calls:
                return cand_list, last_cid, last_year
            try:
                result = fetch(cid, cycle=year)
            except Exception as exc:
                # Catch Exception rather than everything so a kernel interrupt
                # still stops the loop.
                print('error with legislator id {}, year {}: {!r}'.format(cid, year, exc))
            else:
                current_calls += 1
                # Tag the result that was just fetched, not an unrelated global list.
                _tag_first_record(result, cid, year)
                cand_list.append(result)

            # Update both together so the resume point is always a consistent pair.
            last_cid, last_year = cid, year

    return cand_list, last_cid, last_year

In [104]:
# get candSummary (summary) for each legislator in df_legislators
# current_calls=16 seeds the helper's call counter.
# NOTE(review): presumably 16 summary calls had already been made before this cell ran — confirm.
meth = 'summary'
cand_sum, last_cid_sum, last_year_sum = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
    current_calls=16,
)

error with legislator id N00024817, year 2012
error with legislator id N00024817, year 2014
error with legislator id N00024817, year 2016
error with legislator id N00035774, year 2012
error with legislator id N00040644, year 2020
error with legislator id N00044298, year 2018
error with legislator id N00036915, year 2012
error with legislator id N00036915, year 2014
error with legislator id N00043290, year 2012
error with legislator id N00043290, year 2014
error with legislator id N00043290, year 2016
error with legislator id N00035516, year 2012
error with legislator id N00046125, year 2012
error with legislator id N00046125, year 2014
error with legislator id N00046125, year 2016
error with legislator id N00046125, year 2018
error with legislator id N00028138, year 2012
error with legislator id N00041731, year 2012
error with legislator id N00041731, year 2014
error with legislator id N00041731, year 2016
error with legislator id N00035483, year 2012
error with legislator id N00001799

In [105]:
import pickle

# cids whose pfd lookup failed; these match the two ids reported in the pfd cell's output.
mem_pfd_cid_errors = ['N00035187','N00009918']

# Bundle everything fetched so far into one tuple and write it to a dated pickle file.
data_to_save = (
    df_legislators,
    (mem_pfd_profiles, mem_pfd_cid_errors),
    (cand_sum, last_cid_sum, last_year_sum),
)
with open('20200621_fetched_api_data_opensecrets.pickle', 'wb') as pickle_file:
    pickle.dump(data_to_save, pickle_file)

In [None]:
##################################################################################################################
##################################################################################################################
##################################################################################################################

In [None]:
# get candIndustry (industries) for each legislator in df_legislators
meth = 'industries'
cand_ind, last_cid_ind, last_year_ind = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get candIndByInd (contrib_by_ind) for each legislator in df_legislators
# NOTE(review): the helper passes only cid and cycle; presumably contrib_by_ind also
# needs an industry code — confirm against the crpapi signature before running.
meth = 'contrib_by_ind'
cand_ind_by_ind, last_cid_ind_by_ind, last_year_ind_by_ind = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get candSector (sector) for each legislator in df_legislators
meth = 'sector'
cand_ind_sector, last_cid_sector, last_year_sector = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get candContrib for each legislator in df_legislators
# Rebinds cand_contrib to the list returned by the helper.
meth = 'contrib'
cand_contrib, last_cid_contrib, last_year_contrib = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get congCmteIndus for each legislator in df_legislators
# TODO: not implemented yet — the code below is a copy of the candIndByInd
# (contrib_by_ind) cell; running it re-fetches that data and overwrites cand_ind_by_ind.
meth = 'contrib_by_ind'
cand_ind_by_ind, last_cid_ind_by_ind, last_year_ind_by_ind = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get getOrgs for each legislator in df_legislators
# TODO: not implemented yet — the code below is a copy of the candIndByInd
# (contrib_by_ind) cell; running it re-fetches that data and overwrites cand_ind_by_ind.
meth = 'contrib_by_ind'
cand_ind_by_ind, last_cid_ind_by_ind, last_year_ind_by_ind = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get orgSummary for each legislator in df_legislators
# TODO: not implemented yet — the code below is a copy of the candIndByInd
# (contrib_by_ind) cell; running it re-fetches that data and overwrites cand_ind_by_ind.
meth = 'contrib_by_ind'
cand_ind_by_ind, last_cid_ind_by_ind, last_year_ind_by_ind = get_crp_cand_method_data(
    df_legislators,
    meth=meth,
)

In [None]:
# get independentExpend for each legislator in df_legislators


<crpapi.CRP object at 0x11d268cc0>
