In [1]:
import numpy as np
import pandas as pd
import requests
import json
import os 

In [2]:
# Read the OpenSecrets API key from the environment so it never appears in the notebook.
opensecretsapi = os.getenv('opensecretsapi')
# Ask httpbin to echo back the User-Agent string this client sends, so the same
# value can be passed explicitly in later request headers.
# The timeout keeps the cell from hanging indefinitely if httpbin is unreachable,
# and raise_for_status() reports an HTTP error here instead of a confusing
# JSON/KeyError on the following line.
r = requests.get('https://httpbin.org/user-agent', timeout=30)
r.raise_for_status()
useragent = json.loads(r.text)['user-agent']

In [8]:
# Headers sent with every API request below.
# The key has to be the real HTTP header name 'User-Agent'; a 'user_agent' key
# is transmitted as an unrelated custom header and does not set the client's
# User-Agent at all.
headers = {'User-Agent': useragent,
           'From': 'ean8fr@virginia.edu'}

 * Voteview:
   * Votes: https://voteview.com/articles/data_help_votes
   * Ideology: https://voteview.com/articles/data_help_members
 * Open Secrets:
   * Contributions: https://www.opensecrets.org/api/?method=candContrib&output=doc
   * Candidate info: https://www.opensecrets.org/api/?method=getLegislators&output=doc
 * Congress API:
   * Member info: https://api.congress.gov/#/member/member_list
   * Bill info (specifically, who sponsored the bill): https://api.congress.gov/#/bill/bill_details

## Problem 1
For each dataset, identify which features in the data identify a member of Congress. How can we use the IDs to match data about the same member of Congress across all 6 datasets?

* Voteview:
    * Votes:
        * icpsr
    * Ideology:
        * icpsr, bioguide ID
* Open Secrets:
    * Contributions:
        * CID
    * Candidate info:
        * CID, bioguide_id
* Congress API
    * Member
        * bioguide_id
    * Bill Info
        * sponsor -> bioguide_id
 

## Problem 2
Collect all the data from the OpenSecrets API. Loop over the states and pull all the data from getLegislators in JSON. Convert the JSON to a pandas DataFrame.

In [5]:
# Two-letter postal codes of the 50 states, sorted alphabetically by code.
states = (
    'AK AL AR AZ CA CO CT DE FL GA '
    'HI IA ID IL IN KS KY LA MA MD '
    'ME MI MN MO MS MT NC ND NE NH '
    'NJ NM NV NY OH OK OR PA RI SC '
    'SD TN TX UT VA VT WA WI WV WY'
).split()

In [46]:
# Download the legislators of every state from the OpenSecrets getLegislators
# method (one request per state) and stack the per-state tables into `results`.
root = 'https://www.opensecrets.org/api/'  # https, so the API key is not sent in clear text
state_frames = []
for state in states:
    parameters = {'method': 'getLegislators',
                  'apikey': opensecretsapi,
                  'id': state,
                  'output': 'json'}
    r = requests.get(root,
                     params = parameters,
                     headers = headers,
                     timeout = 30)
    # Stop at the state whose request failed instead of hitting a JSON/KeyError below.
    r.raise_for_status()

    result = json.loads(r.text)
    # Each legislator entry keeps its fields under the '@attributes' key.
    result_list = [legislator['@attributes']
                   for legislator in result['response']['legislator']]
    state_frames.append(pd.DataFrame(result_list))
    print(state)

# Concatenate once at the end rather than growing the frame inside the loop.
# Row labels intentionally restart at 0 for each state (no ignore_index), so
# label-based lookups on `results` behave exactly as they did before.
results = pd.concat(state_frames) if state_frames else pd.DataFrame()

AK
AL
AR
AZ
CA
CO
CT
DE
FL
GA
HI
IA
ID
IL
IN
KS
KY
LA
MA
MD
ME
MI
MN
MO
MS
MT
NC
ND
NE
NH
NJ
NM
NV
NY
OH
OK
OR
PA
RI
SC
SD
TN
TX
UT
VA
VT
WA
WI
WV
WY


## Problem 3: Bob Good's Top Contributors

In [56]:
# CIDs of all legislators in `results` whose last name is 'Good'.
cid = results.query("lastname == 'Good'")['cid']
# Select the first match by position instead of by the hard-coded row label 4,
# which only works while that row happens to carry label 4 in `results`.
# NOTE(review): assumes 'Good' matches a single legislator — confirm.
cid.iloc[0]

'N00045557'

In [70]:
# Use Bob Good's CID to request the top contributors to his campaign
# (candContrib method, 2022 cycle).
root = 'https://www.opensecrets.org/api/'  # https, so the API key is not sent in clear text
parameters = {'method': 'candContrib',
              'apikey': opensecretsapi,
              # First matching CID by position; avoids depending on row label 4.
              'cid': cid.iloc[0],
              'cycle': '2022',
              'output': 'json'}
r = requests.get(root,
                 params = parameters,
                 headers = headers,
                 timeout = 30)
# Report an HTTP error here rather than when the body is parsed in a later cell.
r.raise_for_status()
r

<Response [200]>

In [71]:
# Decode the JSON body of the candContrib response into Python objects.
good_contributers = json.loads(r.text)

In [72]:
# Extract the contributor records from the decoded payload and tabulate them.
# The guard makes the cell safe to re-run: after the first run
# `good_contributers` is already the extracted list, and indexing it with
# 'response' a second time would raise a TypeError.
if isinstance(good_contributers, dict):
    good_contributers = good_contributers['response']['contributors']['contributor']
# Each record keeps its fields under the '@attributes' key.
dictionaries = [contributor['@attributes'] for contributor in good_contributers]
good_df = pd.DataFrame(dictionaries)

In [79]:
# Display the contributors table (columns: org_name, total, pacs, indivs).
good_df

Unnamed: 0,org_name,total,pacs,indivs
0,House Freedom Fund,33200,10000,23200
1,Rst Marketing,13500,0,13500
2,Dimensional Fund Advisors,11600,0,11600
3,Kinsale Capital Group,11600,0,11600
4,Marcus Foundation,11600,0,11600
5,Western National Group,11600,0,11600
6,Majority Cmte PAC,10000,10000,0
7,Prescott Investors,9108,0,9108
8,Station Casinos,8324,0,8324
9,Gun Owners of America,7500,7500,0
