# Compiling all data sources

In [1]:
import numpy as np
import pandas as pd
import requests
import json
import os
import time
# API keys are read from environment variables so they never appear in the notebook.
# os.getenv returns None when a variable is not set.
opensecretsapi = os.getenv('opensecretsapi')
congressapi = os.getenv('congressapi')

# Ask httpbin to echo back the User-Agent string sent by `requests`; the value is
# reused in the headers of later API calls.
r = requests.get('https://httpbin.org/user-agent', timeout=30)
r.raise_for_status()  # stop here on an HTTP error instead of failing while parsing the body
useragent = json.loads(r.text)['user-agent']

## OpenSecrets: Members

In [2]:
# Two-letter postal codes of the 50 states, in the order they are queried.
states = (
    'AK AL AR AZ CA CO CT DE FL GA '
    'HI IA ID IL IN KS KY LA MA MD '
    'ME MI MN MO MS MT NC ND NE NH '
    'NJ NM NV NY OH OK OR PA RI SC '
    'SD TN TX UT VA VT WA WI WV WY'
).split()

In [3]:
# Download the OpenSecrets `getLegislators` listing for every state and stack the results.
# The endpoint and headers do not change between states, so they are built once.
root = 'https://www.opensecrets.org/api/'
headers = {'User-Agent': useragent,
           'From': 'jkropko@virginia.edu'}
state_frames = []
for s in states:
    params = {'method': 'getLegislators',
              'id': s,
              'output': 'json',
              'apikey': opensecretsapi}
    r = requests.get(root, params=params, headers=headers, timeout=30)
    r.raise_for_status()  # report an HTTP error as such rather than as a JSON parsing failure
    myjson = json.loads(r.text)
    leg = pd.json_normalize(myjson, record_path=['response', 'legislator'])
    state_frames.append(leg)
# A single concat at the end avoids re-copying the accumulated table on every iteration.
opensecrets_members = pd.concat(state_frames) if state_frames else pd.DataFrame()

In [4]:
# Remove the literal text '@attributes.' from the column names, then save the table.
# regex=False is explicit because the default of `regex` differs between pandas versions,
# and when treated as a regular expression the '.' would match any character.
opensecrets_members.columns = opensecrets_members.columns.str.replace('@attributes.', '', regex=False)
opensecrets_members.to_csv('Data/opensecrets_members.csv', index=False)

## https://theunitedstates.io/: Members

In [5]:
# Download the current-legislators file from theunitedstates.io and flatten it into a table.
# NOTE: `headers` is the dictionary defined in the OpenSecrets members cell.
url = 'https://theunitedstates.io/congress-legislators/legislators-current.json'
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()  # report an HTTP error as such rather than as a JSON parsing failure
myjson = json.loads(r.text)
usio_members = pd.json_normalize(myjson)
# Strip only a leading 'id.' from the column names. The pattern is anchored and the dot
# escaped so that text inside a name (e.g. the 'ide' in 'bioguide') is never removed,
# whatever the pandas default for `regex` is.
usio_members.columns = usio_members.columns.str.replace(r'^id\.', '', regex=True)
usio_members.to_csv('Data/usio_members.csv', index=False)

## Congress API: members

In [8]:
# Fetch the Congress.gov member record for every bioguide id in `usio_members`.
# NOTE: `headers` is the dictionary defined in the OpenSecrets members cell.
root = 'https://api.congress.gov'
params = {'api_key': congressapi}

bio_ids = usio_members['bioguide']
member_frames = []
failed_bio_ids = []  # ids whose request or parsing failed, kept so they can be inspected or retried
for i, b in enumerate(bio_ids):
    if (i % 10)==0:
        print(f'Now working on member {i} of {len(bio_ids)}')
    if b == '':
        continue  # nothing to look up, so do not spend a request (or a pause) on it
    time.sleep(5)  # pause between consecutive API calls
    try:
        endpoint = f'/v3/member/{b}'
        r = requests.get(root + endpoint, params=params, headers=headers, timeout=30)
        r.raise_for_status()
        myjson = json.loads(r.text)
        mem = pd.json_normalize(myjson['member'])
        member_frames.append(mem)
    except Exception as err:
        # Best effort: one bad member must not abort the whole run, but the failure is
        # reported instead of being dropped silently. Catching Exception (not a bare
        # except) keeps Ctrl-C / kernel interrupt working during this long loop.
        failed_bio_ids.append(b)
        print(f'  skipped member {b}: {err!r}')
# A single concat at the end avoids re-copying the accumulated table on every iteration.
congress_members = pd.concat(member_frames) if member_frames else pd.DataFrame()
if failed_bio_ids:
    print(f'{len(failed_bio_ids)} member(s) could not be retrieved: {failed_bio_ids}')

Now working on member 0 of 539
Now working on member 10 of 539
Now working on member 20 of 539
Now working on member 30 of 539
Now working on member 40 of 539
Now working on member 50 of 539
Now working on member 60 of 539
Now working on member 70 of 539
Now working on member 80 of 539
Now working on member 90 of 539
Now working on member 100 of 539
Now working on member 110 of 539
Now working on member 120 of 539
Now working on member 130 of 539
Now working on member 140 of 539
Now working on member 150 of 539
Now working on member 160 of 539
Now working on member 170 of 539
Now working on member 180 of 539
Now working on member 190 of 539
Now working on member 200 of 539
Now working on member 210 of 539
Now working on member 220 of 539
Now working on member 230 of 539
Now working on member 240 of 539
Now working on member 250 of 539
Now working on member 260 of 539
Now working on member 270 of 539
Now working on member 280 of 539
Now working on member 290 of 539
Now working on member

In [73]:
# Persist the member table; the row index is not written.
congress_members.to_csv(path_or_buf='Data/congress_members.csv', index=False)

## Congress API: sponsored legislation

In [15]:
# Reload the saved member table from disk.
congress_members = pd.read_csv(filepath_or_buffer='Data/congress_members.csv')

In [31]:
# Collect the bills sponsored in the 118th Congress by each member, using the
# per-member URL stored in the 'sponsoredLegislation.url' column.
# NOTE: `headers` is the dictionary defined in the OpenSecrets members cell.
# NOTE(review): only one page of up to 250 items is requested per member — confirm no
# member has more sponsored items than that.
# NOTE(review): the rows are not tagged with the member whose URL they came from —
# confirm whether downstream code needs that link.
sponsor_frames = []
failed_urls = []  # URLs whose request or parsing failed, kept so they can be retried
params = {'api_key': congressapi,
          'limit': 250}
sponsored_urls = congress_members['sponsoredLegislation.url']
l = len(sponsored_urls)
for i, u in enumerate(sponsored_urls, start=1):
    time.sleep(5)  # pause between consecutive API calls
    print(f'Now compiling bills for member {i} of {l}')
    try:
        # Request this member's own URL `u`. The earlier code passed the unrelated
        # global `url`, so every iteration queried the same wrong endpoint.
        r = requests.get(u, headers=headers, params=params, timeout=30)
        r.raise_for_status()
        myjson = json.loads(r.text)
        spons = pd.json_normalize(myjson, record_path=['sponsoredLegislation'])
        if spons.empty:
            continue  # no records for this member; there is no 'congress' column to filter on
        spons = spons.query("congress==118")
        sponsor_frames.append(spons)
    except Exception as err:
        # Best effort: report and move on rather than hiding the failure. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        failed_urls.append(u)
        print(f'  skipped {u}: {err!r}')
# A single concat at the end avoids re-copying the accumulated table on every iteration.
sponsor = pd.concat(sponsor_frames) if sponsor_frames else pd.DataFrame()
if failed_urls:
    print(f'{len(failed_urls)} member(s) could not be retrieved')

Now compiling bills for member 1 of 528
Now compiling bills for member 2 of 528
Now compiling bills for member 3 of 528
Now compiling bills for member 4 of 528
Now compiling bills for member 5 of 528
Now compiling bills for member 6 of 528
Now compiling bills for member 7 of 528
Now compiling bills for member 8 of 528
Now compiling bills for member 9 of 528
Now compiling bills for member 10 of 528
Now compiling bills for member 11 of 528
Now compiling bills for member 12 of 528
Now compiling bills for member 13 of 528
Now compiling bills for member 14 of 528
Now compiling bills for member 15 of 528
Now compiling bills for member 16 of 528
Now compiling bills for member 17 of 528
Now compiling bills for member 18 of 528
Now compiling bills for member 19 of 528
Now compiling bills for member 20 of 528
Now compiling bills for member 21 of 528
Now compiling bills for member 22 of 528
Now compiling bills for member 23 of 528
Now compiling bills for member 24 of 528
Now compiling bills for m

In [74]:
# Persist the sponsored-legislation table; the row index is not written.
sponsor.to_csv(path_or_buf='Data/bills.csv', index=False)

## Congress API: bills

## Congress API: committees/subcommittees

In [50]:
# First request to the committee listing: up to 250 entries, starting at offset 0.
# `headers` is the dictionary defined in an earlier cell.
url = 'https://api.congress.gov/v3/committee'
params = dict(api_key=congressapi, limit=250, offset=0)
r = requests.get(url, params=params, headers=headers)
myjson = json.loads(r.text)

In [51]:
# Flatten the records stored under the 'committees' key into a table and display it.
committees = pd.json_normalize(data=myjson, record_path='committees')
committees

Unnamed: 0,chamber,committeeTypeCode,name,systemCode,url,parent.name,parent.systemCode,parent.url,subcommittees
0,House,Other,Bicentenary Committee,hcza00,https://api.congress.gov/v3/committee/house/hc...,,,,
1,House,Standing,Energy (Ad Hoc) Committee,hhah00,https://api.congress.gov/v3/committee/house/hh...,,,,
2,House,Select,U.S. Role in Iranian Arms Committee,hlbz00,https://api.congress.gov/v3/committee/house/hl...,,,,
3,House,Select,"Children, Youth, and Families (Select) Committee",hlcf00,https://api.congress.gov/v3/committee/house/hl...,,,,
4,House,Select,Select Committee on the Climate Crisis,hlcn00,https://api.congress.gov/v3/committee/house/hl...,,,,
...,...,...,...,...,...,...,...,...,...
245,House,Standing,"Innovation, Data, and Commerce Subcommittee",hsif17,https://api.congress.gov/v3/committee/house/hs...,Energy and Commerce Committee,hsif00,https://api.congress.gov/v3/committee/house/hs...,
246,House,Standing,"Environment, Manufacturing, and Critical Mater...",hsif18,https://api.congress.gov/v3/committee/house/hs...,Energy and Commerce Committee,hsif00,https://api.congress.gov/v3/committee/house/hs...,
247,House,Standing,Natural Resources Committee,hsii00,https://api.congress.gov/v3/committee/house/hs...,,,,[{'name': 'National Parks and Public Lands Sub...
248,House,Standing,National Parks and Public Lands Subcommittee,hsii01,https://api.congress.gov/v3/committee/house/hs...,Natural Resources Committee,hsii00,https://api.congress.gov/v3/committee/house/hs...,


In [52]:
# Second request to the committee listing (offset 250); its rows are appended to `committees`.
# Re-running this cell appends the same rows again.
params = dict(api_key=congressapi, limit=250, offset=250)
r = requests.get(url, params=params, headers=headers)
myjson = json.loads(r.text)
com2 = pd.json_normalize(data=myjson, record_path=['committees'])
committees = pd.concat([committees, com2])

In [53]:
# Fetch the committee listing from offset 500 onward and append every page to `committees`.
# The first request is the same as before; further pages are requested only while the
# response advertises one, so entries past offset 750 are no longer dropped silently.
# NOTE(review): this assumes list responses carry a `pagination.next` link while more
# results remain — confirm against the Congress.gov API documentation. If the key is
# absent the loop stops after the first request.
page_size = 250
offset = 500
while True:
    params = {'api_key': congressapi,
              'limit': page_size,
              'offset': offset}
    r = requests.get(url, headers=headers,
                     params=params)
    myjson = json.loads(r.text)
    com2 = pd.json_normalize(myjson, record_path=['committees'])
    committees = pd.concat([committees, com2])
    if not myjson.get('pagination', {}).get('next'):
        break
    offset += page_size

In [75]:
# Persist the combined committee table; the row index is not written.
committees.to_csv(path_or_buf="Data/committees.csv", index=False)

## https://theunitedstates.io/: committee assignments

In [76]:
# Download the current committee-membership file. The JSON is iterated as a mapping whose
# keys become the 'committee_code' column and whose values are turned into DataFrames.
url = 'https://theunitedstates.io/congress-legislators/committee-membership-current.json'
r = requests.get(url, headers=headers)
myjson = json.loads(r.text)

# One frame per key; concatenating the dict puts each key in the outer index level,
# which reset_index exposes as the 'level_0' column.
frames_by_code = {code: pd.DataFrame(members) for code, members in myjson.items()}
committee_members = (
    pd.concat(frames_by_code, axis=0)
    .reset_index()
    .loc[:, ['level_0', 'rank', 'title', 'bioguide']]
    .rename(columns={'level_0': 'committee_code'})
)
committee_members.to_csv('Data/committee_members.csv', index=False)

## OpenSecrets API: members

In [72]:
# Download the OpenSecrets `getLegislators` listing for every state into `legislators`.
states = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
# The endpoint and headers do not change between states, so they are built once.
root = 'https://www.opensecrets.org/api/'
headers = {'User-Agent': useragent,
           'From': 'jkropko@virginia.edu'}
state_frames = []
for s in states:
    print(s)
    params = {'method': 'getLegislators',
              'id': s,
              'output': 'json',
              'apikey': opensecretsapi}
    r = requests.get(root, params=params, headers=headers, timeout=30)
    r.raise_for_status()  # report an HTTP error as such rather than as a JSON parsing failure
    myjson = json.loads(r.text)
    leg = pd.json_normalize(myjson, record_path=['response', 'legislator'])
    state_frames.append(leg)
# A single concat at the end avoids re-copying the accumulated table on every iteration.
legislators = pd.concat(state_frames) if state_frames else pd.DataFrame()

AK
AL
AR
AZ
CA
CO
CT
DE
FL
GA
HI
IA
ID
IL
IN
KS
KY
LA
MA
MD
ME
MI
MN
MO
MS
MT
NC
ND
NE
NH
NJ
NM
NV
NY
OH
OK
OR
PA
RI
SC
SD
TN
TX
UT
VA
VT
WA
WI
WV
WY


In [78]:
# Remove the literal text '@attributes.' from the column names, then save the table.
# str.strip() must not be used for this: its argument is a set of characters trimmed
# from BOTH ends, so it also eats letters of the real name (e.g. 'lastname' -> 'lastnam',
# 'bioguide_id' -> 'oguide_id').
legislators.columns = legislators.columns.str.replace('@attributes.', '', regex=False)
legislators.to_csv("Data/os_legislators.csv", index=False)

## OpenSecrets API: contributions

In [None]:
# Download the OpenSecrets `candContrib` records (cycle 2022) for every id in
# legislators['cid'] and stack them into `contributions`.
# NOTE(review): no `meta` is passed to json_normalize, so the rows only hold the fields
# of each 'contributor' record — confirm whether the candidate id needs to be attached.
cid = legislators['cid'].tolist()
root = 'https://www.opensecrets.org/api/'
headers = {'User-Agent': useragent,
           'From': 'jkropko@virginia.edu'}
contribution_frames = []
failed_cids = []  # ids whose request or parsing failed, kept so they can be retried
l = len(cid)
for i, c in enumerate(cid, start=1):
    print(f'Now working on {i} of {l}')
    params = {'method': 'candContrib',
              'cid': c,
              'cycle': 2022,
              'output': 'json',
              'apikey': opensecretsapi}
    try:
        r = requests.get(root, params=params, headers=headers, timeout=30)
        r.raise_for_status()
        myjson = json.loads(r.text)
        contrib = pd.json_normalize(myjson,
                                    record_path=['response',
                                                 'contributors',
                                                 'contributor'])
        contribution_frames.append(contrib)
    except Exception as err:
        # Best effort: report and move on rather than hiding the failure. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        failed_cids.append(c)
        print(f'  skipped {c}: {err!r}')
# A single concat at the end avoids re-copying the accumulated table on every iteration.
contributions = pd.concat(contribution_frames) if contribution_frames else pd.DataFrame()
if failed_cids:
    print(f'{len(failed_cids)} candidate(s) could not be retrieved: {failed_cids}')

Now working on 1 of 537
Now working on 2 of 537
Now working on 3 of 537
Now working on 4 of 537
Now working on 5 of 537
Now working on 6 of 537
Now working on 7 of 537
Now working on 8 of 537
Now working on 9 of 537
Now working on 10 of 537
Now working on 11 of 537
Now working on 12 of 537
Now working on 13 of 537
Now working on 14 of 537
Now working on 15 of 537
Now working on 16 of 537
Now working on 17 of 537
Now working on 18 of 537
Now working on 19 of 537
Now working on 20 of 537
Now working on 21 of 537
Now working on 22 of 537
Now working on 23 of 537
Now working on 24 of 537
Now working on 25 of 537
Now working on 26 of 537
Now working on 27 of 537
Now working on 28 of 537
Now working on 29 of 537
Now working on 30 of 537
Now working on 31 of 537
Now working on 32 of 537
Now working on 33 of 537
Now working on 34 of 537
Now working on 35 of 537
Now working on 36 of 537
Now working on 37 of 537
Now working on 38 of 537
Now working on 39 of 537
Now working on 40 of 537
Now worki

In [None]:
# Remove the literal text '@attributes.' from the column names, then save the table.
# str.strip() must not be used for this: its argument is a set of characters trimmed
# from BOTH ends, so it also eats letters of the real name (e.g. 'total' -> 'otal',
# 'pacs' -> 'pac').
contributions.columns = contributions.columns.str.replace('@attributes.', '', regex=False)
contributions.to_csv("Data/os_contributions.csv", index=False)

## Voteview: voting similarity matrix

## Voteview: ideology

In [None]:
# Read the Voteview HS118 members file straight from its URL and save a local copy.
url = 'https://voteview.com/static/data/out/members/HS118_members.csv'
ideology = pd.read_csv(url)
# to_csv is a DataFrame method; the bare call `to_csv(...)` raised NameError. Its result is
# not assigned back, so `ideology` keeps holding the table.
ideology.to_csv("Data/ideology.csv", index=False)

## MIT Elections: House and Senate