In this code, I download basic bill information for the 93rd-114th Congresses from the GovTrack API. I test out my request and processing code with the current Congress, then repeat back to the earliest year of data. I do some minor cleaning, then export the data to a csv. (To avoid requesting too much data at once, I did this in several batches; the for loops at the end of the code are functionally identical.)

1) Test code- Download data from 114th Congress (current)

In [54]:
##Import necessary packages
import requests
import urllib
import json
from __future__ import division
import math
import csv

In [98]:
##Set parameters for the get request 
base_url = "https://www.govtrack.us/api/v2/bill"
search_params = {"current_status":"enacted_signed", #limit to enacted bills signed by the president
                "format":"json", #set format
                 "congress":"114", #limit to current Congress
                 "limit":"243", #normally 100, I checked how many bills were in this subset and changed it here 
                 "fields":"current_status_date,display_number,link,sponsor_role,terms,title"} #fields of interest

In [99]:
r = requests.get(base_url, params=search_params)

In [100]:
print(r.url)

https://www.govtrack.us/api/v2/bill?fields=current_status_date%2Cdisplay_number%2Clink%2Csponsor_role%2Cterms%2Ctitle%2Ccommittees&format=json&limit=243&current_status=enacted_signed&congress=114


2) Check and parse the text

In [94]:
response_text = r.text
data=json.loads(response_text) #convert response to readable text

243

In [95]:
#extract the data, check that the lengths match up
data2 = data['objects']
len(data['objects'])
len(data2)

243

In [96]:
#extract sponsor party from sponsor info dictionary
for i in range(len(data2)):
    data2[i]['party'] = ""
    data2[i]['party'] = data2[i]['sponsor_role']['party']


In [97]:
#export the 114th Congress data
import csv
keys = data2[0].keys()
with open('bills_API.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(data2)

3) Repeat tested code for the remaining Congresses

In [158]:
alldata = [] #create a receptacle for acquired documents
for i in range(110,115):
    ##find out how many bills in each Congress 
    search_params["congress"] = str(i) #set the Congress number to request
    r = requests.get(base_url, params=search_params) #get request
    response_text = r.text
    data=json.loads(response_text) #convert
    count = data['meta']['total_count'] #extract number of bills from metadata
    #print(count)
    
    #request all bills
    search_params["limit"] = str(count) #set limit to number of bills in Congress 
    r = requests.get(base_url, params=search_params) #get request
    response_text = r.text
    data=json.loads(response_text) #convert
    data2 = data['objects'] #extract data
    #print(len(data2))
    
    for j in range(len(data2)): #extract sponsor party if present
        data2[j]['party'] = ""
        if data2[j]['sponsor_role'] == None:
            data2[j]['party'] = "NA"
        else:
            data2[j]['party'] = data2[j]['sponsor_role']['party']
        
    alldata.extend(data2) #add new bills to full data
    print(len(alldata))

456
841
1125
1421
1664


In [125]:
print(alldata[0]) #check that the export worked

{'current_status_date': '2007-12-26', 'sponsor_role': {'website': 'http://www.house.gov/lowey', 'description': "Representative for New York's 18th congressional district", 'role_type_label': 'Representative', 'current': False, 'state': 'NY', 'leadership_title': None, 'phone': None, 'caucus': None, 'startdate': '2007-01-04', 'id': 2040, 'congress_numbers': [110], 'title_long': 'Representative', 'enddate': '2009-01-03', 'senator_rank': None, 'person': 400246, 'party': 'Democrat', 'senator_class': None, 'role_type': 'representative', 'extra': None, 'district': 18, 'title': 'Rep.'}, 'link': 'https://www.govtrack.us/congress/bills/110/hr2764', 'display_number': 'H.R. 2764', 'party': 'Republican', 'title': 'H.R. 2764 (110th): Consolidated Appropriations Act, 2008'}


In [160]:
keys = alldata[0].keys() #export to CSV
with open('bills_API.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(alldata)

I downloaded the bill information in batches of 5 Congresses to keep from requesting too much data at once.

In [161]:
alldata = []
for i in range(105,110):
    #find out how many bills in each Congress 
    search_params["congress"] = str(i)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    count = data['meta']['total_count']
    #print(count)
    
    #request all bills
    search_params["limit"] = str(count)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    data2 = data['objects']
    #print(len(data2))
    
    for j in range(len(data2)):
        data2[j]['party'] = ""
        if data2[j]['sponsor_role'] == None:
            data2[j]['party'] = "NA"
        else:
            data2[j]['party'] = data2[j]['sponsor_role']['party']
        
    alldata.extend(data2)
    print(len(alldata))

403
1007
1390
1894
2377


In [162]:
keys = alldata[0].keys()
with open('bills0509_API.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(alldata)

In [163]:
alldata = []
for i in range(100,105):
    #find out how many bills in each Congress 
    search_params["congress"] = str(i)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    count = data['meta']['total_count']
    #print(count)
    
    #request all bills
    search_params["limit"] = str(count)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    data2 = data['objects']
    #print(len(data2))
    
    for j in range(len(data2)):
        data2[j]['party'] = ""
        if data2[j]['sponsor_role'] == None:
            data2[j]['party'] = "NA"
        else:
            data2[j]['party'] = data2[j]['sponsor_role']['party']
        
    alldata.extend(data2)
    print(len(alldata))

757
1420
2029
2502
2836


In [164]:
keys = alldata[0].keys()
with open('bills0004_API.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(alldata)

In [165]:
alldata = []
for i in range(95,100):
    #find out how many bills in each Congress 
    search_params["congress"] = str(i)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    count = data['meta']['total_count']
    #print(count)
    
    #request all bills
    search_params["limit"] = str(count)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    data2 = data['objects']
    #print(len(data2))
    
    for j in range(len(data2)):
        data2[j]['party'] = ""
        if data2[j]['sponsor_role'] == None:
            data2[j]['party'] = "NA"
        else:
            data2[j]['party'] = data2[j]['sponsor_role']['party']
        
    alldata.extend(data2)
    print(len(alldata))

804
1538
2065
2740
3425


In [166]:
keys = alldata[0].keys()
with open('bills9599_API.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(alldata)

In [167]:
alldata = []
for i in range(90,95):
    #find out how many bills in each Congress 
    search_params["congress"] = str(i)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    count = data['meta']['total_count']
    #print(count)
    
    #request all bills
    search_params["limit"] = str(count)
    r = requests.get(base_url, params=search_params)
    response_text = r.text
    data=json.loads(response_text)
    data2 = data['objects']
    #print(len(data2))
    
    for j in range(len(data2)):
        data2[j]['party'] = ""
        if data2[j]['sponsor_role'] == None:
            data2[j]['party'] = "NA"
        else:
            data2[j]['party'] = data2[j]['sponsor_role']['party']
        
    alldata.extend(data2)
    print(len(alldata))

0
0
0
767
1487


In [169]:
keys = alldata[0].keys()
with open('bills9394_API.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(alldata)