In [43]:
import numpy as np
import pandas as pd
import requests
import os
import json
from bs4 import BeautifulSoup 
import time
import collections
collections.Callable = collections.abc.Callable

In [2]:
# Read the ProPublica API key from the environment so it is never hard-coded
# in the notebook; this raises KeyError if the variable is not set.
propublica_token = os.environ['propublica_token']

# Look up the User-Agent string via httpbin's /user-agent endpoint and keep the
# 'user-agent' field of the JSON reply; later requests pass it explicitly.
useragent_url = 'https://httpbin.org/user-agent'
r = requests.get(useragent_url)
useragent = json.loads(r.text)['user-agent']
useragent

'python-requests/2.28.1'

In [3]:
# Headers sent with the ProPublica API requests below: the API key, plus a
# User-Agent and a From address that identify the client.
headers = {'X-API-Key': propublica_token,
          'User-Agent':useragent,
          'From': 'jkropko@virginia.edu'}

# Goal: Get the bills sponsored by Bob Good
## Step 1: Get Bob Good's ID number

In [4]:
# Base URL and path parameters for the ProPublica Congress API members listing.
root = "https://api.propublica.org"
congress = '117'
chamber = 'house'
endpoint = f"/congress/v1/{congress}/{chamber}/members.json"

# Request the listing, then flatten results -> members into one row per member.
r = requests.get(root + endpoint, headers=headers)
myjson = json.loads(r.text)
membersdf = pd.json_normalize(myjson, record_path=['results', 'members'])

# Show the first three members, transposed so the many columns read as rows.
membersdf.head(3).T

Unnamed: 0,0,1,2
id,A000370,A000055,A000371
title,Representative,Representative,Representative
short_title,Rep.,Rep.,Rep.
api_uri,https://api.propublica.org/congress/v1/members...,https://api.propublica.org/congress/v1/members...,https://api.propublica.org/congress/v1/members...
first_name,Alma,Robert,Pete
middle_name,,B.,
last_name,Adams,Aderholt,Aguilar
suffix,,,
date_of_birth,1946-05-27,1965-07-22,1979-06-19
gender,F,M,M


In [5]:
# Keep only the rows whose last_name is exactly 'Good'.
bobgood = membersdf.query("last_name == 'Good'")

In [6]:
# Display the filtered row(s) to check which member(s) matched.
bobgood

Unnamed: 0,id,title,short_title,api_uri,first_name,middle_name,last_name,suffix,date_of_birth,gender,...,office,phone,fax,state,district,at_large,geoid,missed_votes_pct,votes_with_party_pct,votes_against_party_pct
158,G000595,Representative,Rep.,https://api.propublica.org/congress/v1/members...,Robert,,Good,,1965-09-11,M,...,1213 Longworth House Office Building,202-225-4711,,VA,5,False,5105,3.98,77.78,22.11


In [7]:
# After renumbering the rows from 0, take the 'id' value of the first match.
bobgoodid = bobgood.reset_index().loc[0, 'id']

## Step 2: Use Bob Good's ID to query the bills API

In [8]:
# Path for the bills this member introduced; the member ID fills the path slot.
endpoint = f'/congress/v1/members/{bobgoodid}/bills/introduced.json'

# Request without an offset and flatten results -> bills into one row per bill.
r = requests.get(root + endpoint, headers=headers)
myjson = json.loads(r.text)
bgbills1 = pd.json_normalize(myjson, record_path=['results', 'bills'])

In [9]:
# Same endpoint again, this time passing offset=20 as a query parameter.
r = requests.get(root + endpoint, params=dict(offset=20), headers=headers)
myjson = json.loads(r.text)
bgbills2 = pd.json_normalize(myjson, record_path=['results', 'bills'])

In [10]:
# Stack the two result tables and renumber the rows from 0.
bgbills = pd.concat((bgbills1, bgbills2), ignore_index=True)

# Show the first three bills, transposed so the columns read as rows.
bgbills.iloc[:3].transpose()

Unnamed: 0,0,1,2
congress,117,117,117
bill_id,hr8935-117,hr8767-117,hres1297-117
bill_type,hr,hr,hres
number,H.R.8935,H.R.8767,H.RES.1297
bill_uri,https://api.propublica.org/congress/v1/117/bil...,https://api.propublica.org/congress/v1/117/bil...,https://api.propublica.org/congress/v1/117/bil...
title,To amend the Labor-Management Reporting and Di...,To establish a private right of action for par...,"Designating the week beginning November 7, 202..."
short_title,To amend the Labor-Management Reporting and Di...,Empowering Parents Act,"Designating the week beginning November 7, 202..."
sponsor_title,Rep.,Rep.,Rep.
sponsor_id,G000595,G000595,G000595
sponsor_name,Robert Good,Robert Good,Robert Good


In [11]:
# congress.gov page address stored for the bill in the row labelled 11.
bgbills.loc[11, 'congressdotgov_url']

'https://www.congress.gov/bill/117th-congress/house-bill/5731'

In [12]:
# Append the plain-text view suffix to the bill's congress.gov address.
urltoscrape = bgbills.loc[11, 'congressdotgov_url'] + '/text?format=txt'

In [13]:
# Fetch the bill's text page. The API key header is left out here: only the
# User-Agent and From headers are sent.
r = requests.get(urltoscrape,
                headers = {'User-Agent': useragent,
                          'From': 'jkropko@virginia.edu'})
# Parse the response body with BeautifulSoup using the 'html.parser' backend.
myhtml = BeautifulSoup(r.text, 'html.parser')

In [28]:
# Text of the first <h3> matched by the "currentVersion" filter
# (indexing [0] raises IndexError when nothing matches).
myhtml.find_all('h3', "currentVersion")[0].text

'Shown Here:Introduced in House (10/26/2021)'

In [36]:
# Print the text of the first <pre> element on the page
# (indexing [0] raises IndexError when the page has no <pre>).
print(myhtml.find_all('pre')[0].text)

[Congressional Bills 117th Congress]
[From the U.S. Government Publishing Office]
[H.R. 5731 Introduced in House (IH)]








117th CONGRESS
  1st Session
                                H. R. 5731

 To provide that no Federal funds may be expended to implement certain 
         law enforcement partnerships, and for other purposes.


_______________________________________________________________________


                    IN THE HOUSE OF REPRESENTATIVES

                            October 26, 2021

Mr. Good of Virginia (for himself, Mr. Gosar, Mrs. Boebert, Mr. Duncan, 
 Mr. Perry, Mrs. Miller of Illinois, Mr. Cawthorn, Mr. Buck, Mr. Weber 
   of Texas, and Mr. Cloud) introduced the following bill; which was 
               referred to the Committee on the Judiciary

_______________________________________________________________________

                                 A BILL


 
 To provide that no Federal funds may be expended to implement certain 
         law enforcement pa

In [39]:
# Preview the text-view addresses for House bills 1 through 9.
for i in range(1, 10):
    print(f'https://www.congress.gov/bill/117th-congress/house-bill/{i}/text?format=txt')

https://www.congress.gov/bill/117th-congress/house-bill/1/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/2/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/3/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/4/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/5/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/6/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/7/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/8/text?format=txt
https://www.congress.gov/bill/117th-congress/house-bill/9/text?format=txt


In [60]:
def scrape_one_bill(url, delay=2, timeout=30):
    """Download one bill page and return the text of its first <pre> element.

    Parameters
    ----------
    url : str
        Address of the page to fetch (the notebook passes congress.gov
        '.../text?format=txt' addresses).
    delay : int or float, optional
        Seconds to pause before sending the request, so that repeated calls
        are spaced out. Defaults to 2, the pause the function always used.
    timeout : int or float, optional
        Seconds to wait for the server before requests gives up, so one
        stalled connection cannot hang a whole batch. Defaults to 30.

    Returns
    -------
    str or None
        Text of the first <pre> element, or None when the page contains no
        <pre> element (for example an error page).

    Raises
    ------
    requests.exceptions.RequestException
        If the request itself fails or times out.
    """
    time.sleep(delay)
    print('Now getting the text from ' + url)
    r = requests.get(url,
                     headers = {'User-Agent': useragent, 'From': 'jkropko@virginia.edu'},
                     timeout = timeout)
    myhtml = BeautifulSoup(r.text, 'html.parser')

    # find() returns None when there is no match, so the missing-text case is
    # handled explicitly instead of hiding every possible error behind a bare except.
    pre = myhtml.find('pre')
    if pre is None:
        print('No bill text found at ' + url + ' (HTTP status ' + str(r.status_code) + ')')
        return None
    return pre.text

In [61]:
# Text-view addresses for House bills 7 through 12.
urllist = [
    f'https://www.congress.gov/bill/117th-congress/house-bill/{billnum}/text?format=txt'
    for billnum in range(7, 13)
]

In [62]:
# Scrape every address in order; each entry is whatever scrape_one_bill returns.
bills = list(map(scrape_one_bill, urllist))

Now getting the text from https://www.congress.gov/bill/117th-congress/house-bill/7/text?format=txt
Now getting the text from https://www.congress.gov/bill/117th-congress/house-bill/8/text?format=txt
Now getting the text from https://www.congress.gov/bill/117th-congress/house-bill/9/text?format=txt
Now getting the text from https://www.congress.gov/bill/117th-congress/house-bill/10/text?format=txt
Now getting the text from https://www.congress.gov/bill/117th-congress/house-bill/11/text?format=txt
Now getting the text from https://www.congress.gov/bill/117th-congress/house-bill/12/text?format=txt


In [65]:
# Inspect the second scraped result (index 1, from the second address in urllist).
print(bills[1])

[Congressional Bills 117th Congress]
[From the U.S. Government Publishing Office]
[H.R. 8 Placed on Calendar Senate (PCS)]







                                                       Calendar No. 390
117th CONGRESS
  2d Session
                                 H. R. 8


_______________________________________________________________________


                   IN THE SENATE OF THE UNITED STATES

                             March 11, 2021

                                Received

                              May 24, 2022

                          Read the first time

                              May 25, 2022

            Read the second time and placed on the calendar

_______________________________________________________________________

                                 AN ACT


 
         To require a background check for every firearm sale.

    Be it enacted by the Senate and House of Representatives of the 
United States of America in Congress assembled,

SECTION 1. SHORT