# Congress 116 H.R.

This notebook will parse the relevant data for the 116th Congress. Hope to use this as a template for previous sessions and Senate.

## Wishlist of data
- Session
- Bill ID: 'bill_id'
- Bill word count: 
- Date of introduction: 'action_code': 'Intro-H'
- Date of passage House:  'status': 'PASS_OVER:HOUSE'
- Date of passage Senate
- Date signed into law by president

## Bring in one bill and see what it looks like

In [1]:
import json
import pandas as pd



In [2]:
# Load a single House bill (H.R. 1) as a sample so its structure can be inspected.
file_name = 'congress/data/116/bills/hr/hr1/data.json'

with open(file_name) as f:
    raw_text = f.read()

# Parse the JSON text into a plain Python dict.
hr1 = json.loads(raw_text)

In [3]:
hr1['actions'][0]

{'acted_at': '2019-01-03',
 'action_code': 'Intro-H',
 'references': [],
 'text': 'Introduced in House',
 'type': 'action'}

In [4]:
hr1.keys()

dict_keys(['actions', 'amendments', 'bill_id', 'bill_type', 'by_request', 'committee_reports', 'committees', 'congress', 'cosponsors', 'enacted_as', 'history', 'introduced_at', 'number', 'official_title', 'popular_title', 'related_bills', 'short_title', 'sponsor', 'status', 'status_at', 'subjects', 'subjects_top_term', 'summary', 'titles', 'updated_at', 'url'])

In [5]:
hr1['introduced_at']

'2019-01-03'

In [6]:
hr1['status']

'PASS_OVER:HOUSE'

In [7]:
hr1['status_at']

'2019-03-08T11:23:05-05:00'

In [8]:
# # pretty-print hr1
# print(json.dumps(hr1, indent = 4, sort_keys=True))

## Creating an empty list to store only the HR bills that were signed into law


In [9]:
# list(range(111, 117))

In [10]:
# # A loop to go thru all the HR bills in all Congresses
# for session in range(111, 117):
#     directory =  f'congress/data/{session}'
#     print(directory)


In [15]:
def pretty_print(j):
    """Print ``j`` serialized as JSON text with a two-space indent."""
    formatted = json.dumps(j, indent=2)
    print(formatted)

In [20]:
# Action codes to keep when trimming a bill's list of actions:
#   'Intro-H' - "Introduced in House"
#   'H37300'  - seen in the sample output on the House passage vote
#               (status 'PASS_OVER:HOUSE')
#   'E30000'  - "Signed by President." (status 'ENACTED:SIGNED')
action_codes = ['Intro-H', 'H37300', 'E30000']

In [29]:
def filter_actions(bill, codes=None):
    """Return only the actions whose ``action_code`` is one of the wanted codes.

    Parameters
    ----------
    bill : iterable of dict
        The list of action records of a bill (i.e. ``bill_json['actions']``),
        despite the parameter name.
    codes : collection of str, optional
        Action codes to keep. Defaults to the notebook-level ``action_codes``
        list when omitted.

    Returns
    -------
    list of dict
        The matching action records, in their original order.

    Notes
    -----
    Actions that have no ``'action_code'`` key are skipped rather than
    raising ``KeyError``; previously a single such action made the whole
    call fail.
    """
    wanted = action_codes if codes is None else codes
    # .get() yields None for actions without a code, which never matches.
    return [action for action in bill if action.get('action_code') in wanted]

In [30]:
filter_actions(hr_bills_to_keep[0]['actions'])

[{'acted_at': '2019-01-03',
  'action_code': 'Intro-H',
  'references': [],
  'text': 'Introduced in House',
  'type': 'action'},
 {'acted_at': '2019-01-10T15:11:20-05:00',
  'action_code': 'H37300',
  'how': 'by voice vote',
  'references': [{'reference': 'CR H430-431', 'type': 'text'}],
  'result': 'pass',
  'status': 'PASS_OVER:HOUSE',
  'suspension': None,
  'text': 'On motion to suspend the rules and pass the bill Agreed to by voice vote.',
  'type': 'vote',
  'vote_type': 'vote',
  'where': 'h'},
 {'acted_at': '2020-12-27',
  'action_code': 'E30000',
  'references': [],
  'status': 'ENACTED:SIGNED',
  'text': 'Signed by President.',
  'type': 'signed'}]

In [64]:
# Collect every H.R. bill of the 116th Congress that was signed into law,
# trimmed down to the fields and actions needed later.
hr_bills_to_keep = []

# Top-level keys that are removed from each kept bill.
keys_to_delete = ['related_bills', 'subjects', 'subjects_top_term', 'titles', 'cosponsors', 'sponsor']

# NOTE: 9057 is a hard-coded (exclusive) upper bound on the bill numbers scanned.
for n in range(1, 9057):
    file_name = f'congress/data/116/bills/hr/hr{ n }/data.json'
    try:
        with open(file_name) as f:
            hr_bill = json.load(f)
        if hr_bill['status'] != 'ENACTED:SIGNED':
            continue
        # Keep only the introduction / House passage / signing actions.
        hr_bill['actions'] = filter_actions(hr_bill['actions'])
    except FileNotFoundError:
        # No data file for this bill number; skip it quietly.
        continue
    except (json.JSONDecodeError, KeyError) as err:
        # Malformed or incomplete record: still skip (best effort), but say so
        # instead of silently losing the bill.
        print(f'skipping hr{n}: {err!r}')
        continue

    for key in keys_to_delete:
        # pop with a default is a no-op when the key is absent.
        hr_bill.pop(key, None)
    hr_bills_to_keep.append(hr_bill)

In [71]:
# Print the congress.gov text-page URL of every enacted bill.
# The bill text to scrape is in the element with id='billTextContainer'.
bill_numbers = [bill['number'] for bill in hr_bills_to_keep]
for bill_number in bill_numbers:
    url = f'https://www.congress.gov/bill/116th-congress/house-bill/{ bill_number }/text?r=1&s=3'
    print(url)

https://www.congress.gov/bill/116th-congress/house-bill/133/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/150/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/251/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/259/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/263/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/266/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/276/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/299/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/430/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/434/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/439/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/473/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-bill/504/text?r=1&s=3
https://www.congress.gov/bill/116th-congress/house-

In [65]:
# this is just the first HR bill
pretty_print(hr_bills_to_keep[0])

{
  "actions": [
    {
      "acted_at": "2019-01-03",
      "action_code": "Intro-H",
      "references": [],
      "text": "Introduced in House",
      "type": "action"
    },
    {
      "acted_at": "2019-01-10T15:11:20-05:00",
      "action_code": "H37300",
      "how": "by voice vote",
      "references": [
        {
          "reference": "CR H430-431",
          "type": "text"
        }
      ],
      "result": "pass",
      "status": "PASS_OVER:HOUSE",
      "suspension": null,
      "text": "On motion to suspend the rules and pass the bill Agreed to by voice vote.",
      "type": "vote",
      "vote_type": "vote",
      "where": "h"
    },
    {
      "acted_at": "2020-12-27",
      "action_code": "E30000",
      "references": [],
      "status": "ENACTED:SIGNED",
      "text": "Signed by President.",
      "type": "signed"
    }
  ],
  "amendments": [
    {
      "amendment_id": "samdt2731-116",
      "amendment_type": "samdt",
      "chamber": "s",
      "number": "2731"
   

Among actions, the filters I want are:
- "action_code": "Intro-H"
- "action_code": "H37300" / "status": "PASS_OVER:HOUSE"
- "action_code": "E30000" / "status": "ENACTED:SIGNED" (E30000 always means ENACTED:SIGNED)

In [13]:
len(hr_bills_to_keep)

157