Massachusetts Legislature API reference (Swagger UI), Documents endpoints: https://malegislature.gov/api/swagger/index.html?url=/api/swagger/v1/swagger.json#/Documents

In [1]:
import requests
from tqdm.notebook import tqdm

In [2]:
def get_json(url, verbose=True, timeout=60):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Address to request.
    verbose : bool, optional
        Accepted for backward compatibility; it currently has no effect.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without a timeout a stalled
        connection blocks forever, which is fatal for a crawl that issues
        thousands of sequential requests.

    Returns
    -------
    The value produced by ``response.json()``.

    Raises
    ------
    Whatever ``requests.get``, ``response.raise_for_status()`` or
    ``response.json()`` raise (e.g. on a timeout or an HTTP error status).
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP error statuses as exceptions instead of parsing an error page.
    response.raise_for_status()
    return response.json()

In [3]:
%%time
# Download the list of document summaries from the Legislature API.
documents_url = "https://malegislature.gov/api/Documents"
documents = get_json(documents_url)
len(documents)

CPU times: user 111 ms, sys: 28.7 ms, total: 139 ms
Wall time: 8.65 s


7075

In [4]:
# Peek at the field names of the first document record.
documents[0].keys()

dict_keys(['BillNumber', 'DocketNumber', 'Title', 'PrimarySponsor', 'GeneralCourtNumber', 'Details', 'IsDocketBookOnly'])

In [5]:
# Show the first document record in full.
documents[0]

{'BillNumber': None,
 'DocketNumber': 'HD164',
 'Title': 'An Act relative to cost of living adjustments for retired public employees of the Commonwealth',
 'PrimarySponsor': {'Id': 'J_A1',
  'Name': 'James Arciero',
  'Type': 1,
  'Details': 'http://malegislature.gov/api/GeneralCourts/192/LegislativeMembers/J_A1'},
 'GeneralCourtNumber': 192,
 'Details': None,
 'IsDocketBookOnly': True}

In [6]:
# Documents whose 'Details' value is falsy (e.g. None), so there is no URL to follow.
missing_details = [
    document
    for document in documents
    if not document['Details']
]
len(missing_details)

115

In [7]:
# Documents flagged with a truthy 'IsDocketBookOnly'.
docket_book_only = [
    document
    for document in documents
    if document['IsDocketBookOnly']
]
len(docket_book_only)

139

In [8]:
# Keep only documents that have both a bill number and a details URL.
bills = [
    document
    for document in documents
    if document['BillNumber']
    if document['Details']
]
len(bills)

6432

In [9]:
%%time
def get_document_details(documents):
    """Yield the detail record for each document, skipping failures.

    For every item in ``documents`` the JSON at ``document['Details']`` is
    fetched with ``get_json`` and yielded. Progress is shown with ``tqdm``.

    This is deliberately best-effort: if fetching a document raises, a line
    identifying it by bill/docket number is printed together with the error
    and iteration moves on to the next document. The number of yielded
    records can therefore be smaller than ``len(documents)``.

    Parameters
    ----------
    documents : iterable of dict
        Records providing 'Details', 'BillNumber' and 'DocketNumber' keys.

    Yields
    ------
    The value returned by ``get_json`` for each document fetched successfully.
    """
    for document in tqdm(documents):
        try:
            yield get_json(document['Details'])
        except Exception as exc:
            # Report and skip so one bad document does not abort a long crawl.
            print(f"{document['BillNumber']}/{document['DocketNumber']}: {exc}")

# Materialize the generator: this issues one HTTP request per bill, so the cell is slow.
bill_details = list(get_document_details(bills))
len(bill_details)

  0%|          | 0/6432 [00:00<?, ?it/s]

CPU times: user 3min 47s, sys: 24.3 s, total: 4min 12s
Wall time: 1h 6min 20s


6432

In [10]:
import json

In [11]:
# Persist the fetched bill details to disk as indented JSON.
with open("ma_bills.json", mode="w") as output_file:
    json.dump(bill_details, fp=output_file, indent=2)
!ls -lh ma_bills.json

-rw-r--r--  1 bhrutledge  staff    40M Jul 10 07:01 ma_bills.json


In [12]:
# List the fields available on the first detailed bill record.
bill_details[0].keys()

dict_keys(['Title', 'BillNumber', 'DocketNumber', 'GeneralCourtNumber', 'PrimarySponsor', 'Cosponsors', 'BillHistory', 'LegislationTypeName', 'Pinslip', 'DocumentText', 'EmergencyPreamble', 'RollCalls', 'Attachments', 'CommitteeRecommendations', 'Amendments'])

In [13]:
# Bills with a non-empty 'RollCalls' value.
roll_call_bills = [
    details
    for details in bill_details
    if details['RollCalls']
]
len(roll_call_bills)

17

In [14]:
# Bills with a non-empty 'CommitteeRecommendations' value.
committee_bills = [
    details
    for details in bill_details
    if details['CommitteeRecommendations']
]
len(committee_bills)

274

In [15]:
# Bills that have both roll calls and committee recommendations.
complete_bills = [
    details
    for details in bill_details
    if details['RollCalls']
    if details['CommitteeRecommendations']
]
len(complete_bills)

12

In [16]:
# Use the last entry of complete_bills as the example bill.
bill = complete_bills[-1]

# Fields to show in the summary below; comment a key in or out to toggle it.
bill_keys = {
    'Title',
    'BillNumber',
    'DocketNumber',
    'GeneralCourtNumber',
#     'PrimarySponsor',
#     'Cosponsors',
    'BillHistory',
#     'LegislationTypeName',
#     'Pinslip',
#     'DocumentText',
#     'EmergencyPreamble',
    'RollCalls',
    'Attachments',
#     'CommitteeRecommendations',
    'Amendments',
}

# Display only the selected fields, in the bill's own key order.
{field: value for field, value in bill.items() if field in bill_keys}

{'Title': 'An Act creating a next-generation roadmap for Massachusetts climate policy',
 'BillNumber': 'S9',
 'DocketNumber': 'SD169',
 'GeneralCourtNumber': 192,
 'BillHistory': 'http://malegislature.gov/api/GeneralCourts/192/Documents/S9/DocumentHistoryActions',
 'RollCalls': [{'GeneralCourtNumber': 192,
   'Branch': 'House',
   'RollCallNumber': 2,
   'Details': 'http://malegislature.gov/api/GeneralCourts/192/Branches/House/RollCalls/2'},
  {'GeneralCourtNumber': 192,
   'Branch': 'House',
   'RollCallNumber': 3,
   'Details': 'http://malegislature.gov/api/GeneralCourts/192/Branches/House/RollCalls/3'},
  {'GeneralCourtNumber': 192,
   'Branch': 'Senate',
   'RollCallNumber': 19,
   'Details': 'http://malegislature.gov/api/GeneralCourts/192/Branches/Senate/RollCalls/19'}],
 'Attachments': [],
 'Amendments': [{'AmendmentNumber': None,
   'ParentBillNumber': 'S9',
   'Branch': 'Senate',
   'GeneralCourtNumber': 192,
   'Details': '/'},
  {'AmendmentNumber': '1',
   'ParentBillNumber':

In [17]:
# Show the committee recommendations recorded for the selected bill.
bill['CommitteeRecommendations']

[{'Action': 'Favorable',
  'FiscalAmounts': [],
  'Committee': {'CommitteeCode': 'TS30',
   'GeneralCourtNumber': 192,
   'Details': 'http://malegislature.gov/api/GeneralCourts/192/Committees/TS30'},
  'Votes': [{'Question': 'Ought to pass',
    'Bill': {'BillNumber': 'S9',
     'DocketNumber': 'SD169',
     'Title': 'An Act creating a next-generation roadmap for Massachusetts climate policy',
     'PrimarySponsor': {'Id': 'MJB0',
      'Name': 'Michael J. Barrett',
      'Type': 1,
      'Details': 'http://malegislature.gov/api/GeneralCourts/192/LegislativeMembers/MJB0'},
     'GeneralCourtNumber': 192,
     'Details': 'http://malegislature.gov/api/GeneralCourts/192/Documents/S9',
     'IsDocketBookOnly': False},
    'Committee': {'CommitteeCode': 'TS30',
     'GeneralCourtNumber': 192,
     'Details': 'http://malegislature.gov/api/GeneralCourts/192/Committees/TS30'},
    'Date': '2021-01-26T17:05:00',
    'Vote': [{'Favorable': [{'GeneralCourtNumber': 192,
        'MemberCode': 'MJR0