In [1]:
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pymongo
import requests
from bson.json_util import loads, dumps

In [2]:
# Read the API token and the MongoDB settings from environment variables so
# that no credential is written into the notebook. A missing variable raises
# KeyError in this cell.
propublica_token, mongo_user, mongo_pwd, mongo_initdb = (
    os.environ[name]
    for name in (
        'propublica_token',
        'MONGO_INITDB_ROOT_USERNAME',
        'MONGO_INITDB_ROOT_PASSWORD',
        'MONGO_INITDB_DATABASE',
    )
)

In [3]:
import getdata

In [4]:
# Ask the project-local getdata helper for the User-Agent string; it is passed
# on to the bills request in a later cell.
useragent = getdata.get_useragent()
# Echo the value as the cell output for inspection.
useragent

'python-requests/2.28.1'

In [5]:
# Fetch bills through the project-local getdata helper. The call returns a
# pair, unpacked into the bill records and `num_results`.
# NOTE(review): offset=0 presumably selects the first page of results, and
# num_results presumably is the total count -- confirm against getdata.
bills_list, num_results = getdata.get_bills_pp(
    propublica_token,
    useragent,
    email='bwl5cd@virginia.edu',
    offset=0,
)

In [6]:
# Display the fetched records (a list of dicts, one per bill) to check their
# structure before loading them into MongoDB.
bills_list

[{'bill_id': 'sres830-117',
  'bill_slug': 'sres830',
  'bill_type': 'sres',
  'number': 'S.RES.830',
  'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/sres830.json',
  'title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'short_title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'sponsor_title': 'Sen.',
  'sponsor_id': 'L000174',
  'sponsor_name': 'Patrick J. Leahy',
  'sponsor_state': 'VT',
  'sponsor_party': 'D',
  'sponsor_uri': 'https://api.propublica.org/congress/v1/members/L000174.json',
  'gpo_pdf_uri': None,
  'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/senate-resolution/830',
  'govtrack_url': 'https://www.govtrack.us/congress/bills/117/sres830',
  'introduced_date': '2022-11-14',
  'active': False,
  'last_vote': None,
  'house_passage': None,
  'senate_passage': None,
  'enac

In [7]:
# Connect to the MongoDB server reachable under the host name `mongo`,
# authenticating against the `admin` database.
# The username and password are percent-encoded with quote_plus before being
# placed in the URI: reserved characters such as '@', ':', '/' or '%' in a
# credential would otherwise corrupt the connection string.
myclient = pymongo.MongoClient(
    f"mongodb://{quote_plus(mongo_user)}:{quote_plus(mongo_pwd)}"
    f"@mongo:27017/{mongo_initdb}?authSource=admin"
)

In [8]:
# Handle to the 'contrans' database on the connected server.
# NOTE(review): the name is hard-coded here while the connection URI uses
# MONGO_INITDB_DATABASE -- confirm the two are meant to be the same database.
contrans_db = myclient['contrans']

In [9]:
# Remove a 'bills' collection left in the database, if there is one, so that
# the insert further down starts from an empty collection.
collist = contrans_db.list_collection_names()
if "bills" in collist:
    contrans_db["bills"].drop()

In [10]:
# Handle to the 'bills' collection; all inserts and queries below go through it.
bills = contrans_db['bills']

In [11]:
# Bulk-load the fetched bill records into the collection; the result object
# returned by pymongo is kept in `bills_insert`.
# NOTE(review): pymongo's insert_many rejects an empty list -- confirm that
# bills_list is never empty when this cell runs.
bills_insert = bills.insert_many(bills_list)

## Queries

In [12]:
# Get all records: an empty filter document is passed to find().
myquery = bills.find({})
# Serialise the cursor with bson.json_util and parse it straight back, so the
# results are displayed as a list of dicts.
loads(dumps(myquery))
# Each returned record carries an `_id` ObjectId in addition to the bill
# fields that were inserted.

[{'_id': ObjectId('6373b35cb400288627b70c8e'),
  'bill_id': 'sres830-117',
  'bill_slug': 'sres830',
  'bill_type': 'sres',
  'number': 'S.RES.830',
  'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/sres830.json',
  'title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'short_title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'sponsor_title': 'Sen.',
  'sponsor_id': 'L000174',
  'sponsor_name': 'Patrick J. Leahy',
  'sponsor_state': 'VT',
  'sponsor_party': 'D',
  'sponsor_uri': 'https://api.propublica.org/congress/v1/members/L000174.json',
  'gpo_pdf_uri': None,
  'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/senate-resolution/830',
  'govtrack_url': 'https://www.govtrack.us/congress/bills/117/sres830',
  'introduced_date': '2022-11-14',
  'active': False,
  'last_vote': None,
  'house_pa

In [13]:
# All bills sponsored by Leahy: the filter matches on the exact value of the
# 'sponsor_name' field.
myquery = bills.find({'sponsor_name': 'Patrick J. Leahy'})
# Round-trip through bson.json_util to display the matches as a list of dicts.
loads(dumps(myquery))

[{'_id': ObjectId('6373b35cb400288627b70c8e'),
  'bill_id': 'sres830-117',
  'bill_slug': 'sres830',
  'bill_type': 'sres',
  'number': 'S.RES.830',
  'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/sres830.json',
  'title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'short_title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'sponsor_title': 'Sen.',
  'sponsor_id': 'L000174',
  'sponsor_name': 'Patrick J. Leahy',
  'sponsor_state': 'VT',
  'sponsor_party': 'D',
  'sponsor_uri': 'https://api.propublica.org/congress/v1/members/L000174.json',
  'gpo_pdf_uri': None,
  'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/senate-resolution/830',
  'govtrack_url': 'https://www.govtrack.us/congress/bills/117/sres830',
  'introduced_date': '2022-11-14',
  'active': False,
  'last_vote': None,
  'house_pa

In [15]:
# Bills sponsored by Leahy, restricted to a few fields. The second argument is
# a projection: fields set to 1 are returned and '_id': 0 leaves the
# identifier out of the results.
myquery = bills.find(
    {'sponsor_name': 'Patrick J. Leahy'},
    {'_id': 0, 'number': 1, 'sponsor_name': 1, 'title': 1},
)
loads(dumps(myquery))

[{'number': 'S.RES.830',
  'title': 'A resolution expressing support for the designation of the week of October 24, 2022, to October 31, 2022, as "Bat Week".',
  'sponsor_name': 'Patrick J. Leahy'}]

In [17]:
# Every bill, projected down to number, sponsor and title (without '_id'),
# then turned into a DataFrame with one row per bill.
myquery = bills.find(
    {},
    {'_id': 0, 'number': 1, 'sponsor_name': 1, 'title': 1},
)
pd.DataFrame.from_records(loads(dumps(myquery)))

Unnamed: 0,number,title,sponsor_name
0,S.RES.830,A resolution expressing support for the design...,Patrick J. Leahy
1,S.5082,A bill to impose sanctions with respect to Gen...,Joshua Hawley
2,S.5077,A bill to amend the Alaska Native Claims Settl...,Dan Sullivan
3,S.5081,A bill to establish an Office of Environmental...,Alex Padilla
4,S.5079,A bill to designate the facility of the United...,Amy Klobuchar
5,S.5076,A bill to require training for employees of Fe...,Joni Ernst
6,S.5080,A bill to direct the Secretary of Transportati...,Richard Blumenthal
7,S.5084,A bill to reprioritize Federal law enforcement...,Bill Hagerty
8,S.5083,A bill to require the Secretary of State to su...,Robert Menendez
9,S.5085,A bill to prohibit the government of the Distr...,Ted Cruz


In [19]:
# Bills with more than four cosponsors: '$gt' is MongoDB's "greater than"
# comparison operator. The projection keeps the cosponsor count next to the
# number, sponsor and title, and the result is shown as a DataFrame.
myquery = bills.find(
    {'cosponsors': {'$gt': 4}},
    {'_id': 0, 'number': 1, 'sponsor_name': 1, 'title': 1, 'cosponsors': 1},
)
pd.DataFrame.from_records(loads(dumps(myquery)))

Unnamed: 0,number,title,sponsor_name,cosponsors
0,S.5081,A bill to establish an Office of Environmental...,Alex Padilla,7
1,S.5084,A bill to reprioritize Federal law enforcement...,Bill Hagerty,5
2,H.R.9296,To direct the Director of the Bureau of Prison...,Jackie Speier,5
3,H.R.9291,To require a report on the death of Shireen Ab...,André Carson,18


In [None]:
# Build a text index on the 'summary' field so the collection can be searched
# with $text queries. create_index takes a list of (field, index type) pairs
# and must be called, not subscripted.
bills.create_index([('summary', 'text')])