In [1]:
import json
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
import pymongo
import requests
from sqlalchemy import create_engine

In [2]:
# Read the API token and database credentials from the environment instead of
# hardcoding them. Indexing os.environ raises KeyError when a variable is unset,
# so a misconfigured environment fails in this cell.
# NOTE(review): 'propublica_token' is a lowercase key, unlike the other four.
propublica_token = os.environ['propublica_token']
postgres_password = os.environ['POSTGRES_PASSWORD']
mongo_username = os.environ['MONGO_INITDB_ROOT_USERNAME']
mongo_password = os.environ['MONGO_INITDB_ROOT_PASSWORD']
mongo_init_db = os.environ['MONGO_INITDB_DATABASE']

In [3]:
import getdata

In [4]:
# Fetch the three Voteview tables through the project's getdata helper
# (presumably members, roll-call votes, and member-level votes, going by the names).
members_vv, cvote_vv, memvotes_vv = getdata.get_voteview()

In [5]:
# User-agent value supplied by getdata; it is passed to the ProPublica calls.
useragent = getdata.get_useragent()

In [6]:
# Fetch ProPublica data (presumably member records, going by the variable name)
# using the API token, the user agent, and a contact email.
# NOTE(review): the contact email is hardcoded here and again in the get_bills_pp call.
members_pp = getdata.get_propublica(propublica_token, useragent, email='jkropko@virginia.edu')

In [7]:
# Combine the ProPublica and Voteview member tables into a single `members` frame.
members = getdata.merge_members(members_pp=members_pp, members_vv=members_vv)

In [8]:
# Show only the first row, transposed, so every column of the wide frame is readable.
members.head(1).T

Unnamed: 0,0
title,Representative
short_title,Rep.
first_name,Alma
middle_name,
last_name,ADAMS
suffix,
congress,117.0
chamber,House
icpsr,21545.0
state,NC


In [9]:
# Count how many rows of `members` have a missing (True) vs present (False) propublica_id.
members['propublica_id'].isna().value_counts()

False    556
Name: propublica_id, dtype: int64

In [10]:
# Preview the first row of cvote_vv, transposed for readability.
cvote_vv.head(1).T

Unnamed: 0,0
congress,117
chamber,House
rollnumber,1
date,2021-01-03
session,1
clerk_rollnumber,2
yea_count,216
nay_count,211
nominate_mid_1,-0.057
nominate_mid_2,0.038


In [11]:
# Preview the first row of memvotes_vv, transposed for readability.
memvotes_vv.head(1).T

Unnamed: 0,0
congress,117
chamber,House
rollnumber,1
icpsr,14066.0
cast_code,6
prob,100.0


# Initialize Postgres

In [12]:
# Open a connection to the Postgres server itself (no database name is given).
server = psycopg2.connect(host='postgres', port='5432', user='postgres', password=postgres_password)

# Run every statement outside a transaction block: the CREATE/DROP DATABASE
# statements issued through this connection are not allowed inside one.
server.autocommit = True

In [13]:
# Cursor on the server-level connection, used to issue the database DDL statements.
cursor = server.cursor()

In [14]:
# Recreate the contrans database from scratch so this cell can be re-run safely.
# DROP DATABASE IF EXISTS is a no-op when the database is absent. It replaces a
# bare `except:` that treated *any* failure of CREATE DATABASE (bad credentials,
# lost connection, ...) as "database already exists" and retried blindly.
# Both statements depend on server.autocommit = True, because PostgreSQL refuses
# CREATE/DROP DATABASE inside a transaction block.
cursor.execute("DROP DATABASE IF EXISTS contrans")
cursor.execute("CREATE DATABASE contrans")

## Add data to contrans DB

In [15]:
# Build the SQLAlchemy engine for the contrans database.
# The password is percent-encoded because it is embedded in a URL: a raw '@',
# ':', '/' or '%' in the secret would otherwise be read as a URL delimiter and
# corrupt the connection string. safe='' also encodes '/', and spaces become
# %20 rather than '+', so the value decodes back unchanged.
engine = create_engine('postgresql+psycopg2://{user}:{password}@{host}:{port}/{db}'.format(
    user = 'postgres',
    password = quote(postgres_password, safe=''),
    host = 'postgres',
    port = '5432',
    db = 'contrans'))

In [16]:
# Load each DataFrame into its own table through the contrans engine.
# if_exists='replace' overwrites an existing table of the same name, index=False
# leaves the DataFrame index out, and chunksize=1000 writes rows in batches of 1000.
# The last call is the cell's final expression, so its return value is what is displayed.
members.to_sql('members', con=engine, if_exists='replace', index=False, chunksize=1000)
memvotes_vv.to_sql('member_vote', con=engine, if_exists='replace', index=False, chunksize=1000)
cvote_vv.to_sql('rollcalls', con=engine, if_exists='replace', index=False, chunksize=1000)

1723

## Example queries

In [17]:
# Example query: read the whole members table back from Postgres and preview
# its first row (transposed) to confirm the load worked.
# NOTE(review): the result is named `rollcall` but holds rows from `members`.
myquery = '''
SELECT * FROM members
'''
rollcall = pd.read_sql(myquery, con=engine)
rollcall.head(1).T

Unnamed: 0,0
title,Representative
short_title,Rep.
first_name,Alma
middle_name,
last_name,ADAMS
suffix,
congress,117.0
chamber,House
icpsr,21545.0
state,NC


In [18]:
# Fetch bills from ProPublica; the helper returns a pair that is unpacked into
# `bills` and `num_results` (presumably a result count, going by the name).
# NOTE(review): the contact email is hardcoded here and again in the get_propublica call.
bills, num_results = getdata.get_bills_pp(propublica_token, useragent, email='jkropko@virginia.edu')

In [19]:
# Preview only the first bill record: displaying the whole list floods the
# notebook output with every bill dict and bloats the saved file.
bills[:1]

[{'bill_id': 'hr9285-117',
  'bill_slug': 'hr9285',
  'bill_type': 'hr',
  'number': 'H.R.9285',
  'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/hr9285.json',
  'title': 'To provide payment for patient navigator services under title XIX of the Social Security Act, and for other purposes.',
  'short_title': 'Patient Navigation Assistance Act',
  'sponsor_title': 'Rep.',
  'sponsor_id': 'D000623',
  'sponsor_name': 'Mark DeSaulnier',
  'sponsor_state': 'CA',
  'sponsor_party': 'D',
  'sponsor_uri': 'https://api.propublica.org/congress/v1/members/D000623.json',
  'gpo_pdf_uri': None,
  'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/house-bill/9285',
  'govtrack_url': 'https://www.govtrack.us/congress/bills/117/hr9285',
  'introduced_date': '2022-11-10',
  'active': False,
  'last_vote': None,
  'house_passage': None,
  'senate_passage': None,
  'enacted': None,
  'vetoed': None,
  'cosponsors': 1,
  'cosponsors_by_party': {'R': 1},
  'committees': 'Hou

In [26]:
# Connect to MongoDB. The credentials are passed as keyword arguments instead of
# being interpolated into the URI: inside a URI the username and password must be
# percent-escaped, and a raw '@', ':' or '/' in either one would break parsing.
# The default database and authSource stay in the URI exactly as before.
myclient = pymongo.MongoClient(
    f"mongodb://mongo:27017/{mongo_init_db}?authSource=admin",
    username=mongo_username,
    password=mongo_password)

In [27]:
# Handle to the "billsdb" database on the Mongo server.
billsdb = myclient["billsdb"]

In [28]:
# Start from a clean slate: if a "bills117" collection already exists in billsdb,
# drop it so re-running the notebook does not build on old contents.
collist = billsdb.list_collection_names()
if "bills117" in collist:
  billsdb.bills117.drop()