In [25]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
import pymongo
from bson.json_util import loads, dumps
from sqlalchemy import create_engine

In [24]:
# Read the database credentials from the process environment.
# Indexing os.environ raises KeyError for any variable that is not set.
(
    postgres_password,
    mongo_username,
    mongo_password,
    mongo_init_db,
) = (
    os.environ[variable]
    for variable in (
        'POSTGRES_PASSWORD',
        'MONGO_INITDB_ROOT_USERNAME',
        'MONGO_INITDB_ROOT_PASSWORD',
        'MONGO_INITDB_DATABASE',
    )
)

In [7]:
# SQLAlchemy engine for the "contrans" database on host "postgres", port 5432,
# connecting as user "postgres" through the psycopg2 driver.
# The password is percent-encoded before it goes into the URL (safe="" so that
# "/" is escaped as well): a raw "@", ":", "/" or "%" in the password would
# otherwise be read as a URL delimiter and corrupt the connection string.
# Passwords made only of unreserved characters are left unchanged by quote().
engine = create_engine(
    "postgresql+psycopg2://{user}:{pw}@postgres:5432/{db}".format(
        user="postgres",
        pw=quote(postgres_password, safe=""),
        db="contrans",
    )
)

In [9]:
# Load charwords.csv (path is relative to the current working directory)
# into a DataFrame.
charwords = pd.read_csv(filepath_or_buffer='charwords.csv')

In [13]:
# Remove the 'Unnamed: 0' column (presumably a saved index that read_csv picked
# up as a regular column - TODO confirm against how the CSV was written).
# errors='ignore' keeps this cell idempotent: because it rebinds `charwords`,
# a second run would otherwise raise KeyError once the column is already gone.
charwords = charwords.drop(columns=['Unnamed: 0'], errors='ignore')

In [14]:
# Write the frame to the 'charwords' table through the SQLAlchemy engine,
# replacing the table if it already exists. The DataFrame index is not
# written, and rows are sent in batches of 1000.
charwords.to_sql(
    name='charwords',
    con=engine,
    if_exists='replace',
    index=False,
    chunksize=1000,
)

5500

In [23]:
# Words and tf-idf scores from charwords, joined to members on
# sponsor_id = propublica_id and restricted to members whose last name ends
# in "CORTEZ".
# NOTE(review): the doubled percent sign is presumably an escape for the
# driver's parameter syntax - confirm before changing it.
myquery = '''
SELECT c.word, c.tf_idf
FROM charwords c
INNER JOIN members m
    ON c.sponsor_id = m.propublica_id
WHERE m.last_name LIKE '%%CORTEZ'
'''
# The returned DataFrame is the cell's displayed result.
pd.read_sql_query(sql=myquery, con=engine)

Unnamed: 0,word,tf_idf
0,climate corps,0.383722
1,civilian climate,0.378639
2,civilian climate corps,0.378639
3,climate,0.187214
4,corps,0.140386
5,public housing,0.135222
6,civilian,0.121353
7,housing,0.116316
8,climate service,0.105739
9,members civilian climate,0.089262


In [16]:
# Fetch every column and row of the members table; the resulting DataFrame
# is the cell's displayed result.
myquery = '''
SELECT * 
FROM members
'''
pd.read_sql_query(sql=myquery, con=engine)

Unnamed: 0,title,short_title,first_name,middle_name,last_name,suffix,congress,chamber,icpsr,state,...,office,phone,fax,missed_votes_pct,votes_with_party_pct,votes_against_party_pct,DWNOMINATE,propublica_id,propublica_endpoint,last_updated
0,Representative,Rep.,Alma,,ADAMS,,117.0,House,21545.0,NC,...,2436 Rayburn House Office Building,202-225-1510,,0.32,99.02,0.87,-0.465,A000370,https://api.propublica.org/congress/v1/members...,2022-11-11 09:00:11 -0500
1,Representative,Rep.,Robert,B.,ADERHOLT,,117.0,House,29701.0,AL,...,266 Cannon House Office Building,202-225-4876,,1.51,96.24,3.65,0.380,A000055,https://api.propublica.org/congress/v1/members...,2022-11-11 09:00:10 -0500
2,Representative,Rep.,Pete,,AGUILAR,,117.0,House,21506.0,CA,...,109 Cannon House Office Building,202-225-3201,,0.32,98.80,1.09,-0.296,A000371,https://api.propublica.org/congress/v1/members...,2022-11-11 09:00:11 -0500
3,Representative,Rep.,Rick,,ALLEN,,117.0,House,21516.0,GA,...,570 Cannon House Office Building,202-225-2823,,3.03,90.92,8.97,0.699,A000372,https://api.propublica.org/congress/v1/members...,2022-11-11 09:00:09 -0500
4,Representative,Rep.,Colin,,ALLRED,,117.0,House,21900.0,TX,...,114 Cannon House Office Building,202-225-2231,,1.08,98.14,1.75,-0.432,A000376,https://api.propublica.org/congress/v1/members...,2022-11-11 09:00:10 -0500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551,"Senator, 1st Class",Sen.,Elizabeth,,WARREN,,117.0,Senate,41301.0,MA,...,309 Hart Senate Office Building,202-224-4543,,0.69,97.79,2.21,-0.753,W000817,https://api.propublica.org/congress/v1/members...,2022-10-12 09:45:23 -0400
552,"Senator, 1st Class",Sen.,Sheldon,,WHITEHOUSE,,117.0,Senate,40704.0,RI,...,530 Hart Senate Office Building,202-224-2921,,1.39,99.30,0.70,-0.354,W000802,https://api.propublica.org/congress/v1/members...,2022-10-01 01:45:44 -0400
553,"Senator, 1st Class",Sen.,Roger,,WICKER,,117.0,Senate,29534.0,MS,...,555 Dirksen Senate Office Building,202-224-6253,,1.39,89.73,10.27,0.377,W000437,https://api.propublica.org/congress/v1/members...,2022-10-01 01:45:44 -0400
554,"Senator, 3rd Class",Sen.,Ron,,WYDEN,,117.0,Senate,14871.0,OR,...,221 Dirksen Senate Office Building,202-224-5244,202-228-2717,0.92,99.07,0.93,-0.330,W000779,https://api.propublica.org/congress/v1/members...,2022-10-01 01:45:44 -0400


In [26]:
# MongoDB client for the server at mongo:27017, authenticating against the
# "admin" database (authSource) with mongo_init_db as the URI's database.
# The credentials are passed as keyword arguments rather than interpolated
# into the URI: inside a URI the username and password must be
# percent-escaped, so a raw value containing "@", ":", "/" or "%" would make
# the URI invalid or mis-parsed. Keyword values are used as-is, unescaped.
myclient = pymongo.MongoClient(
    f"mongodb://mongo:27017/{mongo_init_db}?authSource=admin",
    username=mongo_username,
    password=mongo_password,
)

In [50]:
# Handles to the 'contrans' database and its 'bills' collection.
contrans_db = myclient.get_database('contrans')
bills = contrans_db.get_collection('bills')

In [51]:
# Cursor over bills whose 'enacted' field is not null, projected down to the
# short title, sponsor name and enacted value (the _id field is excluded).
myquery = bills.find(
    filter={'enacted': {'$ne': None}},
    projection={'_id': 0, 'short_title': 1, 'sponsor_name': 1, 'enacted': 1},
)

In [52]:
# Materialise the enacted bills into a DataFrame via a BSON-JSON round trip.
# The cursor is cloned before it is iterated so that `myquery` itself is never
# consumed: iterating the original cursor exhausts it, and re-running this
# cell would then build an empty frame and fail in groupby with a KeyError
# on 'sponsor_name'.
laws = pd.DataFrame.from_records(loads(dumps(myquery.clone())))

# Number of enacted bills per sponsor, largest first. reset_index() leaves the
# count in a column labelled 0, which is the sort key.
(
    laws
    .groupby('sponsor_name')
    .size()
    .reset_index()
    .sort_values(0, ascending=False)
)

Unnamed: 0,sponsor_name,0
42,John Cornyn,8
28,Gary Peters,6
1,Amy Klobuchar,6
46,John Yarmuth,3
47,Jon Tester,3
...,...,...
33,Jack Reed,1
32,Jack Bergman,1
30,Gregory W. Meeks,1
29,Gerald E. Connolly,1


In [54]:
# Same per-sponsor tally computed directly with value_counts
# (sorted by count, descending).
laws['sponsor_name'].value_counts()

John Cornyn      8
Gary Peters      6
Amy Klobuchar    6
Rosa DeLauro     3
Jon Tester       3
                ..
Mike Levin       1
Joe Neguse       1
Bob Casey        1
Tim Ryan         1
Thom Tillis      1
Name: sponsor_name, Length: 92, dtype: int64

In [36]:
# Show a single document from the bills collection (empty filter matches any
# document) to inspect the available fields.
bills.find_one(filter={})

{'_id': ObjectId('637618635cf9156c15d4da95'),
 'bill_id': 'sres835-117',
 'bill_slug': 'sres835',
 'bill_type': 'sres',
 'number': 'S.RES.835',
 'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/sres835.json',
 'title': 'A resolution expressing support for the designation of October 2022 as "National Youth Justice Action Month".',
 'short_title': 'A resolution expressing support for the designation of October 2022 as "National Youth Justice Action Month".',
 'sponsor_title': 'Sen.',
 'sponsor_id': 'W000802',
 'sponsor_name': 'Sheldon Whitehouse',
 'sponsor_state': 'RI',
 'sponsor_party': 'D',
 'sponsor_uri': 'https://api.propublica.org/congress/v1/members/W000802.json',
 'gpo_pdf_uri': None,
 'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/senate-resolution/835',
 'govtrack_url': 'https://www.govtrack.us/congress/bills/117/sres835',
 'introduced_date': '2022-11-16',
 'active': None,
 'last_vote': None,
 'house_passage': None,
 'senate_passage': None,
 'e