In [1]:
import pandas as pd
import numpy as np 
import psycopg2
from sqlalchemy import create_engine
import os
import pymongo
from bson.json_util import loads, dumps

In [2]:
postgres_password = os.environ['POSTGRES_PASSWORD']
mongo_username = os.environ['MONGO_INITDB_ROOT_USERNAME']
mongo_password = os.environ['MONGO_INITDB_ROOT_PASSWORD']
mongo_init_db = os.environ['MONGO_INITDB_DATABASE']

In [3]:
engine = create_engine('postgresql+psycopg2://{user}:{password}@{host}:{port}/{db}'.format(
    user = 'postgres',
    password = postgres_password,
    host = 'postgres',
    port = '5432',
    db = 'contrans'))

In [4]:
charwords = pd.read_csv('charwords.csv')

In [5]:
charwords.columns

Index(['Unnamed: 0', 'word', 'tf_idf', 'sponsor_id'], dtype='object')

In [6]:
charwords = charwords.drop(['Unnamed: 0'], axis=1)

In [7]:
charwords.to_sql('charwords', con=engine, chunksize=1000, index=False, if_exists='replace')

5500

In [8]:
myquery = '''
SELECT c.word, c.tf_idf
FROM charwords c
INNER JOIN members m
    ON c.sponsor_id = m.propublica_id
WHERE m.last_name like '%%GAETZ'

'''
pd.read_sql_query(myquery, con=engine)

Unnamed: 0,word,tf_idf
0,jerusalem,0.124535
1,digital health passes,0.104751
2,health passes,0.104751
3,digital health,0.100667
4,city david,0.093112
5,escambia,0.088801
6,hunter biden,0.084453
7,lionfish,0.081473
8,tags,0.080029
9,escambia county,0.077701


In [9]:
myquery = '''
SELECT * 
FROM members
'''
pd.read_sql_query(myquery, con=engine)

Unnamed: 0,title,short_title,first_name,middle_name,last_name,suffix,congress,chamber,icpsr,state,...,office,phone,fax,missed_votes_pct,votes_with_party_pct,votes_against_party_pct,DWNOMINATE,propublica_id,propublica_endpoint,last_updated
0,Representative,Rep.,Alma,,ADAMS,,117.0,House,21545.0,NC,...,2436 Rayburn House Office Building,202-225-1510,,0.32,99.04,0.86,-0.465,A000370,https://api.propublica.org/congress/v1/members...,2022-12-01 09:30:11 -0500
1,Representative,Rep.,Robert,B.,ADERHOLT,,117.0,House,29701.0,AL,...,266 Cannon House Office Building,202-225-4876,,1.49,96.30,3.59,0.380,A000055,https://api.propublica.org/congress/v1/members...,2022-12-01 09:30:10 -0500
2,Representative,Rep.,Pete,,AGUILAR,,117.0,House,21506.0,CA,...,109 Cannon House Office Building,202-225-3201,,0.32,98.82,1.07,-0.296,A000371,https://api.propublica.org/congress/v1/members...,2022-12-01 09:30:11 -0500
3,Representative,Rep.,Rick,,ALLEN,,117.0,House,21516.0,GA,...,570 Cannon House Office Building,202-225-2823,,2.98,91.05,8.84,0.699,A000372,https://api.propublica.org/congress/v1/members...,2022-12-01 09:30:10 -0500
4,Representative,Rep.,Colin,,ALLRED,,117.0,House,21900.0,TX,...,114 Cannon House Office Building,202-225-2231,,1.06,98.17,1.73,-0.432,A000376,https://api.propublica.org/congress/v1/members...,2022-12-01 09:30:11 -0500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
552,"Senator, 1st Class",Sen.,Elizabeth,,WARREN,,117.0,Senate,41301.0,MA,...,309 Hart Senate Office Building,202-224-4543,,0.67,97.85,2.15,-0.753,W000817,https://api.propublica.org/congress/v1/members...,2022-12-01 06:49:17 -0500
553,"Senator, 1st Class",Sen.,Sheldon,,WHITEHOUSE,,117.0,Senate,40704.0,RI,...,530 Hart Senate Office Building,202-224-2921,,1.35,99.32,0.68,-0.354,W000802,https://api.propublica.org/congress/v1/members...,2022-12-01 06:49:16 -0500
554,"Senator, 1st Class",Sen.,Roger,,WICKER,,117.0,Senate,29534.0,MS,...,555 Dirksen Senate Office Building,202-224-6253,,1.46,89.86,10.14,0.377,W000437,https://api.propublica.org/congress/v1/members...,2022-12-01 06:49:16 -0500
555,"Senator, 3rd Class",Sen.,Ron,,WYDEN,,117.0,Senate,14871.0,OR,...,221 Dirksen Senate Office Building,202-224-5244,202-228-2717,1.01,99.09,0.91,-0.330,W000779,https://api.propublica.org/congress/v1/members...,2022-12-01 06:49:16 -0500


In [10]:
myclient = pymongo.MongoClient(f"mongodb://{mongo_username}:{mongo_password}@mongo:27017/{mongo_init_db}?authSource=admin")

In [11]:
contrans_db = myclient['contrans']
bills = contrans_db['bills']

In [12]:
#bills.insert_many
bills.count_documents({})

17071

In [13]:
bills.find_one({})

{'_id': ObjectId('63764e009bcb2a47fa57a599'),
 'bill_id': 'sres835-117',
 'bill_slug': 'sres835',
 'bill_type': 'sres',
 'number': 'S.RES.835',
 'bill_uri': 'https://api.propublica.org/congress/v1/117/bills/sres835.json',
 'title': 'A resolution expressing support for the designation of October 2022 as "National Youth Justice Action Month".',
 'short_title': 'A resolution expressing support for the designation of October 2022 as "National Youth Justice Action Month".',
 'sponsor_title': 'Sen.',
 'sponsor_id': 'W000802',
 'sponsor_name': 'Sheldon Whitehouse',
 'sponsor_state': 'RI',
 'sponsor_party': 'D',
 'sponsor_uri': 'https://api.propublica.org/congress/v1/members/W000802.json',
 'gpo_pdf_uri': None,
 'congressdotgov_url': 'https://www.congress.gov/bill/117th-congress/senate-resolution/835',
 'govtrack_url': 'https://www.govtrack.us/congress/bills/117/sres835',
 'introduced_date': '2022-11-16',
 'active': False,
 'last_vote': None,
 'house_passage': None,
 'senate_passage': None,
 '

In [14]:
myquery = bills.find({'introduced_date': '2022-11-16'}, 
           {'_id': 0, 'short_title':1, 'sponsor_name':1, 'introduced_date':1}) #rows then columns

In [15]:
loads(dumps(myquery))

[{'short_title': 'A resolution expressing support for the designation of October 2022 as "National Youth Justice Action Month".',
  'sponsor_name': 'Sheldon Whitehouse',
  'introduced_date': '2022-11-16'},
 {'short_title': 'A resolution permitting the collection of clothing, toys, food, and housewares during the holiday season for charitable purposes in Senate buildings.',
  'sponsor_name': 'Jon Tester',
  'introduced_date': '2022-11-16'},
 {'short_title': 'A bill to amend the Truth in Lending Act to address certain issues relating to the extension of consumer credit, and for other purposes.',
  'sponsor_name': 'Jeff Merkley',
  'introduced_date': '2022-11-16'},
 {'short_title': 'A bill to authorize the Secretary of the Interior to issue a right-of-way permit with respect to a natural gas distribution main within Valley Forge National Historical Park, and for other purposes.',
  'sponsor_name': 'Bob Casey',
  'introduced_date': '2022-11-16'},
 {'short_title': 'A bill to amend the Omnib

In [16]:
pd.DataFrame.from_records(loads(dumps(myquery)))

In [17]:
myquery = bills.find({'enacted': {'$ne': None}}, 
           {'_id': 0, 'enacted':1, 'short_title':1, 'sponsor_name':1})

In [18]:
laws = pd.DataFrame.from_records(loads(dumps(myquery)))

In [19]:
laws['sponsor_name'].value_counts()

Gary Peters          10
John Cornyn           8
Amy Klobuchar         6
Rosa DeLauro          6
Jon Tester            5
                     ..
Cynthia M. Lummis     1
John Boozman          1
Jason Crow            1
Don Bacon             1
Bobby L. Rush         1
Name: sponsor_name, Length: 140, dtype: int64

In [20]:
myquery = '''
SELECT c.word, c.tf_idf
FROM charwords c
INNER JOIN members m
    ON c.sponsor_id = m.propublica_id
WHERE m.first_name LIKE '%%Gary' AND m.last_name like '%%PETERS'

'''
pd.read_sql_query(myquery, con=engine)

Unnamed: 0,word,tf_idf
0,cybersecurity,0.1856
1,fedramp,0.159005
2,infrastructure security agency,0.150585
3,cybersecurity infrastructure security,0.150385
4,cybersecurity infrastructure,0.149688
5,infrastructure security,0.144829
6,director cybersecurity,0.142051
7,director cybersecurity infrastructure,0.141411
8,security agency,0.134354
9,incident,0.128406


In [21]:
bills.drop_index([('bill_text', 'text')])
bills.create_index([('short_title', 'text')])

OperationFailure: index not found with name [bill_text_text], full error: {'ok': 0.0, 'errmsg': 'index not found with name [bill_text_text]', 'code': 27, 'codeName': 'IndexNotFound'}

In [None]:
#all the bills that had at least 5 co-sponsors and sort by most relevant to Ukraine
myquery = bills.find({'cosponsors':{'$gte':5}, '$text': {'$search': 'ukraine', '$caseSensitive': False}}, 
           {'_id': 0, 'score':{'$meta': 'textScore'}, 'short_title':1, 'sponsor_name':1, 'cosponsors':1})

In [None]:
relateukraine = pd.DataFrame.from_records(loads(dumps(myquery)))
relateukraine