In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to imported .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import requests
import pickle
import pandas as pd
import numpy as np
from processJSON import get_bill_char
from predictChamber import encode_data_pred
from keras.models import load_model

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
def _fetch_bill_detail(bill_slug, detail, timeout=30):
    '''
    Requests one detail endpoint for a bill and returns its first result entry.

    Relies on the notebook-level names bill_url, session and api_key.

    param:
    bill_slug - str slug identifying the bill
    detail - str endpoint name substituted into bill_url (e.g. 'subjects', 'cosponsors')
    timeout - seconds to wait on the request before giving up

    return:
    dict found at ['results'][0] of the decoded JSON payload

    raises:
    requests.HTTPError if the API answers with an error status
    '''
    response = requests.get(
        bill_url.format(str(session), bill_slug, detail),
        headers={'X-API-KEY' : api_key},
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of with a confusing KeyError below.
    response.raise_for_status()
    # Decode the body once; callers read several fields from the same entry.
    return response.json()['results'][0]


def preprocess_data(bill, members, member_char, chamber, cong_char):
    '''
    Preprocesses bill from current bill call to fit the form for encoding

    The bill dict is modified in place: 'subjects', 'sponsor' and 'cosponsors'
    entries are added from two extra API requests, and every cosponsor dict
    gains 'bioguide_id' and 'state' keys copied from its 'cosponsor_id' and
    'cosponsor_state' values.

    param:
    bill - json of bill from current bill api call (must contain 'bill_slug')
    members - dict of member parties
    member_char - member characteristics data, passed through to get_bill_char
    chamber - str name of chamber either house or senate
    cong_char - dict for congress json data for member characteristics

    return:
    list of bill characteristics for prediction, as produced by
    get_bill_char(..., get_status=False)

    raises:
    requests.HTTPError if either API request returns an error status
    '''
    bill_slug = bill['bill_slug']

    subject_info = _fetch_bill_detail(bill_slug, 'subjects')
    bill['subjects'] = [entry['name'] for entry in subject_info['subjects']]
    bill['sponsor'] = {'bioguide_id' : subject_info['sponsor_id'], 'state' : subject_info['sponsor_state']}

    cosponsors = _fetch_bill_detail(bill_slug, 'cosponsors')['cosponsors']
    for cosponsor in cosponsors:
        # Mirror the sponsor's key names so sponsor and cosponsors share one shape.
        cosponsor['bioguide_id'] = cosponsor['cosponsor_id']
        cosponsor['state'] = cosponsor['cosponsor_state']

    bill['cosponsors'] = cosponsors

    return get_bill_char(bill, members, member_char, chamber, cong_char, get_status=False)

In [4]:
# Directories holding each chamber's saved model artefacts.
house_model_name = 'models/houseModel'
senate_model_name = 'models/senateModel'

# Each model directory contains a pickled `params` dict.
# SECURITY: pickle.load can execute arbitrary code; only load params files
# that come from a trusted source.
with open(house_model_name + '/params', 'rb') as params_file:
    house_dict = pickle.load(params_file)
house_threshold = house_dict['thresh']
house_party_indicies = house_dict['party_indicies']
# NOTE(review): max_len is read from the house params only and reused for the
# senate encoding further down - confirm the senate model expects the same value.
max_len = house_dict['max_len']

with open(senate_model_name + '/params', 'rb') as params_file:
    senate_dict = pickle.load(params_file)
senate_threshold = senate_dict['thresh']
# Read the senate's own party indices (previously taken from house_dict,
# which silently applied the house mapping to senate bills).
senate_party_indicies = senate_dict['party_indicies']

# Remaining notebook-wide settings.
api_file = 'api_key.json'                      # JSON file whose 'key' entry holds the API key
member_char = 'data/congress_mem_char.json'    # path to the member characteristics JSON
session = 116                                  # congress number used when building bill URLs

In [5]:
# Endpoint listing active bills; {0} is filled with the chamber name.
url = 'https://api.propublica.org/congress/v1/116/{0}/bills/active.json'

# Read the API key from disk; the context manager closes the file handle,
# which the previous bare open() call left dangling.
with open(api_file, 'r') as f:
    json_data = json.load(f)
api_key = json_data['key']

In [6]:
# Load the congress characteristics JSON, closing the file once it is parsed.
with open('data/116/characteristics.json', 'r') as char_f:
    cong_char = json.load(char_f)

# Fetch the active bills for each chamber. raise_for_status() surfaces an API
# error directly instead of as a KeyError on the payload, and the timeout keeps
# a stalled connection from hanging the notebook.
recent_house = requests.get(url.format('house'), headers={'X-API-Key': api_key}, timeout=30)
recent_house.raise_for_status()
house_bills = recent_house.json()['results'][0]['bills']

recent_senate = requests.get(url.format('senate'), headers={'X-API-Key': api_key}, timeout=30)
recent_senate.raise_for_status()
senate_bills = recent_senate.json()['results'][0]['bills']

In [7]:
# Accumulators for the per-bill feature rows of each chamber.
house_output = []
senate_output = []
# Starts empty and is handed to every preprocess_data call.
members = {}

# member_char begins as a file path and is rebound to the parsed JSON.
# The isinstance guard keeps this cell re-runnable: on a second run the name
# already holds the parsed data, so it must not be passed to open() again.
if isinstance(member_char, str):
    with open(member_char, 'r') as f:
        member_char = json.load(f)

In [8]:
# URL template read by preprocess_data: {0} congress session, {1} bill slug,
# {2} detail endpoint name.
bill_url = 'https://api.propublica.org/congress/v1/{0}/bills/{1}/{2}.json'

# Build each list in a single step so that re-running this cell replaces the
# rows instead of appending a second copy of every bill.
house_output = [
    preprocess_data(bill, members, member_char, 'house', cong_char)
    for bill in house_bills
]

senate_output = [
    preprocess_data(bill, members, member_char, 'senate', cong_char)
    for bill in senate_bills
]

In [9]:
# Column labels for the rows returned by preprocess_data. The spellings are
# kept exactly as they are because they are runtime column names.
column_names = [
    'Democrat',
    'Republican',
    'Independant',
    'Subjects',
    'Sponsor Party',
    'Bipartisan',
    'Number Cosponsors',
    'Number Cosponsor States',
    'Outcome',
]

# One DataFrame per chamber, built from the collected feature rows.
house, senate = (
    pd.DataFrame(rows, columns=column_names)
    for rows in (house_output, senate_output)
)

In [10]:
# Encode each chamber's frame into the (characteristic, text) feature pair.
house_char_features, house_text_features = encode_data_pred(house, max_len, house_party_indicies, house_model_name)
# The senate frame is encoded against the senate model directory; it was
# previously passed house_model_name, which looks like a copy-paste slip.
# NOTE(review): confirm encode_data_pred expects the matching model directory here.
senate_char_features, senate_text_features = encode_data_pred(senate, max_len, senate_party_indicies, senate_model_name)

In [11]:
# Load the saved Keras model stored in each chamber's model directory.
house_model = load_model('{}/model.h5'.format(house_model_name))
senate_model = load_model('{}/model.h5'.format(senate_model_name))

# Both models take their inputs as [characteristic features, text features].
house_inputs = [house_char_features, house_text_features]
senate_inputs = [senate_char_features, senate_text_features]

house_predictions = house_model.predict(house_inputs)
senate_predictions = senate_model.predict(senate_inputs)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



In [12]:
def label_predictions(probabilities, threshold, labels):
    '''
    Turns probabilities into outcome labels.

    param:
    probabilities - 1-d numpy array of predicted probabilities
    threshold - cut-off; values >= threshold get labels[1], all others labels[0]
    labels - dict mapping 0 and 1 to the label text

    return:
    numpy array of label strings with the same length as probabilities
    (an empty array for empty input, where np.vectorize would raise)
    '''
    return np.where(probabilities >= threshold, labels[1], labels[0])


def build_prediction_table(slugs, probabilities, threshold, predictions):
    '''
    Assembles the per-bill report table, indexed by bill slug.

    param:
    slugs - list of bill slugs, one per probability
    probabilities - 1-d numpy array of predicted probabilities
    threshold - scalar model threshold, repeated on every row
    predictions - array of outcome labels aligned with probabilities

    return:
    pandas DataFrame indexed by 'Bill Slug'
    '''
    table = pd.DataFrame({'Bill Slug': slugs,
                          'Probability of Passing' : probabilities,
                          'Model Threshold' : threshold,
                          'Prediction' : predictions})
    return table.set_index('Bill Slug')


house_slugs = [entry['bill_slug'] for entry in house_bills]
senate_slugs = [entry['bill_slug'] for entry in senate_bills]

# Collapse the model outputs to one probability per bill.
house_prob = house_predictions.flatten()
senate_prob = senate_predictions.flatten()

pass_dict = {0 : 'Fail', 1 : 'Pass'}

house_thresholds = [house_threshold] * len(house_slugs)
senate_thresholds = [senate_threshold] * len(senate_slugs)

# A bill is predicted to pass when its probability reaches the model threshold.
final_house_predictions = label_predictions(house_prob, house_threshold, pass_dict)
final_senate_predictions = label_predictions(senate_prob, senate_threshold, pass_dict)

pred_house_df = build_prediction_table(house_slugs, house_prob, house_threshold, final_house_predictions)
pred_senate_df = build_prediction_table(senate_slugs, senate_prob, senate_threshold, final_senate_predictions)

# Rounding is applied only for display; the stored frames keep full precision.
print('House')
print(pred_house_df.round(6))
print()
print('Senate')
print(pred_senate_df.round(6))

House
           Probability of Passing  Model Threshold Prediction
Bill Slug                                                    
hr7095                   0.017266          0.71542       Fail
hr7010                   0.000000          0.71542       Fail
hr1957                   0.000805          0.71542       Fail
hr1240                   0.000002          0.71542       Fail
hr1218                   0.000000          0.71542       Fail
hr496                    0.000000          0.71542       Fail
hr192                    0.000058          0.71542       Fail
hr6800                   0.000010          0.71542       Fail
hr6509                   0.003645          0.71542       Fail
hr6172                   0.001737          0.71542       Fail
hr6168                   0.004145          0.71542       Fail
hr4739                   0.000000          0.71542       Fail
hjres76                  0.338842          0.71542       Fail
hr2066                   0.014545          0.71542       Fail
hr