In [1]:
import os
import requests
import random
import pickle
import networkx as nx
%matplotlib inline


# API credentials. KEY is sent as the 'X-API-Key' header by getJSON.
# Hard-coding secrets in a notebook leaks them to anyone who can read the file,
# so the environment variables below take precedence when they are set; the
# literals remain only as a fallback so existing runs keep working.
# NOTE(review): these keys are committed in plain text -- rotate them and drop
# the fallbacks once the environment variables are configured.
KEY = os.environ.get('PROPUBLICA_API_KEY', '91mTqXFEeSaCIbjC0fuaB1RABk4HANqS4I91qRBN')
# KEY2 is not referenced by the code visible here; presumably a spare key for
# the same API -- TODO confirm.
KEY2 = os.environ.get('PROPUBLICA_API_KEY_2', 'flLQZGa0kx8opciUA6M7R9iLPwfFPj5yFKKlCiVb')

# Highest bill numbers to visit when building the unfiltered graphs
# (buildHouseGraph / buildSenateGraph iterate from 1 up to these, inclusive).
# The trailing numbers look like values used for an earlier run -- TODO confirm.
MOST_RECENT_HR_BILL = 6536 #1490
MOST_RECENT_S_BILL = 3548  #606
# Congress session number used for the API requests.
CONGRESS = 114

In [2]:
import re 
from lxml import html
# Create the sets of bill numbers that passed at least one chamber of Congress
# by scraping the congress.gov search result pages.
# NOTE(review): the search URLs hard-code congress 114 instead of using CONGRESS.

def _scrape_bill_numbers(url):
    '''Return the set of bill numbers listed on one congress.gov search results page.

    url: full search URL to request.
    Returns a set of ints, one per result heading, built from the digits
    found in the heading text.
    Raises requests.HTTPError when the server answers with an error status.
    '''
    page = requests.get(url)
    # Fail loudly on an HTTP error instead of parsing the error page and
    # silently ending up with an empty or partial set of bills.
    page.raise_for_status()
    tree = html.fromstring(page.content)
    headings = tree.xpath('//span[@class="result-heading"]//a/text()')

    numbers = set()
    for heading in headings:
        digits = ''.join(filter(str.isdigit, heading))
        # A heading without digits carries no bill number; int('') would raise.
        if digits:
            numbers.add(int(digits))
    return numbers


# Senate bills: only the first results page (pageSize=250) is requested.
senate_passed_one_house = _scrape_bill_numbers('https://www.congress.gov/search?q={%22type%22:%22bills%22,%22source%22:%22legislation%22,%22congress%22:%22114%22,%22bill-status%22:%22passed-one%22,%22chamber%22:%22Senate%22}&pageSize=250&page=1')

# House bills: results pages 1 through 9 are requested and merged.
hr_passed_one_house = set()
for page_number in range(1, 10):
    hr_passed_one_house |= _scrape_bill_numbers('https://www.congress.gov/search?pageSort=documentNumber:asc&q={%22congress%22:%22114%22,%22type%22:%22bills%22,%22bill-status%22:[%22passed-one%22],%22chamber%22:%22House%22}&pageSize=250&page=' + str(page_number))


In [5]:
def getJSON(webpage):
    '''Request `webpage` with KEY in the 'X-API-Key' header and return the decoded JSON body.'''
    response = requests.get(webpage, headers={'X-API-Key': KEY})
    return response.json()

def getSponsors(bill_id, congress, progress = 'introduced'):
    '''Return (primary_sponsor_id, [cosponsor_ids]) for the given bill, or () if it does not qualify.

    bill_id:  bill identifier string used in the request URL (e.g. 'hr1').
    congress: congress session; converted with str() for the URL.
    progress: 'introduced' accepts every bill the API reports with status 'OK';
              'became law' additionally requires the bill's latest major action
              to start with 'Became Public Law'. Any other value yields ().
    '''
    url = ''.join(['https://api.propublica.org/congress/v1/', str(congress), '/bills/', bill_id, '/cosponsors.json'])
    edge = getJSON(url)

    # Anything other than an 'OK' response is treated as a faulty bill.
    if edge['status'] != 'OK':
        return ()

    if progress == 'became law':
        latest_action = edge['results'][0]['latest_major_action']
        if latest_action[:17] != 'Became Public Law':
            return ()
    elif progress != 'introduced':
        # Unrecognised progress filter: nothing qualifies.
        return ()

    bill = edge['results'][0]
    primary = bill['sponsor_id']
    cosponsor_ids = []
    for entry in bill['cosponsors']:
        cosponsor_ids.append(entry['cosponsor_id'])
    return (primary, cosponsor_ids)
    
def getHouseMembers(congress):
    '''Return a dictionary mapping each House member id of the given congress session to 0.

    An empty dictionary is returned when the API response status is not 'OK'.
    '''
    roster = getJSON('https://api.propublica.org/congress/v1/'+ str(congress) +'/house/members.json')
    if roster['status'] != 'OK':
        return {}
    return {rep['id']: 0 for rep in roster['results'][0]['members']}

def getSenateMembers(congress):
    '''Return a dictionary mapping each Senate member id of the given congress session to 0.

    An empty dictionary is returned when the API response status is not 'OK'.
    '''
    roster = getJSON('https://api.propublica.org/congress/v1/'+ str(congress) +'/senate/members.json')
    counts = {}
    if roster['status'] == 'OK':
        for senator in roster['results'][0]['members']:
            counts[senator['id']] = 0
    return counts


def getNameIDPairing(congress):
    '''Return a dictionary mapping member id to a 'First Last(Party)' label.

    Both chambers of the given congress session are requested (Senate first,
    then House). A chamber whose response status is not 'OK' contributes no
    entries; on a duplicate id the House label replaces the Senate one.
    '''
    base = 'https://api.propublica.org/congress/v1/'+ str(congress)
    senate_members = getJSON(base + '/senate/members.json')
    house_members = getJSON(base + '/house/members.json')

    pairing = {}
    for chamber in (senate_members, house_members):
        if chamber['status'] != 'OK':
            continue
        for person in chamber['results'][0]['members']:
            label = person['first_name'] + ' ' + person['last_name'] + '(' + person['party'] + ')'
            pairing[person['id']] = label
    return pairing

def buildHouseGraph(members, congress, progress='introduced'):
    '''Build the cosponsorship graph over House bills 1..MOST_RECENT_HR_BILL.

    members:  iterable of member ids; each becomes a node with an empty edge list.
    congress: congress session passed through to getSponsors.
    progress: filter passed through to getSponsors.
    Returns a dict mapping each member id to the list of primary sponsors of
    the bills that member cosponsored (one entry per bill, repeats kept).
    A cosponsor id that is not in `members` raises KeyError.
    '''
    graph = {member: [] for member in members}

    for number in range(1, MOST_RECENT_HR_BILL+1):
        print("At HR" + str(number))
        sponsorship = getSponsors("hr" + str(number), congress, progress)
        if not sponsorship:
            # getSponsors returned (): bill missing or filtered out.
            continue
        primary, cosponsors = sponsorship
        for cosponsor in cosponsors:
            graph[cosponsor].append(primary)

    return graph

def buildSenateGraph(members, congress, progress='introduced'):
    '''Build the cosponsorship graph over Senate bills 1..MOST_RECENT_S_BILL.

    members:  iterable of member ids; each becomes a node with an empty edge list.
    congress: congress session passed through to getSponsors.
    progress: filter passed through to getSponsors.
    Returns a dict mapping each member id to the list of primary sponsors of
    the bills that member cosponsored (one entry per bill, repeats kept).
    A cosponsor id that is not in `members` raises KeyError.
    '''
    graph = {member: [] for member in members}

    for number in range(1, MOST_RECENT_S_BILL+1):
        print("At S" + str(number))
        sponsorship = getSponsors("s" + str(number), congress, progress)
        if not sponsorship:
            # getSponsors returned (): bill missing or filtered out.
            continue
        primary, cosponsors = sponsorship
        for cosponsor in cosponsors:
            graph[cosponsor].append(primary)

    return graph

def buildHouseGraph_PassedAtLeastOneChamber(members, congress):
    '''Build the cosponsorship graph restricted to the bill numbers in hr_passed_one_house.

    members:  iterable of member ids; each becomes a node with an empty edge list.
    congress: congress session passed through to getSponsors.
    Returns a dict mapping each member id to the list of primary sponsors of
    the bills that member cosponsored. Bills are visited in ascending order.
    A cosponsor id that is not in `members` raises KeyError.
    '''
    graph = {member: [] for member in members}

    for number in sorted(hr_passed_one_house):
        print("At HR" + str(number))
        sponsorship = getSponsors("hr" + str(number), congress)
        if not sponsorship:
            # getSponsors returned (): nothing to add for this bill.
            continue
        primary, cosponsors = sponsorship
        for cosponsor in cosponsors:
            graph[cosponsor].append(primary)

    return graph

def buildSenateGraph_PassedAtLeastOneChamber(members, congress):
    '''Build the cosponsorship graph restricted to the bill numbers in senate_passed_one_house.

    members:  iterable of member ids; each becomes a node with an empty edge list.
    congress: congress session passed through to getSponsors.
    Returns a dict mapping each member id to the list of primary sponsors of
    the bills that member cosponsored. Bills are visited in ascending order.
    A cosponsor id that is not in `members` raises KeyError.
    '''
    graph = {}
    for m in members:
        graph[m] = []

    for i in sorted(senate_passed_one_house):
        print("At S{0}".format(i))
        # The bill set is already filtered, so getSponsors runs with its
        # default progress, as buildHouseGraph_PassedAtLeastOneChamber does.
        # (A `progress` argument was passed here before, but no such name is
        # defined in this function, which raised NameError.)
        edge = getSponsors("s{0}".format(i), congress)
        if edge: # not (,)
            for u in edge[1]:
                graph[u].append(edge[0])

    return graph


def ranker(members, graph, p=.15, iterations=1000000):
    '''Count node visits of a random walk with random restarts over `graph`.

    members:    dict of node -> visit count; updated in place and returned.
    graph:      dict of node -> list of neighbour nodes; must have as many
                entries as `members`.
    p:          probability of jumping to a uniformly random node instead of
                following an edge.
    iterations: number of steps taken after the random starting node.
    The walk also jumps to a random node whenever the current node has no
    neighbours. Every visited node, including the start, adds 1 to its count.
    '''
    assert len(graph) == len(members)
    nodes = list(graph.keys())

    current = random.choice(nodes)
    members[current] += 1

    for _ in range(iterations):
        outgoing = graph[current]
        # The random draw only happens when there is an edge to follow.
        if len(outgoing) != 0 and not random.random() < p:
            pick = random.randint(0, len(outgoing)-1)
            current = outgoing[pick]
        else:
            current = random.choice(nodes)
        members[current] += 1

    return members


def save_obj(obj, name):
    '''Pickle `obj` with the highest protocol into the file `name` + '.pkl'.'''
    target = name + '.pkl'
    with open(target, 'wb') as sink:
        sink.write(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL))

def load_obj(name):
    '''Return the object unpickled from the file `name` + '.pkl'.'''
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    source_path = name + '.pkl'
    with open(source_path, 'rb') as source:
        restored = pickle.load(source)
    return restored

In [8]:
# Fetch the member rosters and the id -> name lookup for the configured session.
house_members = getHouseMembers(CONGRESS)
senate_members = getSenateMembers(CONGRESS)
names = getNameIDPairing(CONGRESS)

# Persist each result as '<chamber><session>_<kind>.pkl' so later runs can
# reload it instead of calling the API again.
save_obj(house_members, 'house{0}_members'.format(CONGRESS))
save_obj(senate_members, 'senate{0}_members'.format(CONGRESS))
save_obj(names, 'congress{0}_names'.format(CONGRESS))

In [None]:
# Build the cosponsorship graphs over every bill number (one API request per
# bill), then pickle them so the crawl does not have to be repeated.
unfilteredHouseGraph = buildHouseGraph(house_members, CONGRESS)
unfilteredSenateGraph = buildSenateGraph(senate_members, CONGRESS)
save_obj(unfilteredHouseGraph, 'house{0}_unfilteredgraph'.format(CONGRESS))
save_obj(unfilteredSenateGraph, 'senate{0}_unfilteredgraph'.format(CONGRESS))