Skip to content
Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
78 lines (69 sloc) 2.57 KB
"""
@author: henryre
"""
import pandas as pd
import numpy as np
import urllib, json, argparse
# ---------------------------------------------------------------------------
# Module-level constants
# ---------------------------------------------------------------------------

# Congress number, used as the path component after 'congress/' in the URL
n_hr = '114'

# URL prefix for a single vote record; a vote number and '/data.json' are
# appended to it by load_vote_data
root_url = ''.join(['https://www.govtrack.us/data/congress/', n_hr, '/votes/2015/h'])

# Numeric encoding of each vote category: in favour = 1, against = -1,
# no position recorded = 0
vote_types = {
    'Aye': 1,
    'Yea': 1,
    'Nay': -1,
    'No': -1,
    'Not Voting': 0,
    'Present': 0,
}

# Numeric encoding of party codes
party_types = {
    'D': 1,
    'R': 2,
}
def simple_text(s):
    """Return ``s`` with spaces and non-ASCII characters removed.

    Only the space character ``' '`` is dropped; other characters with a
    code point below 128 (tabs, punctuation, digits, ...) are kept as-is.
    """
    return ''.join(ch for ch in s if ch != ' ' and ord(ch) < 128)
def load_vote_data(n_m):
    """Download and parse the JSON record of one vote.

    Args:
        n_m: Vote number as a string; it is concatenated between
            ``root_url`` and ``'/data.json'`` to form the request URL.

    Returns:
        The decoded JSON document (whatever ``json.loads`` produces for
        the response body).

    Raises:
        IOError: If the URL cannot be opened (``URLError`` is a subclass
            on Python 3).
        ValueError: If the response body is not valid JSON.
    """
    import contextlib

    # urlopen lives in urllib.request on Python 3 and directly in urllib on
    # Python 2; resolve whichever is available.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    url = root_url + n_m + '/data.json'
    # closing() guarantees the HTTP response is released even when reading
    # or parsing fails (the Python 2 response object is not a context
    # manager on its own).
    with contextlib.closing(urlopen(url)) as response:
        payload = response.read()
    # Decode explicitly so json.loads receives text on every Python version.
    return json.loads(payload.decode('utf-8'))
def get_names(major):
    """Collect every voter appearing in the given votes, with their party.

    Args:
        major: Iterable of vote numbers (strings) passed one by one to
            ``load_vote_data``.

    Returns:
        dict mapping each voter's ``display_name`` to the ``party`` value
        from the first record in which that name was encountered.
    """
    party_by_name = {}
    for roll_call in major:
        record = load_vote_data(roll_call)
        # record['votes'] groups voter entries by vote category; the
        # category itself is irrelevant here, only the voters matter.
        for voters in record['votes'].values():
            for voter in voters:
                name = voter['display_name']
                if name in party_by_name:
                    # Keep the party seen first for this name
                    continue
                party_by_name[name] = voter['party']
    return party_by_name
def update_votes(votes, names, n_m, m_ind):
    """Fill one column of the voting matrix from a single vote record.

    Args:
        votes: 2-D array indexed as ``votes[row, column]``; modified in
            place.
        names: Iterable of voter display names; the position of a name
            is the row that voter occupies in ``votes``.
        n_m: Vote number (string) to download via ``load_vote_data``.
        m_ind: Column of ``votes`` that receives this vote's values.

    Returns:
        The same ``votes`` object, after the update.

    Raises:
        KeyError: If the record contains a vote category that is not a
            key of ``vote_types``.
        ValueError: If a voter in the record is not present in ``names``.
    """
    data = load_vote_data(n_m)

    # Build the name -> row lookup once instead of scanning ``names`` for
    # every voter. setdefault keeps the first position of a repeated name,
    # matching list.index semantics, and enumerate accepts any iterable
    # (including a dict keys view).
    row_of = {}
    for row, name in enumerate(names):
        row_of.setdefault(name, row)

    for v_t, voters in data['votes'].items():
        for v in voters:
            value = vote_types[v_t]
            name = v['display_name']
            try:
                row = row_of[name]
            except KeyError:
                # Preserve the exception type list.index used to raise
                raise ValueError('%r is not in names' % (name,))
            votes[row, m_ind] = value
    return votes
def get_votes(major):
    """Build the voter-by-vote matrix for the given votes as a DataFrame.

    Args:
        major: List of vote numbers (strings). Each becomes one column,
            named ``'h' + number``.

    Returns:
        pandas.DataFrame with one row per voter (indexed by the voter's
        display name passed through ``simple_text``), one integer column
        per vote holding the ``vote_types`` code (0 where the voter does
        not appear in that vote), and a final ``'Party'`` column holding
        the ``party_types`` code.

    Raises:
        KeyError: If a voter's party is not a key of ``party_types``.
    """
    # Names and parties of everyone who voted on at least one of the votes
    party = get_names(major)
    # Materialise the keys so rows have a fixed, indexable order
    names_hr = list(party)

    # One extra trailing column for the party code. The builtin int is the
    # dtype the removed ``np.int`` alias used to refer to.
    votes = np.zeros((len(names_hr), len(major) + 1), dtype=int)
    for row, name in enumerate(names_hr):
        votes[row, -1] = party_types[party[name]]

    # enumerate gives each vote its own column directly, without a linear
    # major.index() search per vote.
    for col, m in enumerate(major):
        votes = update_votes(votes, names_hr, m, col)

    colnames = ['h' + m for m in major]
    colnames.append('Party')
    return pd.DataFrame(
        votes,
        index=[simple_text(n) for n in names_hr],
        columns=colnames,
    )
if __name__ == '__main__':
    # Command line: a single positional argument naming the CSV to write
    parser = argparse.ArgumentParser(description='Retrieve HoR voting records')
    parser.add_argument('output', type=str, help='Path to output CSV')
    args = parser.parse_args()

    # Vote numbers to download
    major = [
        '58', '75', '104', '106', '109',
        '144', '223', '224', '361', '374',
    ]

    # Build the voting matrix and save it as CSV at the requested path
    vote_df = get_votes(major)
    vote_df.to_csv(args.output)
You can’t perform that action at this time.