In [5]:
%matplotlib inline
import pandas as pd
import json
import numpy as np
import os

In [7]:
content_list = []
years = []
yearVotes = {}
congress_no = "data/108/"
# Documentation on the votes https://github.com/unitedstates/congress/wiki/votes
congress_votes = congress_no + "votes/"

# Top-level entries of the congress directory, minus the macOS Finder metadata file.
for content in os.listdir(congress_no):
    if content != '.DS_Store':
        content_list.append(content)

# Count the vote entries per year: names starting with "h" are tallied as
# House votes, names starting with "s" as Senate votes.
# sorted() makes the iteration order deterministic, and with it the year whose
# totals remain in house_count / senate_count once the loop has finished.
for year in sorted(os.listdir(congress_votes)):
    year_dir = os.path.join(congress_votes, year)
    # Stray files (e.g. the '.DS_Store' filtered above) are not year folders;
    # os.listdir() on them would raise, so skip anything that is not a directory.
    if not os.path.isdir(year_dir):
        continue
    years.append(year)
    house_count = 0
    senate_count = 0
    for chamber in os.listdir(year_dir):
        if chamber.startswith("h"):
            house_count += 1
        elif chamber.startswith("s"):
            senate_count += 1
    yearVotes[year] = {'senate_count': senate_count, 'house_count': house_count}

yearVotes

{'2003': {'house_count': 677, 'senate_count': 459},
 '2004': {'house_count': 544, 'senate_count': 216}}

In [661]:
# "Foreign" related bills and amendments in the House
res_docs = []
wordSearch = "Foreign"
# NOTE(review): house_path is not assigned in any cell visible in this notebook,
# and house_count is whatever the directory-scan loop left behind for the last
# year it processed -- confirm both before relying on a Restart & Run All.
for i in range(1, house_count + 1):
    path = house_path + str(i) + '/data.json'
    # The context manager closes each vote file as soon as it has been parsed,
    # so the loop no longer leaves one open handle per vote.
    with open(path, 'r') as f:
        x = json.load(f)
    # Case-sensitive substring match on the vote's question text.
    if wordSearch in x["question"]:
        res_docs.append(i)

res_docs

[314, 369, 424, 429, 539]

In [34]:
# Parse Vote Document
# https://www.govtrack.us/data/congress/108/votes/2003/h424/data.json

class Processor(object):
    """Tabulate roll-call votes whose question contains a search term.

    Files are read from paths built by string concatenation, so
    ``congress_num`` must end with a path separator (e.g. ``"data/108/"``):

    * votes: ``<congress_num>votes/<year>/<h|s><n>/data.json``
    * bills: ``<congress_num>bills/<type>/<type><number>/data.json``

    Results accumulate in ``self.res_dict`` (column name -> list of values,
    one entry per matching vote), which can be passed to ``pd.DataFrame``.
    Because the lists live on the instance, calling ``read_chamber_data``
    twice on the same object appends the matching votes a second time.
    """

    def __init__(self, congress_num, word_search):
        """Store the data directory and search term; create empty result columns.

        Args:
            congress_num: directory prefix of one congress, with trailing slash.
            word_search: case-sensitive substring looked for in each vote's
                ``question`` field.
        """
        self.congress_num = congress_num
        self.vote_types = ["Nay", "Not Voting", "Present", "Yea"]
        # Order matters: get_bill_info uses a substring test, so the longer
        # type names must be tried before the short 'hr' and 's'.
        self.bill_types = ['hconres', 'hjres', 'hres', 'sconres', 'sjres', 'sres', 'hr', 's']
        self.res_dict = {}
        for column in ('bill_title', 'isAmendment', 'result', 'date', 'question',
                       'chamber', 'year', 'bill_long_text', 'top_subject',
                       'amendment_type_num', 'bill_sponsor_name'):
            self.res_dict[column] = []
        for vote_type in self.vote_types:
            self.res_dict[vote_type + " (Democrats)"] = []
            self.res_dict[vote_type + " (Republicans)"] = []
            self.res_dict[vote_type + " (Others)"] = []
        self.word_search = word_search
        # Path fragment for each chamber -> key of its count in find_chamber_count().
        self.chambers = {"/h": "house_count", "/s": "senate_count"}

    @staticmethod
    def _load_json(path):
        """Parse the JSON file at ``path`` and close the handle afterwards."""
        with open(path, 'r') as handle:
            return json.load(handle)

    def find_chamber_count(self):
        """Count House and Senate vote entries for every year directory.

        Sets ``self.years``, ``self.congress_votes`` and
        ``self.count_arrangements``; ``self.house_count`` / ``self.senate_count``
        end up holding the totals of the last year processed.

        Returns:
            dict mapping year name -> ``{'senate_count': int, 'house_count': int}``.
        """
        self.years = []
        self.count_arrangements = {}
        self.congress_votes = self.congress_num + "votes/"
        # sorted() gives a reproducible year order (os.listdir order is arbitrary).
        for year in sorted(os.listdir(self.congress_votes)):
            year_dir = os.path.join(self.congress_votes, year)
            # Ignore stray files such as '.DS_Store'; listing them would raise.
            if not os.path.isdir(year_dir):
                continue
            self.years.append(year)
            self.house_count = 0
            self.senate_count = 0
            for chamber in os.listdir(year_dir):
                if chamber.startswith("h"):
                    self.house_count += 1
                elif chamber.startswith("s"):
                    self.senate_count += 1
            self.count_arrangements[year] = {'senate_count': self.senate_count,
                                             'house_count': self.house_count}
        return self.count_arrangements

    def read_chamber_data(self):
        """Scan every vote file and tabulate those matching ``self.word_search``.

        Vote folders are addressed as ``<prefix>1 .. <prefix>N`` where N is the
        count from ``find_chamber_count``, i.e. the numbering is assumed to be
        contiguous. ``self.res_docs`` collects the matching vote numbers.

        Returns:
            ``self.res_dict``. It is returned even when nothing matched (all
            columns empty) instead of failing on an unbound local.
        """
        chambers_in_years = self.find_chamber_count()
        self.res_docs = []
        for chamber in self.chambers:
            count_key = self.chambers[chamber]
            for year in self.years:
                chamber_path = self.congress_votes + year + chamber
                for i in range(1, chambers_in_years[year][count_key] + 1):
                    path = chamber_path + str(i) + '/data.json'
                    x = self._load_json(path)
                    if self.word_search in x["question"]:
                        self.res_docs.append(i)
                        self.arrange_dict(i, path, count_key)
        return self.res_dict

    def arrange_dict(self, vote_index, vote_path, chamber):
        """Append one row describing the vote stored at ``vote_path``.

        Args:
            vote_index: vote number; only forwarded unchanged in spirit of the
                original signature (not used for the row itself).
            vote_path: path of the vote's ``data.json``.
            chamber: ``"house_count"`` or ``"senate_count"``; the part before
                the underscore is stored in the ``chamber`` column.

        Returns:
            ``self.res_dict`` after the row has been appended.

        The whole row is computed before any column is touched, so an exception
        (missing key, missing bill file, ...) leaves every column list at the
        same length.
        """
        vote = self._load_json(vote_path)
        votes_info = vote['votes']
        # Some votes label the options "Aye"/"No"; fold them into "Yea"/"Nay".
        if "Aye" in votes_info:
            votes_info["Yea"] = votes_info["Aye"]
        if "No" in votes_info:
            votes_info["Nay"] = votes_info["No"]

        if vote["category"] != "amendment":
            is_amendment = False
            amendment_label = "no"
        else:
            is_amendment = True
            amendment_label = (str(vote['amendment']['type'][0]) + "amdt"
                               + str(vote['amendment']['number']))

        # One lookup instead of three: the bill file is parsed a single time.
        bill_info = self._bill_info_from_vote(vote)

        row = {
            'amendment_type_num': amendment_label,
            'isAmendment': is_amendment,
            'bill_title': str(vote['bill']['type'] + str(vote['bill']['number'])),
            'bill_long_text': bill_info[0],
            'top_subject': bill_info[1],
            'bill_sponsor_name': bill_info[2],
            'result': str(vote['result_text']),
            'date': str(vote['date']),
            'year': vote['date'].split('-')[0],
            'question': str(vote['question']),
            'chamber': str(chamber.split('_')[0]),
        }

        for vote_type in self.vote_types:
            dem_counter = 0
            rep_counter = 0
            ind_counter = 0
            # A category nobody chose may be absent from the file; count it as 0.
            for voter in votes_info.get(vote_type, []):
                # NOTE(review): the votes wiki describes a Vice-President
                # tie-break as a bare "VP" string rather than a dict -- confirm.
                # Such entries carry no party and are left out of the tallies.
                if not isinstance(voter, dict):
                    continue
                party = voter["party"]
                if party == 'D':
                    dem_counter += 1
                elif party == 'R':
                    rep_counter += 1
                elif party == 'I':
                    # Only party code 'I' is tallied under "(Others)".
                    ind_counter += 1
            row[vote_type + " (Democrats)"] = dem_counter
            row[vote_type + " (Republicans)"] = rep_counter
            row[vote_type + " (Others)"] = ind_counter

        for column, value in row.items():
            self.res_dict[column].append(value)
        return self.res_dict

    def get_bill_info(self, vote_index, vote_path):
        """Return ``[summary text, top subject term, sponsor name]`` for a vote's bill.

        Args:
            vote_index: unused; kept so existing callers keep working.
            vote_path: path of the vote's ``data.json``.

        Returns:
            A three-element list, or ``None`` when the bill type matches none of
            ``self.bill_types``.
        """
        return self._bill_info_from_vote(self._load_json(vote_path))

    def _bill_info_from_vote(self, vote):
        """Look up the bill referenced by an already-parsed vote document."""
        bill_type_num = str(vote['bill']['type']) + str(vote['bill']['number'])
        for bill_type in self.bill_types:
            if bill_type in bill_type_num:
                bill_data_path = (self.congress_num + "bills/" + bill_type + "/"
                                  + str(bill_type_num) + "/data.json")
                bill = self._load_json(bill_data_path)
                return [bill["summary"]["text"], bill["subjects_top_term"], bill["sponsor"]["name"]]
        return None

# Processor for the 108th Congress data directory, searching vote questions for "Foreign".
pr = Processor(congress_num="data/108/", word_search="Foreign")

In [35]:
# Tabulate every matching vote, then export the table without the row index.
filename_dest = "word_Foreign_congress_108_years_2003_2004.csv"
vote_columns = pr.read_chamber_data()
df = pd.DataFrame(data=vote_columns)
df.to_csv(filename_dest, index=False)

In [36]:
# Reload the exported CSV from the working directory and preview its first three rows.
exported_csv = "./word_Foreign_congress_108_years_2003_2004.csv"
data_info = pd.read_csv(exported_csv)
data_info.head(3)

Unnamed: 0,Nay (Democrats),Nay (Others),Nay (Republicans),Not Voting (Democrats),Not Voting (Others),Not Voting (Republicans),Present (Democrats),Present (Others),Present (Republicans),Yea (Democrats),...,bill_long_text,bill_sponsor_name,bill_title,chamber,date,isAmendment,question,result,top_subject,year
0,43,1,9,4,0,0,0,0,0,1,...,"Foreign Relations Authorization Act, Fiscal Ye...","Lugar, Richard G.",s925,senate,2003-07-09T17:23:00-04:00,False,On the Motion to Table S.Amdt. 1141 to S.Amdt....,Motion to Table Failed (43-53),International affairs,2003
1,0,0,43,3,0,0,0,0,0,45,...,"Foreign Relations Authorization Act, Fiscal Ye...","Lugar, Richard G.",s925,senate,2003-07-10T11:57:00-04:00,True,On the Amendment S.Amdt. 1164 to S.Amdt. 1136 ...,Amendment Agreed to (54-43),International affairs,2003
2,0,0,18,4,0,0,0,0,0,44,...,"Foreign Relations Authorization Act, Fiscal Ye...","Lugar, Richard G.",s925,senate,2003-07-10T14:42:00-04:00,True,On the Amendment S.Amdt. 1174 to S.Amdt. 1136 ...,Amendment Agreed to (78-18),International affairs,2003
