In [11]:
from itertools import combinations
from math import comb

import numpy as np
import pandas as pd
from tqdm import tqdm

In [12]:
# Load the three H117 CSV inputs: per-member votes, roll-call metadata and
# member metadata. `icpsr` is read as int in the two tables that carry it.
# NOTE(review): presumably Voteview exports for the 117th House -- confirm provenance.
votes = pd.read_csv(
    'H117_votes.csv',
    dtype={'icpsr': 'int'},
)
bill_data = pd.read_csv('H117_rollcalls.csv')
members = pd.read_csv(
    'H117_members.csv',
    dtype={'icpsr': 'int'},
)

In [13]:
# Narrow the roll-call table to the identifier, date, tallies and descriptions
# that are shown when inspecting individual roll calls.
bill_data = bill_data.loc[:, [
    'rollnumber', 'date', 'yea_count', 'nay_count', 'vote_desc', 'vote_question',
]]
bill_data

Unnamed: 0,rollnumber,date,yea_count,nay_count,vote_desc,vote_question
0,1,2021-01-03,216,211,,Election of the Speaker
1,2,2021-01-03,371,2,Authorizing and directing the Speaker to admin...,On Agreeing to the Resolution
2,3,2021-01-04,214,204,Adopting the Rules of the House of Representat...,On Motion to Table the Motion to Postpone to a...
3,4,2021-01-04,214,196,Adopting the Rules of the House of Representat...,Table Motion to Refer
4,5,2021-01-04,217,204,Adopting the Rules of the House of Representat...,On Ordering the Previous Question
...,...,...,...,...,...,...
991,992,2022-12-22,337,79,Durbin Feeling Native American Languages Act o...,On Motion to Suspend the Rules and Pass
992,993,2022-12-22,380,35,Student Veteran Emergency Relief Act,On Motion to Suspend the Rules and Concur in t...
993,994,2022-12-23,215,206,Providing for consideration of the Senate amen...,On Agreeing to the Resolution
994,995,2022-12-23,193,227,,On Motion to Adjourn


In [14]:
# Cast codes that count as a "yea"; every other code becomes False.
# NOTE(review): presumably 1-3 are the yea-type cast codes -- confirm against the data dictionary.
is_yea = {1, 2, 3}

# Collapse cast_code into a boolean `vote` column and keep only the keys needed
# to pivot (roll call, member) plus that flag.
votes = (
    votes
    .assign(vote=lambda frame: frame.cast_code.isin(is_yea))
    .loc[:, ['rollnumber', 'icpsr', 'vote']]
)
votes

Unnamed: 0,rollnumber,icpsr,vote
0,1,14066,False
1,1,14854,False
2,1,14863,False
3,1,14873,True
4,1,15019,True
...,...,...,...
428966,996,31103,True
428967,996,39301,True
428968,996,39305,True
428969,996,39307,True


In [15]:
# Wide matrix with one row per member (icpsr) and one column per roll call.
# Cells are aggregated with pivot_table's default aggregation; any
# (member, roll call) pair with no row in `votes` is filled with 0, so it is
# indistinguishable from a non-yea vote.
member_vote_table = votes.pivot_table(
    index='icpsr',
    columns='rollnumber',
    values='vote',
    fill_value=0,
)
member_vote_table

rollnumber,1,2,3,4,5,6,7,8,9,10,...,987,988,989,990,991,992,993,994,995,996
icpsr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14066,0,1,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
14854,0,1,0,0,0,1,0,1,1,1,...,1,1,1,1,1,1,1,0,1,0
14863,0,1,0,0,0,1,0,1,0,0,...,1,1,1,1,1,1,1,0,1,0
14873,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
15019,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39305,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
39307,1,0,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
91980,0,1,0,0,0,1,0,1,1,1,...,0,0,0,0,0,0,0,0,1,0
99912,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Member lookup table: House rows only, a short party label, and shortened
# column names.
party_dict = {100: 'dem', 200: 'rep'}  # any other party_code maps to NaN
members = (
    members[members.chamber.isin(['House'])]
    # Add the column with .assign() rather than item assignment: writing a
    # column onto a boolean-filtered frame triggers SettingWithCopyWarning.
    .assign(party=lambda frame: frame.party_code.map(party_dict))
    .loc[:, ['icpsr', 'party', 'bioname', 'state_abbrev', 'nominate_dim1', 'nominate_dim2']]
    .rename(columns={
        'state_abbrev': 'state',
        'bioname': 'name',
        'nominate_dim1': 'dim1',
        'nominate_dim2': 'dim2',
    })
)
members

Unnamed: 0,icpsr,party,name,state,dim1,dim2
2,20301,rep,"ROGERS, Mike Dennis",AL,0.362,0.462
3,21102,dem,"SEWELL, Terri",AL,-0.394,0.397
4,21193,rep,"BROOKS, Mo",AL,0.652,-0.417
5,21500,rep,"PALMER, Gary James",AL,0.677,0.095
6,22108,rep,"CARL, Jerry L.",AL,0.507,0.535
...,...,...,...,...,...,...
452,21970,rep,"STEIL, Bryan",WI,0.413,0.045
453,21989,rep,"TIFFANY, Thomas P.",WI,0.643,-0.206
454,22115,rep,"FITZGERALD, Scott",WI,0.612,0.233
455,29769,dem,"KIND, Ron",WI,-0.260,-0.080


In [17]:
def entropy(bills, table=None):
    """Shannon entropy (in bits) of the joint voting pattern on ``bills``.

    Every row of the vote table is reduced to its tuple of values on the
    selected columns; the entropy is computed over the empirical
    distribution of those tuples.

    Parameters
    ----------
    bills : iterable
        Column labels (roll-call numbers) of the vote table to include.
    table : pandas.DataFrame, optional
        Table with one column per roll call. Defaults to the notebook-level
        ``member_vote_table`` so existing ``entropy(bills)`` calls are
        unchanged.

    Returns
    -------
    float
        Entropy in bits; ``0.0`` when ``bills`` is empty.
    """
    if table is None:
        table = member_vote_table
    columns = list(bills)
    if not columns:
        # With no roll calls selected every row has the same (empty) pattern,
        # so the entropy is zero. DataFrame.value_counts() cannot group on
        # zero columns, so this case is answered directly.
        return 0.0
    counts = table[columns].value_counts()
    as_proportions = counts / counts.sum()
    return (-np.log2(as_proportions) * as_proportions).sum()

In [18]:
# Distinct roll-call numbers present in `votes` (in order of first appearance);
# this is the candidate pool searched by optimal_entropy and greedy_entropy.
unique_bills = list(votes.rollnumber.unique())


def optimal_entropy(k):
    """Exhaustively find the k roll calls whose joint votes have maximal entropy.

    Every k-combination of ``unique_bills`` is scored with ``entropy`` and the
    best-scoring combination is returned as a tuple. A tqdm progress bar
    tracks the scan, sized by the total number of combinations.
    """
    n_combos = comb(len(unique_bills), k)
    candidates = tqdm(combinations(unique_bills, r=k), total=n_combos)
    return max(candidates, key=entropy)


def greedy_entropy(k):
    """Greedily pick k roll calls, maximising entropy one addition at a time.

    Starting from an empty selection, each of the k rounds adds the
    not-yet-chosen roll call that gives the highest ``entropy`` when appended
    to the current selection. Returns the picks as a list, in the order they
    were chosen.
    """
    chosen = []
    candidates = set(unique_bills)

    def score(bill):
        # Entropy of the current selection extended by this one candidate.
        return entropy(chosen + [bill])

    for _ in tqdm(range(k)):
        winner = max(candidates, key=score)
        chosen.append(winner)
        candidates.remove(winner)
    return chosen

In [19]:
# The pair below is the recorded result of the exhaustive search on the next
# line, hard-coded so this cell does not have to re-run it
# (presumably because optimal_entropy(2) is slow -- confirm).
# best_two = optimal_entropy(2) # [828, 864]
best_two = [828, 864]
# Show the entropy achieved by the pair, then the pair itself.
print(entropy(best_two))
print(best_two)

1.9119853345466924
[828, 864]


In [20]:
# Greedily select five roll calls.
greedy_five = greedy_entropy(5)
# Entropy after each successive pick (first 1, 2, ..., 5 picks), then the picks.
print([entropy(greedy_five[:i]) for i in range(1, 6)])
print(greedy_five)

100%|██████████| 5/5 [00:08<00:00,  1.71s/it]

[0.9999965460782858, 1.8832425425314643, 2.6946189479224607, 3.4714702951135163, 4.171985222752786]
[40, 864, 632, 141, 786]





In [232]:
def bill_details(bills):
    """Return the rows of ``bill_data`` whose ``rollnumber`` is in ``bills``.

    Rows come back in ``bill_data`` order, not in the order of ``bills``.
    """
    selected = bill_data['rollnumber'].isin(bills)
    return bill_data.loc[selected]

In [233]:
# Roll-call metadata for the two roll calls in best_two.
bill_details(best_two)

Unnamed: 0,rollnumber,date,yea_count,nay_count,vote_desc,vote_question
827,828,2022-07-20,238,191,,On Agreeing to the Amendment
863,864,2022-07-29,303,89,To include certain computer-related projects i...,On Motion to Suspend the Rules and Pass


In [222]:
# Roll-call metadata for the five greedily selected roll calls.
bill_details(greedy_five)

Unnamed: 0,rollnumber,date,yea_count,nay_count,vote_desc,vote_question
39,40,2021-02-26,229,198,,On Agreeing to the Amendment
140,141,2021-05-13,323,93,Behavioral Intervention Guidelines Act of 2021,On Motion to Suspend the Rules and Pass
631,632,2022-05-16,313,105,DHS Roles and Responsibilities in Cyber Space Act,"On Motion to Suspend the Rules and Pass, as Am..."
785,786,2022-07-14,244,179,,On Agreeing to the Amendment
863,864,2022-07-29,303,89,To include certain computer-related projects i...,On Motion to Suspend the Rules and Pass


In [234]:
def display_member_data(bills, num=10):
    """Print, for each observed vote pattern on ``bills``, its size and a sample of members.

    ``member_vote_table`` is grouped by the given roll-call columns. For each
    group the pattern and the number of members are printed, followed by up
    to ``num`` matching rows of ``members`` with the ``icpsr`` column removed.
    """
    pattern_to_ids = member_vote_table.groupby(bills).groups
    for pattern, icpsr_ids in pattern_to_ids.items():
        in_group = members['icpsr'].isin(icpsr_ids)
        roster = members.loc[in_group].drop(columns='icpsr')
        print(pattern, len(icpsr_ids))
        print(roster.head(num))
# Break members down by how they voted on the two roll calls in best_two.
display_member_data(best_two)

(0, 0) 62
   party                         bioname state   dim1   dim2
9    rep             YOUNG, Donald Edwin    AK  0.283  0.022
10   dem           PELTOLA, Mary Sattler    AK -0.126  0.298
11   rep  RADEWAGEN, Aumua Amata Coleman    AS  0.403  0.076
25   dem                   PELOSI, Nancy    CA -0.490 -0.203
28   rep                    NUNES, Devin    CA  0.450  0.223
33   rep                 MCCARTHY, Kevin    CA  0.457  0.221
65   dem                    JACOBS, Sara    CA -0.332 -0.475
76   dem                    LEE, Barbara    CA -0.679 -0.576
81   rep            BUCK, Kenneth Robert    CO  0.712 -0.431
85   dem                  DeGETTE, Diana    CO -0.431 -0.306
(0, 1) 157
   party              bioname state   dim1   dim2
2    rep  ROGERS, Mike Dennis    AL  0.362  0.462
4    rep           BROOKS, Mo    AL  0.652 -0.417
5    rep   PALMER, Gary James    AL  0.677  0.095
6    rep       CARL, Jerry L.    AL  0.507  0.535
7    rep         MOORE, Barry    AL  0.640 -0.119
8    rep

In [235]:
# Break members down by how they voted on the five greedily selected roll calls.
display_member_data(greedy_five)

(0, 0, 0, 0, 0) 19
    party                         bioname state   dim1   dim2
10    dem           PELTOLA, Mary Sattler    AK -0.126  0.298
11    rep  RADEWAGEN, Aumua Amata Coleman    AS  0.403  0.076
25    dem                   PELOSI, Nancy    CA -0.490 -0.203
81    rep            BUCK, Kenneth Robert    CO  0.712 -0.431
92    dem          NORTON, Eleanor Holmes    DC -0.498 -0.047
135   dem            SAN NICOLAS, Michael    GU -0.410 -0.520
167   rep                YAKYM, Rudy, III    IN  0.456  0.636
183   dem                RICHMOND, Cedric    LA -0.486  0.332
230   rep                   FINSTAD, Brad    MN  0.555  0.223
293   dem                   RYAN, Patrick    NY -0.265  0.268
(0, 0, 0, 0, 1) 3
    party                  bioname state   dim1   dim2
132   rep  GREENE, Marjorie Taylor    GA  0.800 -0.600
181   rep           MASSIE, Thomas    KY  0.680 -0.733
317   rep              JORDAN, Jim    OH  0.717 -0.200
(0, 0, 0, 1, 0) 13
    party              bioname state   dim