In [1]:
import pandas as pd

from entropy_funcs import prediction_entropy, bill_entropy, greedy_prediction_entropy, greedy_bill_entropy, optimal_prediction_entropy, optimal_bill_entropy

In [2]:
votes_orig = pd.read_csv('data/H117_votes.csv', dtype={'icpsr': 'int'})
bill_data_orig = pd.read_csv('data/H117_rollcalls.csv')
members_orig = pd.read_csv('data/H117_members.csv', dtype={'icpsr': 'int'})

In [3]:
bill_data = bill_data_orig[['rollnumber', 'date', 'yea_count', 'nay_count', 'vote_desc', 'vote_question']]
bill_data

Unnamed: 0,rollnumber,date,yea_count,nay_count,vote_desc,vote_question
0,1,2021-01-03,216,211,,Election of the Speaker
1,2,2021-01-03,371,2,Authorizing and directing the Speaker to admin...,On Agreeing to the Resolution
2,3,2021-01-04,214,204,Adopting the Rules of the House of Representat...,On Motion to Table the Motion to Postpone to a...
3,4,2021-01-04,214,196,Adopting the Rules of the House of Representat...,Table Motion to Refer
4,5,2021-01-04,217,204,Adopting the Rules of the House of Representat...,On Ordering the Previous Question
...,...,...,...,...,...,...
991,992,2022-12-22,337,79,Durbin Feeling Native American Languages Act o...,On Motion to Suspend the Rules and Pass
992,993,2022-12-22,380,35,Student Veteran Emergency Relief Act,On Motion to Suspend the Rules and Concur in t...
993,994,2022-12-23,215,206,Providing for consideration of the Senate amen...,On Agreeing to the Resolution
994,995,2022-12-23,193,227,,On Motion to Adjourn


In [4]:
def bill_details(bills):
    return bill_data[bill_data.rollnumber.isin(bills)]

In [5]:
is_yea = {1, 2, 3}
votes_orig['vote'] = votes_orig.cast_code.isin(is_yea)
votes = votes_orig[['rollnumber', 'icpsr', 'vote']]
votes

Unnamed: 0,rollnumber,icpsr,vote
0,1,14066,False
1,1,14854,False
2,1,14863,False
3,1,14873,True
4,1,15019,True
...,...,...,...
428966,996,31103,True
428967,996,39301,True
428968,996,39305,True
428969,996,39307,True


In [6]:
votes_orig.cast_code.value_counts()

cast_code
1    290343
6    130181
9      8354
7        93
Name: count, dtype: int64

In [7]:
vote_mapping = {
    1: 0,
    6: 1,
    7: 2,
    9: 2
}
votes_orig['vote_with_neutral'] = votes_orig.cast_code.map(vote_mapping)
votes_neutral = votes_orig[['rollnumber', 'icpsr', 'vote_with_neutral']]
votes_neutral


Unnamed: 0,rollnumber,icpsr,vote_with_neutral
0,1,14066,1
1,1,14854,1
2,1,14863,1
3,1,14873,0
4,1,15019,0
...,...,...,...
428966,996,31103,0
428967,996,39301,0
428968,996,39305,0
428969,996,39307,0


In [8]:
member_vote_table = pd.pivot_table(votes, values='vote', columns='rollnumber', index='icpsr', fill_value=0)
member_vote_table

rollnumber,1,2,3,4,5,6,7,8,9,10,...,987,988,989,990,991,992,993,994,995,996
icpsr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14066,0,1,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
14854,0,1,0,0,0,1,0,1,1,1,...,1,1,1,1,1,1,1,0,1,0
14863,0,1,0,0,0,1,0,1,0,0,...,1,1,1,1,1,1,1,0,1,0
14873,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
15019,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39305,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
39307,1,0,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,1
91980,0,1,0,0,0,1,0,1,1,1,...,0,0,0,0,0,0,0,0,1,0
99912,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0


In [9]:
member_vote_table_neutral = pd.pivot_table(votes_neutral, values='vote_with_neutral', columns='rollnumber',
                                           index='icpsr', fill_value=2)
member_vote_table_neutral

rollnumber,1,2,3,4,5,6,7,8,9,10,...,987,988,989,990,991,992,993,994,995,996
icpsr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14066,1,0,2,2,1,0,1,0,1,1,...,2,2,2,2,2,2,2,2,2,2
14854,1,0,1,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,1
14863,1,0,1,1,1,0,1,0,1,1,...,0,0,0,0,0,0,0,1,0,1
14873,0,0,0,0,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
15019,0,0,0,0,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39305,0,0,0,0,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
39307,0,2,0,0,0,1,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
91980,1,0,1,1,1,0,1,0,0,0,...,1,1,1,1,1,1,1,1,0,1
99912,2,2,2,2,2,2,2,2,0,0,...,2,2,2,2,2,2,2,2,2,2


In [10]:
members = members_orig[members_orig.chamber.isin(['House'])]
party_dict = {100: 'dem', 200: 'rep'}
members['party'] = members.party_code.map(party_dict)
members = members[['icpsr', 'party', 'bioname', 'state_abbrev', 'nominate_dim1', 'nominate_dim2']]
members = members.rename(
    columns={'state_abbrev': 'state', 'bioname': 'name', 'nominate_dim1': 'dim1', 'nominate_dim2': 'dim2'})
members

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  members['party'] = members.party_code.map(party_dict)


Unnamed: 0,icpsr,party,name,state,dim1,dim2
2,20301,rep,"ROGERS, Mike Dennis",AL,0.362,0.462
3,21102,dem,"SEWELL, Terri",AL,-0.394,0.397
4,21193,rep,"BROOKS, Mo",AL,0.652,-0.417
5,21500,rep,"PALMER, Gary James",AL,0.677,0.095
6,22108,rep,"CARL, Jerry L.",AL,0.507,0.535
...,...,...,...,...,...,...
452,21970,rep,"STEIL, Bryan",WI,0.413,0.045
453,21989,rep,"TIFFANY, Thomas P.",WI,0.643,-0.206
454,22115,rep,"FITZGERALD, Scott",WI,0.612,0.233
455,29769,dem,"KIND, Ron",WI,-0.260,-0.080


In [11]:
# best_two_bill = optimal_bill_entropy(2, member_vote_table) # [828, 864]
best_two_bill = [828, 864]
print(bill_entropy(best_two_bill, member_vote_table))
print(best_two_bill)

1.9119853345466926
[828, 864]


In [12]:
# greedy_bill_five = greedy_bill_entropy(5, member_vote_table)
greedy_bill_five = [40, 864, 632, 141, 786]
print([bill_entropy(greedy_bill_five[:i], member_vote_table) for i in range(1, 6)])
print(greedy_bill_five)

[0.9999965460782858, 1.8832425425314643, 2.6946189479224607, 3.471470295113517, 4.171985222752787]
[40, 864, 632, 141, 786]


In [13]:
# prediction_best = optimal_prediction_entropy(1, member_vote_table)  # 587
prediction_best = [587]
prediction_entropy(prediction_best, member_vote_table)

0.23172260898819205

In [14]:
prediction_greedy_3 = greedy_prediction_entropy(5, member_vote_table) # [587, 699, 136, 83, 969]
# prediction_greedy_5 = [587, 699, 136, 83, 969]
prediction_greedy_3

100%|██████████| 996/996 [11:40<00:00,  1.42it/s]
 15%|█▌        | 151/995 [03:27<19:18,  1.37s/it]

KeyboardInterrupt



In [None]:
# greedy_five_neutral = greedy_bill_entropy(5, True)
greedy_five_neutral = [385, 962, 141, 864, 17]
# best_two_neutral = optimal_bill_entropy(2, True)
best_two_neutral = (70, 962)
greedy_four_prediction_neutral = greedy_prediction_entropy(4, True)
best_two_prediction_neutral = optimal_prediction_entropy(2, True)

  0%|          | 0/996 [00:00<?, ?it/s]Process SpawnPoolWorker-725:
Process SpawnPoolWorker-724:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/Cellar/python@3.11/3.11.4/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/Cellar/python@3.11/3.11.4/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/Cellar/python@3.11/3.11.4/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/pool.py", line 114, in worker
    task = get()
           ^^^^^
  File "/usr/local/Cellar/python@3.11/3.11.4/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'prediction_entropy_wrapper'

In [72]:
for i in range(1, 6):
    print(bill_entropy(greedy_five_neutral[:i], True))
greedy_five_neutral

1.5174015533135248
2.6186325827563124
3.5990852677463057
4.454005104434073
5.212124248116338


[385, 962, 141, 864, 17]

In [73]:
bill_entropy(best_two_neutral, True)
best_two_neutral

(70, 962)

In [14]:
for i in range(1, 6):
    print(prediction_entropy(prediction_greedy_5[:i]))

0.33464104383288507
0.26371094654419247
0.23175889423867013
0.20403370808890298
0.18396010372145977


In [15]:
bill_details(prediction_greedy_5)
# prediction_greedy_5

Unnamed: 0,rollnumber,date,yea_count,nay_count,vote_desc,vote_question
82,83,2021-03-17,242,174,,On Agreeing to the Amendment
135,136,2021-05-12,349,74,,On Motion to Suspend the Rules and Pass Certai...
586,587,2022-04-28,220,205,Providing for consideration of the bill (S. 35...,On Agreeing to the Resolution
698,699,2022-06-08,411,11,Strengthening Subcontracting for Small Busines...,On Motion to Suspend the Rules and Pass
968,969,2022-12-14,349,80,,On Motion to Suspend the Rules and Pass Certai...


In [16]:
def display_member_data(bills, num=10):
    for response, ids in member_vote_table.groupby(bills).groups.items():
        print(response, len(ids))
        print(members[members.icpsr.isin(ids)].drop(['icpsr'], axis=1).head(num))


display_member_data(prediction_greedy_5[:3], num=20)

(0, 0, 0) 33
    party                            name state   dim1   dim2
10    dem           PELTOLA, Mary Sattler    AK -0.126  0.298
11    rep  RADEWAGEN, Aumua Amata Coleman    AS  0.403  0.076
14    rep                     GOSAR, Paul    AZ  0.698 -0.478
17    rep                BIGGS, Andrew S.    AZ  0.838 -0.546
25    dem                   PELOSI, Nancy    CA -0.490 -0.203
69    rep                  CONWAY, Connie    CA  0.376  0.295
81    rep            BUCK, Kenneth Robert    CO  0.712 -0.431
92    dem          NORTON, Eleanor Holmes    DC -0.498 -0.047
120   dem           HASTINGS, Alcee Lamar    FL -0.564  0.157
125   rep             HICE, Jody Brownlow    GA  0.797 -0.242
132   rep         GREENE, Marjorie Taylor    GA  0.800 -0.600
135   dem            SAN NICOLAS, Michael    GU -0.410 -0.520
167   rep                YAKYM, Rudy, III    IN  0.456  0.636
181   rep                  MASSIE, Thomas    KY  0.680 -0.733
183   dem                RICHMOND, Cedric    LA -0.486  0

KeyError: (40,)

In [235]:
display_member_data(greedy_five)

(0, 0, 0, 0, 0) 19
    party                         bioname state   dim1   dim2
10    dem           PELTOLA, Mary Sattler    AK -0.126  0.298
11    rep  RADEWAGEN, Aumua Amata Coleman    AS  0.403  0.076
25    dem                   PELOSI, Nancy    CA -0.490 -0.203
81    rep            BUCK, Kenneth Robert    CO  0.712 -0.431
92    dem          NORTON, Eleanor Holmes    DC -0.498 -0.047
135   dem            SAN NICOLAS, Michael    GU -0.410 -0.520
167   rep                YAKYM, Rudy, III    IN  0.456  0.636
183   dem                RICHMOND, Cedric    LA -0.486  0.332
230   rep                   FINSTAD, Brad    MN  0.555  0.223
293   dem                   RYAN, Patrick    NY -0.265  0.268
(0, 0, 0, 0, 1) 3
    party                  bioname state   dim1   dim2
132   rep  GREENE, Marjorie Taylor    GA  0.800 -0.600
181   rep           MASSIE, Thomas    KY  0.680 -0.733
317   rep              JORDAN, Jim    OH  0.717 -0.200
(0, 0, 0, 1, 0) 13
    party              bioname state   dim