In [1]:
import csv
from collections import defaultdict,Counter
from pprint import pprint
import glob
from typing import NamedTuple,DefaultDict,Tuple,List

In [2]:
# Data structures shared by the rest of the notebook.

# A single vote encoded as a number: 1 (Yea), -1 (Nay) or 0 (Not Voting).
VoteValue = int


class Senator(NamedTuple):
    """Immutable, hashable identity of a senator; used as a dictionary key."""
    name: str
    party: str
    state: str


# Votes collected per senator, in the order the vote files are read.
# (The spelling of this name is kept as-is because later cells reference it.)
acummulated_record: DefaultDict[Senator, list[VoteValue]] = defaultdict(list)

# Translation from the vote text found in the CSV files to its numeric value.
# This is a plain dict (not a defaultdict): an unknown vote string raises KeyError.
vote_value: dict[str, VoteValue] = {'Yea': 1, 'Nay': -1, 'Not Voting': 0}

# A senator's complete voting record as an immutable, hashable tuple.
VoteHistory = Tuple[VoteValue, ...]

In [3]:
# Load the votes and regroup them by senator (the files are organised by topic).
#
# NOTE: this cell appends to acummulated_record. Re-running it without first
# re-running the cell that creates acummulated_record duplicates every vote.
#
# glob.glob() returns matches in arbitrary, filesystem-dependent order, so the
# file names are sorted to keep each senator's vote sequence reproducible.
for filename in sorted(glob.glob('senate_data/*.csv')):
    # newline='' is what the csv module asks for, so that newlines embedded in
    # quoted fields are parsed correctly on every platform.
    with open(filename, encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        # The first two rows are read into vote_topic and headers; neither is
        # used below, reading them simply moves the reader on to the data rows.
        vote_topic = next(reader)
        headers = next(reader)
        # Every remaining row must have exactly six columns, otherwise the
        # unpacking raises ValueError.
        for person, state, district, vote, name, party in reader:
            senator = Senator(name, party, state)
            # A vote string missing from vote_value raises KeyError here.
            acummulated_record[senator].append(vote_value[vote])


In [4]:
# Spot-check of the loaded data: `senator` is the variable left over from the
# loading loop, so (with cells run in order) this prints the vote list of the
# last senator row that was read.
print(acummulated_record[senator])

[-1, -1, 1, -1, -1, 1, -1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, -1, 1, 1, 1, -1, -1, 1, 1, 1, 1, -1]


In [5]:
# Freeze each senator's accumulated vote list into a tuple and keep the result
# in an ordinary dict keyed by senator.
record: dict[Senator, VoteHistory] = dict(
    (member, tuple(ballots)) for member, ballots in acummulated_record.items()
)

In [6]:
# Use k-means to locate the cluster centroids for the voting patterns, then assign each senator to the nearest cluster
from kmeans import k_means,assign_data

In [7]:
# Run k_means (imported from the kmeans module) over the vote histories with k=3.
# NOTE(review): `labeled` is not used in the cells shown here; the assignment
# is obtained again from assign_data in a later cell.
# NOTE(review): if k_means chooses its starting centroids at random, the
# clusters can differ between runs; confirm, and seed the generator if so.
centers,labeled=k_means(record.values(),k=3)

In [8]:
# Assign every vote history to one of the centers found above.
# Presumably the result maps each center to the vote histories assigned to it
# (a later cell iterates over .values() and looks each history up in
# votes_to_senators); verify against kmeans.assign_data.
clustered_vots=assign_data(centers,record.values())

In [9]:
#Build a reverse mapping from a vote history to a list of senators who voted that way 

In [10]:
# Reverse lookup: vote history -> every senator with exactly that history.
# Senators who voted identically end up in the same list.
votes_to_senators: DefaultDict[VoteHistory, list[Senator]] = defaultdict(list)
for senator in record:
    votehistory = record[senator]
    votes_to_senators[votehistory].append(senator)

In [11]:
# Sanity check: the reverse mapping should account for all 100 senators.
sum(map(len, votes_to_senators.values())) == 100

True

In [12]:
# Report every cluster: list its senators, then show the per-party tally.
for i, votes_in_cluster in enumerate(clustered_vots.values(), start=1):
    print(f'===================== Voting Cluster {i} ===================')
    party_totals = Counter()
    # set() collapses identical vote histories, so each group of senators
    # sharing a history is visited only once.
    for votes in set(votes_in_cluster):
        for senator in votes_to_senators[votes]:
            print(senator)
            party_totals.update([senator.party])
    print(party_totals)

Senator(name='Sen. Kelly Ayotte [R, 2011-2016]', party='Republican', state='NH')
Senator(name='Sen. Lisa Murkowski [R, 2003-2022]', party='Republican', state='AK')
Senator(name='Sen. Patrick “Pat” Toomey [R, 2011-2022]', party='Republican', state='PA')
Senator(name='Sen. Heidi Heitkamp [D, 2013-2018]', party='Democrat', state='ND')
Senator(name='Sen. James “Jim” Inhofe [R, 1994-2026]', party='Republican', state='OK')
Senator(name='Sen. Mike Rounds [R, 2015-2026]', party='Republican', state='SD')
Senator(name='Sen. David Vitter [R, 2005-2016]', party='Republican', state='LA')
Senator(name='Sen. Shelley Capito [R, 2015-2026]', party='Republican', state='WV')
Senator(name='Sen. Robert “Rob” Portman [R, 2011-2022]', party='Republican', state='OH')
Senator(name='Sen. Jerry Moran [R, 2011-2022]', party='Republican', state='KS')
Senator(name='Sen. Cory Gardner [R, 2015-2020]', party='Republican', state='CO')
Senator(name='Sen. Dan Sullivan [R, 2015-2026]', party='Republican', state='AK')
Sena