In [47]:
import numpy as np
import pandas as pd

In [48]:
url = 'https://voteview.com/static/data/out/members/HS119_members.csv'
ideaology = pd.read_csv(url)
ideaology.head(3).T

Unnamed: 0,0,1,2
congress,119,119,119
chamber,House,House,House
icpsr,20301,21102,21500
state_icpsr,41,41,41
district_code,3,7,6
state_abbrev,AL,AL,AL
party_code,200,100,200
occupancy,,,
last_means,,,
bioname,"ROGERS, Mike Dennis","SEWELL, Terri","PALMER, Gary James"


In [49]:
cols_to_keep = ['bioname', 'chamber', 'party_code', 'nominate_dim1']
crosswalk_cols = ['bioname', 'icpsr', 'state_abbrev', 'district_code', 'bioguide_id']

crosswalk = ideaology[crosswalk_cols]
ideaology = ideaology[cols_to_keep]
ideaology['party_code'].value_counts()

party_code
200    279
100    264
328      2
Name: count, dtype: int64

In [50]:
replace_map = {200: 'Republican', 100: 'Democrat', 328: 'Independent'}
ideaology['party'] = ideaology['party_code'].replace(replace_map)
ideaology['party'].value_counts()

party
Republican     279
Democrat       264
Independent      2
Name: count, dtype: int64

In [51]:
ideaology = ideaology.drop('party_code', axis=1)
ideaology = ideaology.rename({'nominate_dim1': 'left_right_ideology'}, axis=1)
ideaology.sort_values('left_right_ideology', ascending=False)

Unnamed: 0,bioname,chamber,left_right_ideology,party
398,"GILL, Brandon",House,0.981,Republican
296,"HARRIGAN, Pat",House,0.981,Republican
443,"TUBERVILLE, Thomas Hawley (Tommy)",Senate,0.936,Republican
493,"SCHMITT, Eric Stephen",Senate,0.918,Republican
477,"PAUL, Rand",Senate,0.891,Republican
...,...,...,...,...
429,"RANDALL, Emily",House,-0.685,Democrat
485,"WARREN, Elizabeth",Senate,-0.744,Democrat
401,"TURNER, Sylvester",House,-0.746,Democrat
381,"GARCIA, Sylvia",House,-0.781,Democrat


In [52]:
ideaology_crosswalk = pd.merge(ideaology, crosswalk, on='bioname', how='outer', validate='1:1', indicator='matched')
ideaology_crosswalk['matched'].value_counts()

matched
both          545
left_only       0
right_only      0
Name: count, dtype: int64

In [53]:
ideaology_crosswalk = ideaology_crosswalk.drop('matched', axis=1)
ideaology_crosswalk.query('state_abbrev == "VA"')

Unnamed: 0,bioname,chamber,left_right_ideology,party,icpsr,state_abbrev,district_code,bioguide_id
31,"BEYER, Donald Sternoff Jr.",House,-0.395,Democrat,21554,VA,8,B001292
80,"CLINE, Benjamin",House,0.716,Republican,21908,VA,6,C001118
90,"CONNOLLY, Gerald E. (Gerry)",House,-0.309,Democrat,20952,VA,11,C001078
195,"GRIFFITH, H. Morgan",House,0.51,Republican,21191,VA,9,G000568
252,"KAINE, Timothy Michael (Tim)",Senate,-0.243,Democrat,41305,VA,0,K000384
265,"KIGGANS, Jennifer",House,0.26,Republican,22335,VA,2,K000399
318,"MCCLELLAN, Jennifer",House,-0.55,Democrat,22374,VA,4,M001227
324,"MCGUIRE, John J., III",House,0.673,Republican,22539,VA,5,M001239
445,"SCOTT, Robert C.",House,-0.45,Democrat,39307,VA,3,S000185
477,"SUBRAMANYAM, Suhas",House,-0.301,Democrat,22554,VA,10,S001230


## Vote similarity matrix

In [54]:
url = 'https://voteview.com/static/data/out/votes/HS119_votes.csv'
votes = pd.read_csv(url)
votes.head(3).T

Unnamed: 0,0,1,2
congress,119,119,119
chamber,House,House,House
rollnumber,1,1,1
icpsr,14854,14863,14873
cast_code,1,1,6
prob,99.6,78.2,100.0


### pd.merge(data1, data2, on, how, validate, indicator)
Six arguments:
* data1, data2: two dataframes we want to join
* on: column(s) whos value(s) the dataframes share - if not the same name, use left_on and right_on
* how: what to do with rows that do not have a match in the other dataset
    * inner: drop unmatched rows
    * outer/full: keep all rows, fill unmatched rows with missing values in other columns
    * left: keep all rows in data1, drop unmatched rows in data2
    * right: keep all rows in data2, drop unmatched rows in data1  
    Note: it is a good idea to join outer first to see if there are any problems with matching
* validate: one_to_one, many_to_one, one_to_many, many_to_many - sets an expectation for how many rows in data2 one row in data1 will match to, if expectation is not met then returns an error
* indicator: new column that tells whether an id from the on argument was found in both datasets, left only, or right only

In [55]:
votes = votes.drop(['congress', 'prob'], axis=1)
votes

Unnamed: 0,chamber,rollnumber,icpsr,cast_code
0,House,1,14854,1
1,House,1,14863,1
2,House,1,14873,6
3,House,1,15029,6
4,House,1,15433,6
...,...,...,...,...
174605,Senate,530,42504,1
174606,Senate,530,42505,1
174607,Senate,530,42506,1
174608,Senate,530,49308,6


In [56]:
vote_compare = pd.merge(votes, votes, on=['chamber', 'rollnumber'], how='outer', validate='m:m', indicator='matched')
vote_compare.head()

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y,matched
0,House,1,14854,1,14854,1,both
1,House,1,14854,1,14863,1,both
2,House,1,14854,1,14873,6,both
3,House,1,14854,1,15029,6,both
4,House,1,14854,1,15433,6,both


In [57]:
vote_compare['matched'].value_counts()

matched
both          57936814
left_only            0
right_only           0
Name: count, dtype: int64

In [58]:
vote_compare = vote_compare.drop('matched', axis=1)
vote_compare = vote_compare.query('icpsr_x != icpsr_y')
vote_compare.head()

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y
1,House,1,14854,1,14863,1
2,House,1,14854,1,14873,6
3,House,1,14854,1,15029,6
4,House,1,14854,1,15433,6
5,House,1,14854,1,15448,6


In [59]:
vote_compare['agree'] = vote_compare['cast_code_x'] == vote_compare['cast_code_y']
vote_compare.head()

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y,agree
1,House,1,14854,1,14863,1,True
2,House,1,14854,1,14873,6,False
3,House,1,14854,1,15029,6,False
4,House,1,14854,1,15433,6,False
5,House,1,14854,1,15448,6,False


In [60]:
vote_compare = vote_compare.groupby(['icpsr_x', 'icpsr_y']).agg({'agree': 'mean'}).reset_index()
vote_compare.head()

Unnamed: 0,icpsr_x,icpsr_y,agree
0,14226,14435,0.020755
1,14226,14858,0.067925
2,14226,14871,0.05283
3,14226,14921,0.883019
4,14226,15021,0.1


In [61]:
vote_compare = pd.merge(vote_compare, crosswalk, how='left', left_on='icpsr_x', right_on='icpsr', indicator='matched', validate='m:1')
vote_compare.head()

Unnamed: 0,icpsr_x,icpsr_y,agree,bioname,icpsr,state_abbrev,district_code,bioguide_id,matched
0,14226,14435,0.020755,"GRASSLEY, Charles Ernest",14226,IA,0,G000386,both
1,14226,14858,0.067925,"GRASSLEY, Charles Ernest",14226,IA,0,G000386,both
2,14226,14871,0.05283,"GRASSLEY, Charles Ernest",14226,IA,0,G000386,both
3,14226,14921,0.883019,"GRASSLEY, Charles Ernest",14226,IA,0,G000386,both
4,14226,15021,0.1,"GRASSLEY, Charles Ernest",14226,IA,0,G000386,both


In [62]:
vote_compare = vote_compare[['bioname', 'icpsr_y', 'agree']]
vote_compare = pd.merge(vote_compare, crosswalk, how='left', left_on='icpsr_y', right_on='icpsr', indicator='matched', validate='m:1')
vote_compare.head()

Unnamed: 0,bioname_x,icpsr_y,agree,bioname_y,icpsr,state_abbrev,district_code,bioguide_id,matched
0,"GRASSLEY, Charles Ernest",14435,0.020755,"MARKEY, Edward John",14435,MA,0,M000133,both
1,"GRASSLEY, Charles Ernest",14858,0.067925,"SCHUMER, Charles Ellis (Chuck)",14858,NY,0,S000148,both
2,"GRASSLEY, Charles Ernest",14871,0.05283,"WYDEN, Ronald Lee",14871,OR,0,W000779,both
3,"GRASSLEY, Charles Ernest",14921,0.883019,"McCONNELL, Addison Mitchell (Mitch)",14921,KY,0,M000355,both
4,"GRASSLEY, Charles Ernest",15021,0.1,"DURBIN, Richard Joseph",15021,IL,0,D000563,both


In [64]:
vote_compare = vote_compare[['bioname_x', 'bioname_y', 'agree']]
vote_compare = vote_compare.rename(columns={'bioname_x': 'bioname', 'bioname_y': 'comparison_member'})
vote_compare.head()

Unnamed: 0,bioname,comparison_member,agree
0,"GRASSLEY, Charles Ernest","MARKEY, Edward John",0.020755
1,"GRASSLEY, Charles Ernest","SCHUMER, Charles Ellis (Chuck)",0.067925
2,"GRASSLEY, Charles Ernest","WYDEN, Ronald Lee",0.05283
3,"GRASSLEY, Charles Ernest","McCONNELL, Addison Mitchell (Mitch)",0.883019
4,"GRASSLEY, Charles Ernest","DURBIN, Richard Joseph",0.1


In [65]:
vote_compare[vote_compare['bioname'].str.contains('MCGUIRE')].sort_values('agree', ascending=False)

Unnamed: 0,bioname,comparison_member,agree
175793,"MCGUIRE, John J., III","JOHNSON, Mike",0.973684
175922,"MCGUIRE, John J., III","BEAN, Aaron",0.953737
176002,"MCGUIRE, John J., III","CRANK, Jeff",0.950178
175871,"MCGUIRE, John J., III","CAMMACK, Kat",0.950178
175817,"MCGUIRE, John J., III","CLINE, Benjamin",0.950178
...,...,...,...
175864,"MCGUIRE, John J., III","NORTON, Eleanor Holmes",0.121951
175865,"MCGUIRE, John J., III","PLASKETT, Stacey E.",0.121951
175851,"MCGUIRE, John J., III","SHERRILL, Mikie",0.106762
176056,"MCGUIRE, John J., III","WALKINSHAW, James R.",0.105263
