In [1]:
import numpy as np
import pandas as pd

In [4]:
# Download the Voteview HS119 members file.
url = 'https://voteview.com/static/data/out/members/HS119_members.csv'
ideology = pd.read_csv(url)

# Columns for the ideology table vs. the identifier crosswalk; `bioname` is in
# both lists so the two tables can be joined back together later.
cols_to_keep = ['bioname','chamber', 'nominate_dim1', 'party_code']
crosswalk_cols = ['bioname','state_abbrev','district_code','icpsr', 'bioguide_id']

# Take explicit copies so each table owns its data: columns are added to
# `ideology` in later cells, and that should never be ambiguous about whether
# it writes to a slice of the downloaded frame.
crosswalk = ideology[crosswalk_cols].copy()
ideology = ideology[cols_to_keep].copy()

In [5]:
# Tally how many members carry each numeric party code before recoding it.
ideology['party_code'].value_counts()

party_code
200    279
100    264
328      2
Name: count, dtype: int64

In [6]:
# Translate the numeric party codes into readable labels, store them in a new
# `party` column, and tally the labels as a check against the code counts.
replace_map = {
    200: 'Republican',
    100: 'Democrat',
    328: 'Independent',
}
party_labels = ideology['party_code'].replace(to_replace=replace_map)
ideology['party'] = party_labels
ideology['party'].value_counts()

party
Republican     279
Democrat       264
Independent      2
Name: count, dtype: int64

In [7]:
# The labelled `party` column now carries this information, so drop the code.
ideology = ideology.drop(columns=['party_code'])

In [8]:
# Rename `nominate_dim1` to the more descriptive `left_right_ideology`.
ideology = ideology.rename(columns={'nominate_dim1': 'left_right_ideology'})
ideology

Unnamed: 0,bioname,chamber,left_right_ideology,party
0,"ROGERS, Mike Dennis",House,0.378,Republican
1,"SEWELL, Terri",House,-0.401,Democrat
2,"PALMER, Gary James",House,0.674,Republican
3,"MOORE, Barry",House,0.645,Republican
4,"STRONG, Dale",House,0.606,Republican
...,...,...,...,...
540,"JUSTICE, James Conley, II",Senate,0.555,Republican
541,"BALDWIN, Tammy",Senate,-0.487,Democrat
542,"JOHNSON, Ron",Senate,0.641,Republican
543,"LUMMIS, Cynthia M.",Senate,0.686,Republican


In [9]:
# Rank members from the highest to the lowest left_right_ideology score.
ideology.sort_values(by='left_right_ideology', ascending=False)

Unnamed: 0,bioname,chamber,left_right_ideology,party
398,"GILL, Brandon",House,0.981,Republican
296,"HARRIGAN, Pat",House,0.981,Republican
443,"TUBERVILLE, Thomas Hawley (Tommy)",Senate,0.936,Republican
493,"SCHMITT, Eric Stephen",Senate,0.918,Republican
129,"MCCORMICK, Rich",House,0.891,Republican
...,...,...,...,...
429,"RANDALL, Emily",House,-0.685,Democrat
485,"WARREN, Elizabeth",Senate,-0.744,Democrat
401,"TURNER, Sylvester",House,-0.746,Democrat
381,"GARCIA, Sylvia",House,-0.781,Democrat


In [17]:
# Display the crosswalk: bioname alongside the state, district, ICPSR and
# Bioguide identifier columns split off from the members file.
crosswalk

Unnamed: 0,bioname,state_abbrev,district_code,icpsr,bioguide_id
0,"ROGERS, Mike Dennis",AL,3,20301,R000575
1,"SEWELL, Terri",AL,7,21102,S001185
2,"PALMER, Gary James",AL,6,21500,P000609
3,"MOORE, Barry",AL,1,22140,M001212
4,"STRONG, Dale",AL,5,22366,S001220
...,...,...,...,...,...
438,"TIFFANY, Thomas P.",WI,7,21989,T000165
439,"FITZGERALD, Scott",WI,5,22115,F000471
440,"VAN ORDEN, Derrick",WI,3,22370,V000135
441,"WIED, Tony",WI,8,22383,W000829


In [21]:
# Re-attach the identifier columns to the ideology table by member name.
# The outer join keeps unmatched rows from either side, `validate` raises if a
# bioname repeats in either table, and the `matched` column records which
# table(s) each row came from.
ideology_crosswalk = ideology.merge(
    crosswalk,
    how='outer',
    on='bioname',
    validate='one_to_one',
    indicator='matched',
)

In [22]:
# Count rows by merge outcome (both / left_only / right_only) to confirm that
# every member was found in both tables.
ideology_crosswalk['matched'].value_counts()

matched
both          545
left_only       0
right_only      0
Name: count, dtype: int64

In [23]:
# The merge indicator has been checked, so it can be removed.
ideology_crosswalk = ideology_crosswalk.drop(columns=['matched'])

In [13]:
# Keep only the rows whose state abbreviation is VA.
ideology_crosswalk[ideology_crosswalk['state_abbrev'] == 'VA']

Unnamed: 0,bioname,chamber,left_right_ideology,party,state_abbrev,district_code,icpsr,bioguide_id
410,"WITTMAN, Robert J.",House,0.448,Republican,VA,1,20756,W000804
411,"CONNOLLY, Gerald E. (Gerry)",House,-0.309,Democrat,VA,11,20952,C001078
412,"GRIFFITH, H. Morgan",House,0.51,Republican,VA,9,21191,G000568
413,"BEYER, Donald Sternoff Jr.",House,-0.395,Democrat,VA,8,21554,B001292
414,"CLINE, Benjamin",House,0.716,Republican,VA,6,21908,C001118
415,"KIGGANS, Jennifer",House,0.26,Republican,VA,2,22335,K000399
416,"MCCLELLAN, Jennifer",House,-0.55,Democrat,VA,4,22374,M001227
417,"MCGUIRE, John J., III",House,0.673,Republican,VA,5,22539,M001239
418,"SUBRAMANYAM, Suhas",House,-0.301,Democrat,VA,10,22554,S001230
419,"VINDMAN, Eugene Simon",House,-0.168,Democrat,VA,7,22558,V000138


## Vote similarity matrix

In [None]:
# Download the Voteview HS119 roll-call vote file.
url = 'https://voteview.com/static/data/out/votes/HS119_votes.csv'
votes = pd.read_csv(url)
# End the cell with a small preview so that the table displayed under the
# cell is actually produced by the code in it.
votes.head(3)

Unnamed: 0,congress,chamber,rollnumber,icpsr,cast_code,prob
0,119,House,1,14854,1,99.6
1,119,House,1,14863,1,78.2
2,119,House,1,14873,6,100.0


### pd.merge(data1, data2, on, how, validate, indicator)

Six arguments:

* data1, data2: the two dataframes we want to join/merge

* on: the column(s) whose values the dataframes share. If these columns don't have the same name, use left_on and right_on instead

* how: what to do with rows that do not have a match in the other dataset

    * inner: drop any unmatched row

    * outer (full): keep all rows; where a row has no match, fill the columns that would have come from the other dataset with missing values

    * left: keep everything from data1, drop unmatched from data2

    * right: keep everything from data2, drop unmatched from data1

    Note: it's a good idea to join outer, at first, to see if there are any problems with matching. 

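* validate: "one_to_one", "many_to_one", "one_to_many", or "many_to_many". States how many rows on each side are allowed to share a key value: "one" means the key must be unique in that dataframe (the first word refers to data1, the second to data2). If the expectation is not met, pandas raises an error instead of merging.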

* indicator: a new column that will tell you whether an ID (from the on feature) was found in both datasets, the left only, or right only
    

In [19]:
# Two toy frames in which every row has the same id, used to show what a
# many-to-many merge produces.
data1 = pd.DataFrame(dict(id=[1, 1, 1], x=list(range(1, 4))))
data2 = pd.DataFrame(dict(id=[1, 1, 1], y=list(range(4, 7))))
data1

Unnamed: 0,id,x
0,1,1
1,1,2
2,1,3


In [20]:
# Show the second toy frame for comparison with data1.
data2

Unnamed: 0,id,y
0,1,4
1,1,5
2,1,6


In [18]:
# Both frames repeat id == 1 three times, so the join pairs every row of data1
# with every row of data2 (3 x 3 = 9 rows).
data1.merge(data2, on='id')

Unnamed: 0,id,x,y
0,1,1,4
1,1,1,5
2,1,1,6
3,1,2,4
4,1,2,5
5,1,2,6
6,1,3,4
7,1,3,5
8,1,3,6


In [26]:
# `congress` and `prob` are not used in the pairwise comparison; remove them.
votes = votes.drop(columns=['congress', 'prob'])
votes

Unnamed: 0,chamber,rollnumber,icpsr,cast_code
0,House,1,14854,1
1,House,1,14863,1
2,House,1,14873,6
3,House,1,15029,6
4,House,1,15433,6
...,...,...,...,...
174605,Senate,530,42504,1
174606,Senate,530,42505,1
174607,Senate,530,42506,1
174608,Senate,530,49308,6


In [27]:
# Join the votes table to itself on roll call, so that every pair of rows
# sharing a chamber and rollnumber lands on one row. The overlapping icpsr and
# cast_code columns receive _x / _y suffixes.
vote_compare = votes.merge(
    votes,
    how='outer',
    on=['chamber', 'rollnumber'],
    validate='many_to_many',
    indicator='matched',
)

In [31]:
# Drop the merge indicator, then discard the rows that pair a member with
# themselves (same icpsr on both sides).
vote_compare = (
    vote_compare
    .drop(columns=['matched'])
    .loc[lambda pairs: pairs['icpsr_x'] != pairs['icpsr_y']]
)
vote_compare

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y
1,House,1,14854,1,14863,1
2,House,1,14854,1,14873,6
3,House,1,14854,1,15029,6
4,House,1,14854,1,15433,6
5,House,1,14854,1,15448,6
...,...,...,...,...,...,...
57936808,Senate,530,49703,1,42503,1
57936809,Senate,530,49703,1,42504,1
57936810,Senate,530,49703,1,42505,1
57936811,Senate,530,49703,1,42506,1


In [32]:
# Flag each row where the two members have the same cast code.
# NOTE(review): this compares the raw codes. If several codes stand for the
# same position, or some codes mean absent / not voting, then equality of codes
# is not the same thing as substantive agreement — confirm against the
# Voteview cast_code codebook.
vote_compare['agree'] = vote_compare['cast_code_x'].eq(vote_compare['cast_code_y'])
vote_compare

Unnamed: 0,chamber,rollnumber,icpsr_x,cast_code_x,icpsr_y,cast_code_y,agree
1,House,1,14854,1,14863,1,True
2,House,1,14854,1,14873,6,False
3,House,1,14854,1,15029,6,False
4,House,1,14854,1,15433,6,False
5,House,1,14854,1,15448,6,False
...,...,...,...,...,...,...,...
57936808,Senate,530,49703,1,42503,1,True
57936809,Senate,530,49703,1,42504,1,True
57936810,Senate,530,49703,1,42505,1,True
57936811,Senate,530,49703,1,42506,1,True


In [34]:
# Collapse to one row per ordered pair of members: the mean of `agree`, i.e.
# the share of their shared rows on which the cast codes were equal.
vote_compare = (
    vote_compare
    .groupby(['icpsr_x', 'icpsr_y'])['agree']
    .mean()
    .reset_index()
)
vote_compare

Unnamed: 0,icpsr_x,icpsr_y,agree
0,14226,14435,0.020755
1,14226,14858,0.067925
2,14226,14871,0.052830
3,14226,14921,0.883019
4,14226,15021,0.100000
...,...,...,...
206035,91980,29911,0.320285
206036,91980,31101,0.341637
206037,91980,31102,0.704626
206038,91980,39301,0.341637


In [41]:
# Look up the crosswalk details for the first member of each pair (icpsr_x).
# `validate` raises if an icpsr value repeats in the crosswalk.
vote_compare =pd.merge(vote_compare, crosswalk,
         left_on='icpsr_x',
         right_on='icpsr',
         how='outer',
         indicator='matched',
         validate='many_to_one')
# An outer join keeps unmatched rows from either side and fills them with
# missing values, so show the indicator counts here: anything other than
# "both" means some pairs lack a name or some members have no pairs.
vote_compare['matched'].value_counts()

In [43]:
# Keep the first member's name, the second member's icpsr id and the
# agreement rate; everything else from the crosswalk merge is dropped.
vote_compare = vote_compare.loc[:, ['bioname', 'icpsr_y', 'agree']]
vote_compare

Unnamed: 0,bioname,icpsr_y,agree
0,"GRASSLEY, Charles Ernest",14435,0.020755
1,"GRASSLEY, Charles Ernest",14858,0.067925
2,"GRASSLEY, Charles Ernest",14871,0.052830
3,"GRASSLEY, Charles Ernest",14921,0.883019
4,"GRASSLEY, Charles Ernest",15021,0.100000
...,...,...,...
206035,"VAN DREW, Jefferson",29911,0.320285
206036,"VAN DREW, Jefferson",31101,0.341637
206037,"VAN DREW, Jefferson",31102,0.704626
206038,"VAN DREW, Jefferson",39301,0.341637


In [44]:
# Repeat the crosswalk lookup for the second member of each pair (icpsr_y).
# Both tables have a `bioname` column, so the result holds bioname_x (already
# in vote_compare) and bioname_y (from the crosswalk).
vote_compare = vote_compare.merge(
    crosswalk,
    how='outer',
    left_on='icpsr_y',
    right_on='icpsr',
    validate='many_to_one',
    indicator='matched',
)
vote_compare

Unnamed: 0,bioname_x,icpsr_y,agree,bioname_y,state_abbrev,district_code,icpsr,bioguide_id,matched
0,"MARKEY, Edward John",14226,0.020755,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
1,"SCHUMER, Charles Ellis (Chuck)",14226,0.067925,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
2,"WYDEN, Ronald Lee",14226,0.052830,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
3,"McCONNELL, Addison Mitchell (Mitch)",14226,0.883019,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
4,"DURBIN, Richard Joseph",14226,0.100000,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
...,...,...,...,...,...,...,...,...,...
206035,"SCHAKOWSKY, Janice D.",91980,0.320285,"VAN DREW, Jefferson",NJ,2,91980,V000133,both
206036,"DelBENE, Suzan K.",91980,0.341637,"VAN DREW, Jefferson",NJ,2,91980,V000133,both
206037,"MASSIE, Thomas",91980,0.704626,"VAN DREW, Jefferson",NJ,2,91980,V000133,both
206038,"CLYBURN, James Enos",91980,0.341637,"VAN DREW, Jefferson",NJ,2,91980,V000133,both


In [46]:
# Reduce to the two member names plus the agreement rate, and give the name
# columns readable labels.
vote_compare = (
    vote_compare[['bioname_x', 'bioname_y', 'agree']]
    .rename(columns={'bioname_x': 'bioname', 'bioname_y': 'comparison_member'})
)
vote_compare

Unnamed: 0,bioname,comparison_member,agree
0,"MARKEY, Edward John","GRASSLEY, Charles Ernest",0.020755
1,"SCHUMER, Charles Ellis (Chuck)","GRASSLEY, Charles Ernest",0.067925
2,"WYDEN, Ronald Lee","GRASSLEY, Charles Ernest",0.052830
3,"McCONNELL, Addison Mitchell (Mitch)","GRASSLEY, Charles Ernest",0.883019
4,"DURBIN, Richard Joseph","GRASSLEY, Charles Ernest",0.100000
...,...,...,...
206035,"SCHAKOWSKY, Janice D.","VAN DREW, Jefferson",0.320285
206036,"DelBENE, Suzan K.","VAN DREW, Jefferson",0.341637
206037,"MASSIE, Thomas","VAN DREW, Jefferson",0.704626
206038,"CLYBURN, James Enos","VAN DREW, Jefferson",0.341637


In [49]:
# Agreement rates for the rows whose bioname contains 'MCGUIRE', highest first.
name_hit = vote_compare['bioname'].str.contains('MCGUIRE')
vote_compare[name_hit].sort_values(by='agree', ascending=False)


Unnamed: 0,bioname,comparison_member,agree
69910,"MCGUIRE, John J., III","JOHNSON, Mike",0.973684
127414,"MCGUIRE, John J., III","BEAN, Aaron",0.953737
80821,"MCGUIRE, John J., III","CLINE, Benjamin",0.950178
104872,"MCGUIRE, John J., III","CAMMACK, Kat",0.950178
162771,"MCGUIRE, John J., III","CRANK, Jeff",0.950178
...,...,...,...
102224,"MCGUIRE, John J., III","PLASKETT, Stacey E.",0.121951
101785,"MCGUIRE, John J., III","NORTON, Eleanor Holmes",0.121951
95949,"MCGUIRE, John J., III","SHERRILL, Mikie",0.106762
187057,"MCGUIRE, John J., III","WALKINSHAW, James R.",0.105263
