In [2]:
import numpy as np
import pandas as pd

In [3]:
# Member-level Voteview file (HS119_members.csv): one row per member record.
url = 'https://voteview.com/static/data/out/members/HS119_members.csv'
members = pd.read_csv(url)

# Columns used for the ideology table vs. the ID/district lookup table.
cols_to_keep = ['bioname', 'chamber', 'nominate_dim1', 'party_code']
crosswalk_cols = ['bioname', 'state_abbrev', 'district_code', 'icpsr', 'bioguide_id']

# Take explicit copies so that adding or editing columns on one table later
# never touches (or is mistaken for a view of) the raw frame or the other table.
crosswalk = members[crosswalk_cols].copy()
ideology = members[cols_to_keep].copy()

In [4]:
# Tally how many rows carry each numeric party code.
ideology.loc[:, 'party_code'].value_counts()

party_code
200    279
100    264
328      2
Name: count, dtype: int64

In [5]:
# Lookup from numeric party code to a readable party label.
replace_map = {
    200: 'Republican',
    100: 'Democrat',
    328: 'Independent',
}

# Build the frame with the new `party` column in one expression, then
# confirm the recode by counting rows per label.
ideology = ideology.assign(party=ideology['party_code'].replace(replace_map))
ideology['party'].value_counts()

party
Republican     279
Democrat       264
Independent      2
Name: count, dtype: int64

In [6]:
# The numeric code is redundant once `party` exists; give the score column
# a descriptive name at the same time.
ideology = (
    ideology
    .drop(columns='party_code')
    .rename(columns={'nominate_dim1': 'left_right_ideology'})
)
ideology

Unnamed: 0,bioname,chamber,left_right_ideology,party
0,"ROGERS, Mike Dennis",House,0.378,Republican
1,"SEWELL, Terri",House,-0.401,Democrat
2,"PALMER, Gary James",House,0.674,Republican
3,"MOORE, Barry",House,0.645,Republican
4,"STRONG, Dale",House,0.606,Republican
...,...,...,...,...
540,"JUSTICE, James Conley, II",Senate,0.555,Republican
541,"BALDWIN, Tammy",Senate,-0.487,Democrat
542,"JOHNSON, Ron",Senate,0.641,Republican
543,"LUMMIS, Cynthia M.",Senate,0.686,Republican


In [7]:
# Order members from the highest left_right_ideology score to the lowest.
ideology = ideology.sort_values(by='left_right_ideology', ascending=False)
ideology

Unnamed: 0,bioname,chamber,left_right_ideology,party
398,"GILL, Brandon",House,0.981,Republican
296,"HARRIGAN, Pat",House,0.981,Republican
443,"TUBERVILLE, Thomas Hawley (Tommy)",Senate,0.936,Republican
493,"SCHMITT, Eric Stephen",Senate,0.918,Republican
129,"MCCORMICK, Rich",House,0.891,Republican
...,...,...,...,...
429,"RANDALL, Emily",House,-0.685,Democrat
485,"WARREN, Elizabeth",Senate,-0.744,Democrat
401,"TURNER, Sylvester",House,-0.746,Democrat
381,"GARCIA, Sylvia",House,-0.781,Democrat


### pd.merge(data1, data2, on, how, validate, indicator)

Six arguments:

* data1, data2: the two dataframes we want to join/merge

* on: the column(s) whose values the dataframes share. If these columns don't have the same name in both dataframes, use left_on and right_on instead

* how: what to do with rows that do not have a match in the other dataset

    * inner: drop any unmatched row

    * outer (full): keep all rows, if they don't match put missing values in the unmatched part of the data

    * left: keep everything from data1, drop unmatched from data2

    * right: keep everything from data2, drop unmatched from data1

    Note: It's a good idea to use an outer join at first, so you can see whether any rows fail to match.

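* validate: "one_to_one", "many_to_one", "one_to_many", or "many_to_many". Sets an expectation about whether the merge keys are unique in each dataframe: the first word describes data1 and the second describes data2, where "one" means each key value appears at most once on that side and "many" allows repeats. If the expectation is not met, pandas raises a MergeError instead of returning a result.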

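* indicator: adds a new column that tells you whether each row's key (from the on columns) was found in both datasets, the left only, or the right only. Pass True to get a column named "_merge", or pass a string to choose the column name yourself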

In [14]:
# Attach state, district and ID columns to the ideology table, matching on
# member name. how='outer' keeps unmatched rows from either side so they can
# be detected, and validate='one_to_one' makes pandas raise if a bioname is
# repeated in either table.
ideology_crosswalk = pd.merge(ideology, crosswalk,
                              on='bioname',
                              how='outer',
                              validate='one_to_one',
                              indicator='matched')

# Enforce the match check instead of leaving it as an undisplayed mid-cell
# expression. Expected at the time of writing: both=545, left_only=0,
# right_only=0.
unmatched = ideology_crosswalk.loc[ideology_crosswalk['matched'] != 'both', 'bioname']
assert unmatched.empty, (
    f"{len(unmatched)} rows did not match in both tables, e.g. {unmatched.tolist()[:5]}"
)

# The indicator has served its purpose; drop it from the analysis table.
ideology_crosswalk = ideology_crosswalk.drop(columns=['matched'])

In [10]:
# Rows for Virginia's district_code 5, selected with a boolean mask.
ideology_crosswalk.loc[
    lambda df: (df['state_abbrev'] == 'VA') & (df['district_code'] == 5)
]

Unnamed: 0,bioname,chamber,left_right_ideology,party,state_abbrev,district_code,icpsr,bioguide_id
324,"MCGUIRE, John J., III",House,0.673,Republican,VA,5,22539,M001239


## Vote Similarity Matrix

In [29]:
# Vote-level Voteview file (HS119_votes.csv); preview the first three rows
# transposed so every column is visible as a row.
url = 'https://voteview.com/static/data/out/votes/HS119_votes.csv'
votes = pd.read_csv(filepath_or_buffer=url)
votes.head(n=3).transpose()

Unnamed: 0,0,1,2
congress,119,119,119
chamber,House,House,House
rollnumber,1,1,1
icpsr,14854,14863,14873
cast_code,1,1,6
prob,99.6,78.2,100.0


In [30]:
# Join the votes table to itself on (chamber, rollnumber): every vote row is
# paired with every vote row cast on the same roll call, so the result is
# very large. Non-key columns receive the default _x / _y suffixes.
vote_compare = votes.merge(
    votes,
    how='outer',
    on=['chamber', 'rollnumber'],
    validate='many_to_many',
    indicator='matched',
)

In [31]:
# Discard the merge indicator and remove rows where a member was paired
# with themselves.
vote_compare = (
    vote_compare
    .drop(columns='matched')
    .query("icpsr_x != icpsr_y")
)
vote_compare

Unnamed: 0,congress_x,chamber,rollnumber,icpsr_x,cast_code_x,prob_x,congress_y,icpsr_y,cast_code_y,prob_y
1,119,House,1,14854,1,99.6,119,14863,1,78.2
2,119,House,1,14854,1,99.6,119,14873,6,100.0
3,119,House,1,14854,1,99.6,119,15029,6,100.0
4,119,House,1,14854,1,99.6,119,15433,6,100.0
5,119,House,1,14854,1,99.6,119,15448,6,100.0
...,...,...,...,...,...,...,...,...,...,...
57936808,119,Senate,530,49703,1,76.1,119,42503,1,100.0
57936809,119,Senate,530,49703,1,76.1,119,42504,1,100.0
57936810,119,Senate,530,49703,1,76.1,119,42505,1,100.0
57936811,119,Senate,530,49703,1,76.1,119,42506,1,99.8


In [32]:
# A pair "agrees" on a roll call when both cast codes are identical.
# NOTE(review): this compares raw cast_code values, so two identical
# non-vote codes would also count as agreement — confirm against the
# Voteview cast_code codebook.
vote_compare['agree'] = vote_compare['cast_code_x'].eq(vote_compare['cast_code_y'])

# Collapse to one row per ordered member pair; the mean of the boolean flag
# is the share of shared roll calls on which the pair agreed.
vote_compare = vote_compare.groupby(['icpsr_x', 'icpsr_y'], as_index=False)['agree'].mean()
vote_compare

Unnamed: 0,icpsr_x,icpsr_y,agree
0,14226,14435,0.020755
1,14226,14858,0.067925
2,14226,14871,0.052830
3,14226,14921,0.883019
4,14226,15021,0.100000
...,...,...,...
206035,91980,29911,0.320285
206036,91980,31101,0.341637
206037,91980,31102,0.704626
206038,91980,39301,0.341637


In [None]:
# Look up the first member of each pair in the crosswalk (icpsr_x -> icpsr).
# many_to_one: each icpsr may appear only once in the crosswalk, otherwise
# pandas raises.
vote_compare = vote_compare.merge(
    crosswalk,
    how='outer',
    left_on='icpsr_x',
    right_on='icpsr',
    validate='many_to_one',
    indicator='matched',
)

Unnamed: 0,icpsr_x,icpsr_y,agree,bioname,state_abbrev,district_code,icpsr,bioguide_id,matched
0,14226,14435,0.020755,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
1,14226,14858,0.067925,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
2,14226,14871,0.052830,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
3,14226,14921,0.883019,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
4,14226,15021,0.100000,"GRASSLEY, Charles Ernest",IA,0,14226,G000386,both
...,...,...,...,...,...,...,...,...,...
206035,91980,29911,0.320285,"VAN DREW, Jefferson",NJ,2,91980,V000133,both
206036,91980,31101,0.341637,"VAN DREW, Jefferson",NJ,2,91980,V000133,both
206037,91980,31102,0.704626,"VAN DREW, Jefferson",NJ,2,91980,V000133,both
206038,91980,39301,0.341637,"VAN DREW, Jefferson",NJ,2,91980,V000133,both


In [34]:
# Keep only the first member's name, the second member's ID, and the
# agreement rate; this also discards the 'matched' indicator column.
vote_compare = vote_compare.loc[:, ['bioname', 'icpsr_y', 'agree']]
vote_compare

Unnamed: 0,bioname,icpsr_y,agree
0,"GRASSLEY, Charles Ernest",14435,0.020755
1,"GRASSLEY, Charles Ernest",14858,0.067925
2,"GRASSLEY, Charles Ernest",14871,0.052830
3,"GRASSLEY, Charles Ernest",14921,0.883019
4,"GRASSLEY, Charles Ernest",15021,0.100000
...,...,...,...
206035,"VAN DREW, Jefferson",29911,0.320285
206036,"VAN DREW, Jefferson",31101,0.341637
206037,"VAN DREW, Jefferson",31102,0.704626
206038,"VAN DREW, Jefferson",39301,0.341637


In [None]:
# Look up the second member of each pair in the crosswalk (icpsr_y -> icpsr).
# Both sides now have a `bioname` column, so pandas suffixes them as
# bioname_x (first member) and bioname_y (second member).
vote_compare = vote_compare.merge(
    crosswalk,
    how='outer',
    left_on='icpsr_y',
    right_on='icpsr',
    validate='many_to_one',
    indicator='matched',
)

In [37]:
# Reduce to the two member names plus their agreement rate, and give the
# name columns descriptive labels.
vote_compare = (
    vote_compare
    .loc[:, ['bioname_x', 'bioname_y', 'agree']]
    .rename(columns={'bioname_x': 'bioname',
                     'bioname_y': 'comparison_member'})
)

In [38]:
# Everyone compared against McGuire, from most to least agreement.
# na=False: the name columns come from outer merges, so a missing bioname is
# possible; without it the mask would contain NaN and boolean indexing would
# raise. regex=False: 'MCGUIRE' is a literal substring, not a pattern.
(
    vote_compare
    .loc[vote_compare['bioname'].str.contains('MCGUIRE', na=False, regex=False)]
    .sort_values('agree', ascending=False)
)

Unnamed: 0,bioname,comparison_member,agree
69910,"MCGUIRE, John J., III","JOHNSON, Mike",0.973684
127414,"MCGUIRE, John J., III","BEAN, Aaron",0.953737
80821,"MCGUIRE, John J., III","CLINE, Benjamin",0.950178
104872,"MCGUIRE, John J., III","CAMMACK, Kat",0.950178
162771,"MCGUIRE, John J., III","CRANK, Jeff",0.950178
...,...,...,...
102224,"MCGUIRE, John J., III","PLASKETT, Stacey E.",0.121951
101785,"MCGUIRE, John J., III","NORTON, Eleanor Holmes",0.121951
95949,"MCGUIRE, John J., III","SHERRILL, Mikie",0.106762
187057,"MCGUIRE, John J., III","WALKINSHAW, James R.",0.105263
