# US Representative Voting Patterns and Funding Sources

*Exploratory analysis of how funding influences voting habits in Congress*

Part 2 of Data Wrangling: Congressional Member Voting Data (*using the 113th Congress, 2013–2014, as a case study*)

In [1]:
# modules to import for data clean up

import numpy as np
import pandas as pd

### Dataset Import and Cleaning

*Removing unnecessary columns for analysis, making formatting changes, and addressing nulls*

Datasets:
1. Voteview Member Ideology
2. Voteview Congressional Votes
3. Voteview Member Votes
4. DIME PLUS Congressional Bills

#### Voteview Member Ideology Dataset

In [2]:
# Voteview member file for the 113th Congress.
# Each record holds basic biographical fields (state, district, party, name)
# together with the member's ideological scores.
h113m = pd.read_json(path_or_buf='./datasets/HS113_members.json')

# peek at the first two records to confirm the load
h113m.head(n=2)

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,died,nominate_dim1,nominate_dim2,nominate_log_likelihood,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2
0,113,President,99911,99,0,USA,100,0,0,"OBAMA, Barack",...,,-0.358,-0.197,-41.04464,0.882,327,11,,,
1,113,House,20300,41,1,AL,200,1,1,"BONNER, Jr., Josiah Robins (Jo)",...,,0.367,0.513,-41.88718,0.888,354,17,,0.331,0.625


In [3]:
# Trim the member table to the fields used in this analysis.
h113m = h113m.drop(
    columns=[
        'state_icpsr',
        'occupancy',
        'last_means',
        'born',
        'nominate_log_likelihood',
        'nominate_number_of_votes',
        'nominate_number_of_errors',
        'conditional',
        'nokken_poole_dim1',
        'nokken_poole_dim2',
        'congress',
        'died',
        'nominate_geo_mean_probability',
    ]
)

# Friendlier labels, assigned by position: the nine names below must stay in
# the same order as the nine columns that survive the drop above.
h113m.columns = [
    'chamber',
    'icpsr',
    'district',
    'state',
    'party',
    'name',
    'congress_id',
    'NOMINATE_dim1',
    'NOMINATE_dim2',
]

In [4]:
# keep House rows only; the member file also carries other chambers
# (its first record, for example, has chamber 'President')
h113m = h113m.loc[h113m['chamber'] == 'House']

In [5]:
# sanity check on the roster: any count reported under True would mean a
# member name appears more than once
h113m.duplicated(subset=['name'], keep='first').value_counts()

False    444
dtype: int64

In [6]:
# Lift the row display limit so the complete member roster can be scanned by eye.
# The option key is written out in full ('display.max_rows'): abbreviated keys
# are only resolved by pattern matching and stop working if pandas ever adds
# another option with a similar name.
pd.set_option('display.max_rows', None)
h113m['name'].sort_values()

302                                   ADAMS, Alma
8                                ADERHOLT, Robert
183                             ALEXANDER, Rodney
214                                 AMASH, Justin
245                               AMODEI, Mark E.
256                        ANDREWS, Robert Ernest
223                             BACHMANN, Michele
7                         BACHUS, Spencer T., III
15                                    BARBER, Ron
343                                 BARLETTA, Lou
174                    BARR, Garland H. (Andy) IV
123                                  BARROW, John
370                             BARTON, Joe Linus
44                                    BASS, Karen
319                                 BEATTY, Joyce
64                                BECERRA, Xavier
212                                 BENISHEK, Dan
215                             BENTIVOLIO, Kerry
48                                      BERA, Ami
97                                 BILIRAKIS, Gus


In [7]:
# The analysis covers only members who held their seat as of the general
# election, so representatives who joined during the session via a special
# election are removed. Expected result: 433 members, given the two vacancies
# (Jesse Jackson and Tim Scott) at the start of the session.
semembers = [
    'KELLY, Robin L.',
    'SANFORD, Mark',
    'SMITH, Jason',
    'CLARK, Katherine M.',
    'BYRNE, Bradley',
    'McALLISTER, Vance M.',
    'JOLLY, David W.',
    'ADAMS, Alma',
    'CLAWSON, Curt',
    'BRAT, David A.',
    'NORCROSS, Donald',
]

# keep every row whose name is not on the special-election list
h113m = h113m[~h113m['name'].isin(semembers)]

In [8]:
# row count should now be 433, the number of members expected once the special-election members are removed
h113m.shape

(433, 9)

In [9]:
# persist the cleaned House member table (row index omitted from the file)
h113m.to_csv(
    './datasets/c113m.csv',
    sep=',',
    encoding='UTF-8',
    index=False,
)

#### Voteview Congressional Votes Dataset

In [138]:
# Voteview roll-call file for the 113th Congress: one record per vote taken,
# holding the vote's result and its ideological parameters.
h113r = pd.read_json(path_or_buf='./datasets/HS113_rollcalls.json')

# peek at the first two records to confirm the load
h113r.head(n=2)

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,nominate_log_likelihood,bill_number,vote_result,vote_desc,vote_question,dtl_desc,issue_codes,peltzman_codes,clausen_codes,crs_policy_area,crs_subjects,congress_url,source_documents
0,113,House,1,2013-01-03,1,2,220,192,0.061,0.358,-0.747,0.288,-0.303,,Boehner,,Election of the Speaker,,[Election of the Speaker of the House],[Internal Organization],[Miscellaneous Policy],,,,
1,113,House,2,2013-01-03,1,3,224,187,0.056,0.197,-0.732,0.309,-0.292,HRES5,Passed,Adopting rules for the One Hundred Thirteenth ...,On Motion to Table the Motion to Refer,,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...,


In [139]:
# column dtypes and non-null counts for the roll-call table
h113r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1859 entries, 0 to 1858
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   congress                 1859 non-null   int64         
 1   chamber                  1859 non-null   object        
 2   rollnumber               1859 non-null   int64         
 3   date                     1859 non-null   datetime64[ns]
 4   session                  1859 non-null   int64         
 5   clerk_rollnumber         1859 non-null   int64         
 6   yea_count                1859 non-null   int64         
 7   nay_count                1859 non-null   int64         
 8   nominate_mid_1           1859 non-null   float64       
 9   nominate_mid_2           1859 non-null   float64       
 10  nominate_spread_1        1859 non-null   float64       
 11  nominate_spread_2        1859 non-null   float64       
 12  nominate_log_likelihood  1859 non-

In [140]:
# TODO (wish list): 'crs_subjects' is not usable in its current state. Parsing it in detail to build a table of detailed subjects by bill would be valuable, but is most likely not feasible given the time constraint.
# h113r['crs_subjects'].value_counts()

In [141]:
# remove roll-call fields that are not needed for the analysis
h113r = h113r.drop(
    columns=[
        'source_documents',
        'dtl_desc',
        'nominate_log_likelihood',
        'congress',
        'clerk_rollnumber',
    ]
)

In [142]:
# keep House roll calls only; take a copy so that later column assignments
# operate on an independent frame rather than on a slice
h113r = h113r[h113r['chamber'] == 'House'].copy()

In [143]:
# Convert the list-valued code columns to their string representation.
# The result is assigned back to the frame: rebinding a loop variable to
# str(value) leaves the column untouched, and naming that variable `list`
# would also shadow the builtin for the rest of the notebook.
# Non-list cells (missing values) are left as they are so null checks still work.
for code_col in ['issue_codes', 'peltzman_codes', 'clausen_codes']:
    h113r[code_col] = h113r[code_col].map(
        lambda cell: str(cell) if isinstance(cell, list) else cell
    )

In [144]:
# Lift the column display limit so every roll-call field is visible.
# The option key is written out in full ('display.max_columns'): abbreviated
# keys are only resolved by pattern matching and stop working if pandas ever
# adds another option with a similar name.
pd.set_option('display.max_columns', None)
h113r.head(20)

Unnamed: 0,chamber,rollnumber,date,session,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,bill_number,vote_result,vote_desc,vote_question,issue_codes,peltzman_codes,clausen_codes,crs_policy_area,crs_subjects,congress_url
0,House,1,2013-01-03,1,220,192,0.061,0.358,-0.747,0.288,,Boehner,,Election of the Speaker,[Election of the Speaker of the House],[Internal Organization],[Miscellaneous Policy],,,
1,House,2,2013-01-03,1,224,187,0.056,0.197,-0.732,0.309,HRES5,Passed,Adopting rules for the One Hundred Thirteenth ...,On Motion to Table the Motion to Refer,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...
2,House,3,2013-01-03,1,227,191,0.155,0.562,-0.724,0.438,HRES5,Passed,Adopting rules for the One Hundred Thirteenth ...,On Ordering the Previous Question,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...
3,House,4,2013-01-03,1,194,229,0.135,0.976,0.771,-0.249,HRES5,Failed,Adopting rules for the One Hundred Thirteenth ...,On Motion to Commit,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...
4,House,5,2013-01-03,1,228,196,0.168,0.695,-0.721,0.461,HRES5,Passed,Adopting rules for the One Hundred Thirteenth ...,On Agreeing to the Resolution,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...
5,House,6,2013-01-04,1,354,67,0.586,-0.254,1.333,-0.592,HR41,Passed,To temporarily increase the borrowing authorit...,On Motion to Suspend the Rules and Pass,[Public Safety],[Budget Special Interest],[Government Management],Emergency Management,"[Budget process, Department of Homeland Securi...",https://www.congress.gov/bill/113th-congress/h...
6,House,7,2013-01-14,1,403,0,0.0,0.0,0.0,0.0,HR219,Passed,To improve and streamline disaster assistance ...,On Motion to Suspend the Rules and Pass,,[Budget Special Interest],[Government Management],Emergency Management,"[Alternative dispute resolution, mediation, ar...",https://www.congress.gov/bill/113th-congress/h...
7,House,8,2013-01-14,1,300,95,0.776,-0.423,0.067,-0.114,,Passed,,On Approving the Journal,,[Internal Organization],[Miscellaneous Policy],,,
8,House,9,2013-01-14,1,4,397,0.0,0.0,0.0,0.0,,Failed,,On Motion to Adjourn,,[Internal Organization],[Miscellaneous Policy],,,
9,House,10,2013-01-15,1,293,127,-0.277,0.186,-0.21,0.13,HRES23,Passed,Providing for consideration of the bill (H.R. ...,On Ordering the Previous Question,,[Budget Special Interest],[Government Management],Congress,"[House of Representatives, Legislative rules a...",https://www.congress.gov/bill/113th-congress/h...


In [18]:
# lower-case the bill identifiers (e.g. 'HRES5' becomes 'hres5')
h113r = h113r.assign(bill_number=h113r['bill_number'].str.lower())

In [19]:
# persist the cleaned House roll-call table (row index omitted from the file)
h113r.to_csv(
    './datasets/c113r.csv',
    index=False,
    encoding='UTF-8',
    sep=',',
)

#### Voteview Member Votes Dataset

In [20]:
# Voteview vote file: one record per vote cast by a member on a roll call
h113v = pd.read_json(path_or_buf='./datasets/HS113_votes.json')

# peek at the first record to confirm the load
h113v.head(n=1)

Unnamed: 0,congress,icpsr,cast_code,chamber,rollnumber,prob
0,113,2605,7,House,1,100.0


In [21]:
# column dtypes and non-null counts for the member-vote table
h113v.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586665 entries, 0 to 586664
Data columns (total 6 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   congress    586665 non-null  int64 
 1   icpsr       586665 non-null  int64 
 2   cast_code   586665 non-null  int64 
 3   chamber     586665 non-null  object
 4   rollnumber  586665 non-null  int64 
 5   prob        497716 non-null  object
dtypes: int64(4), object(2)
memory usage: 26.9+ MB


In [22]:
# the 'congress' field is not needed for the analysis
h113v = h113v.drop(columns='congress')

In [23]:
# keep votes cast in the House only
h113v = h113v[h113v['chamber'] == 'House']

In [24]:
# ICPSR ids of the members kept in the cleaned member table; the vote records
# are restricted to these members
icpsr_list = h113m['icpsr'].tolist()
# len(icpsr_list) can be used to confirm the expected number of members (433)

# keep only the votes whose icpsr id belongs to one of those members
h113v = h113v[h113v['icpsr'].isin(icpsr_list)]

In [25]:
# (rows, columns) of the vote table after restricting it to the cleaned member list
h113v.shape

(513433, 5)

In [26]:
# persist the cleaned member-vote table (row index omitted from the file)
h113v.to_csv(
    './datasets/c113v.csv',
    index=False,
    encoding='UTF-8',
    sep=',',
)

#### DIME PLUS Congressional Bills

In [25]:
# Supplementary dataset from Stanford (DIME PLUS) that assigns ideological
# weights to each bill put forth from 2003-2014
bills_dime = pd.read_csv(filepath_or_buffer='./datasets/bills_db.csv')

# peek at the first two records to confirm the load
bills_dime.head(n=2)

Unnamed: 0,bill_id,year,date,bill_str,bill_desc,congno,sponsors,cosponsors,tw_latent1,tw_abortion_and_social_conservatism,...,tw_healthcare,tw_higher_education,tw_immigration,tw_indian_affairs,tw_intelligence_and_surveillance,tw_labor,tw_law_courts_and_judges,tw_transportation,tw_veterans_affairs,tw_womens_issues
0,113_hr83,2014,12/13/2014,H.R. 83|On the Motion to Proceed H.R. 83|To re...,To require the Secretary of the Interior to as...,113,,,0.471785,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,113_pn1070,2014,12/13/2014,PN1070|On the Motion to Proceed PN1070|Christo...,"Christopher Smith, of Texas, to be an Assistan...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
# drop the first four characters of each bill id, i.e. the congress number and
# the underscore that follows it ('113_hr83' becomes 'hr83')
bills_dime['bill_id'] = bills_dime['bill_id'].str.slice(start=4)

In [28]:
# keep only the bills recorded for the 113th Congress
bills_dime = bills_dime[bills_dime['congno'].eq(113)]

In [30]:
# keep bills whose id contains "hr" (ids such as 'hr83' and 'hres373' match);
# rows with a missing id are treated as non-matching
bills_dime = bills_dime[bills_dime['bill_id'].str.contains("hr", na=False)]

In [31]:
# list every remaining bill_id (row display limit lifted) to confirm the ids
# are in the format that links to the key in the roll-call table
pd.options.display.max_rows = None
bills_dime['bill_id'].value_counts()

hr83       1
hres373    1
hres347    1
hr761      1
hr301      1
hres352    1
hres351    1
hr3102     1
hr1526     1
hr687      1
hr2600     1
hr3096     1
hres366    1
hres361    1
hr2251     1
hr3210     1
hr2848     1
hres367    1
hres368    1
hres370    1
hr3230     1
hr2449     1
hr2775     1
hres339    1
hr2218     1
hres300    1
hr2668     1
hr2667     1
hres303    1
hr5        1
hres312    1
hr2397     1
hres315    1
hr2610     1
hr1155     1
hr850      1
hres322    1
hr2879     1
hr1897     1
hr1582     1
hr367      1
hr2009     1
hr2844     1
hres371    1
hr3273     1
hr2642     1
hres380    1
hr1204     1
hres429    1
hr1105     1
hr3309     1
hr1402     1
hr1992     1
hres438    1
hr3304     1
hr3628     1
hr3527     1
hres455    1
hr2279     1
hr3811     1
hr2860     1
hr2274     1
hr1233     1
hres458    1
hr3547     1
hr3362     1
hr3588     1
hr1900     1
hr1965     1
hr3204     1
hres378    1
hr3205     1
hres385    1
hr3080     1
hr2011     1
hres391    1
hr2374     1

In [32]:
# list the available columns before deciding which ones to drop
bills_dime.columns

Index(['bill_id', 'year', 'date', 'bill_str', 'bill_desc', 'congno',
       'sponsors', 'cosponsors', 'tw_latent1',
       'tw_abortion_and_social_conservatism', 'tw_agriculture',
       'tw_banking_and_finance', 'tw_civil_rights',
       'tw_congress_and_procedural', 'tw_crime',
       'tw_defense_and_foreign_policy', 'tw_economy', 'tw_education',
       'tw_energy', 'tw_environment', 'tw_fair_elections',
       'tw_federal_agencies_and_gov_regulation', 'tw_guns', 'tw_healthcare',
       'tw_higher_education', 'tw_immigration', 'tw_indian_affairs',
       'tw_intelligence_and_surveillance', 'tw_labor',
       'tw_law_courts_and_judges', 'tw_transportation', 'tw_veterans_affairs',
       'tw_womens_issues'],
      dtype='object')

In [33]:
# Remove the fields that are not needed for the analysis.
# The result is reassigned instead of dropping in place: bills_dime is the
# product of boolean filtering, and mutating such a slice in place can raise
# pandas' SettingWithCopyWarning.
bills_dime = bills_dime.drop(columns=['year', 'date', 'bill_str', 'congno'])

In [34]:
# preview the first two rows of the trimmed bill table
bills_dime.head(2)

Unnamed: 0,bill_id,bill_desc,sponsors,cosponsors,tw_latent1,tw_abortion_and_social_conservatism,tw_agriculture,tw_banking_and_finance,tw_civil_rights,tw_congress_and_procedural,...,tw_healthcare,tw_higher_education,tw_immigration,tw_indian_affairs,tw_intelligence_and_surveillance,tw_labor,tw_law_courts_and_judges,tw_transportation,tw_veterans_affairs,tw_womens_issues
0,hr83,To require the Secretary of the Interior to as...,,,0.471785,0.0,0.0,0.0,0.072641,0.110686,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,hr5806,Supporting America’s Charities Act,,,0.131329,0.061381,0.013441,0.022888,0.05531,0.100501,...,0.002327,0.001538,0.009363,0.03894,0.002084,0.010166,0.015219,7.6e-05,6.3e-05,0.004749


In [35]:
# persist the cleaned DIME bill table (row index omitted from the file)
bills_dime.to_csv(
    './datasets/bills_dime.csv',
    index=False,
    encoding='UTF-8',
    sep=',',
)