# US Representative Voting Patterns and Funding Sources

*Exploratory analysis of how funding influences voting habits in Congress*

Part 2 of Exploratory Analysis: Congressional Member Voting Data (*using the 113th Congress as a case study*)

In [1]:
# modules to import for exploratory analysis

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ydata_profiling as yd

### Dataset Import and Cleaning

*Removing columns that are unnecessary for the analysis, and understanding and addressing nulls*

Datasets:
1. Voteview Member Ideology
2. Voteview Congressional Votes
3. Voteview Member Votes
4. DIME PLUS Congressional Bills
5. DIME PLUS Congressional Text

#### Voteview Member Ideology Dataset

In [2]:
# Voteview member table for the 113th Congress.
# Each row holds a member's basic biographical fields (state, district, party,
# name) together with their ideological scores.
h113m = pd.read_json(path_or_buf='./datasets/HS113_members.json')

# Preview the first five rows (head's default).
h113m.head()

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,died,nominate_dim1,nominate_dim2,nominate_log_likelihood,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2
0,113,President,99911,99,0,USA,100,0,0,"OBAMA, Barack",...,,-0.358,-0.197,-41.04464,0.882,327,11,,,
1,113,House,20300,41,1,AL,200,1,1,"BONNER, Jr., Josiah Robins (Jo)",...,,0.367,0.513,-41.88718,0.888,354,17,,0.331,0.625
2,113,House,20301,41,3,AL,200,0,1,"ROGERS, Mike Dennis",...,,0.363,0.455,-134.13992,0.8747,1002,57,,0.451,0.659
3,113,House,21102,41,7,AL,100,0,1,"SEWELL, Terri",...,,-0.396,0.398,-160.91163,0.85191,1004,80,,-0.406,0.478
4,113,House,21192,41,2,AL,200,0,1,"ROBY, Martha",...,,0.362,0.658,-87.38206,0.91759,1016,33,,0.316,0.734


In [3]:
# Discard member fields that are not needed for this analysis, leaving chamber,
# the member identifiers, district/state/party, the name and the first two
# NOMINATE dimensions.
# The result is reassigned (instead of inplace=True) and errors='ignore' is set
# so the cell can be re-run safely: a second run is a no-op rather than a
# KeyError on the already-removed columns.
h113m = h113m.drop(
    columns=[
        'state_icpsr',
        'occupancy',
        'last_means',
        'born',
        'nominate_log_likelihood',
        'nominate_number_of_votes',
        'nominate_number_of_errors',
        'conditional',
        'nokken_poole_dim1',
        'nokken_poole_dim2',
        'congress',
        'died',
        'nominate_geo_mean_probability',
    ],
    errors='ignore',
)

In [4]:
# Show (rows, columns) of the member table after the column drop.
h113m.shape

(550, 9)

In [5]:
# Preview the first two rows to see which columns remain.
h113m.head(2)

Unnamed: 0,chamber,icpsr,district_code,state_abbrev,party_code,bioname,bioguide_id,nominate_dim1,nominate_dim2
0,President,99911,0,USA,100,"OBAMA, Barack",O000167,-0.358,-0.197
1,House,20300,1,AL,200,"BONNER, Jr., Josiah Robins (Jo)",B001244,0.367,0.513


In [6]:
# Give the member columns shorter, analysis-friendly names.
# An explicit old -> new mapping is used instead of assigning a positional list:
# it does not depend on the column order, and re-running the cell is a no-op
# because rename skips keys that are no longer present.
# 'chamber' keeps its name, so it is not listed.
h113m = h113m.rename(columns={
    'icpsr': 'icpsr_id',
    'district_code': 'district',
    'state_abbrev': 'state',
    'party_code': 'party',
    'bioname': 'name',
    'bioguide_id': 'congress_id',
    'nominate_dim1': 'NOMINATE_dim1',
    'nominate_dim2': 'NOMINATE_dim2',
})

In [7]:
# Preview one row to confirm the new column names.
h113m.head(1)

Unnamed: 0,chamber,icpsr_id,district,state,party,name,congress_id,NOMINATE_dim1,NOMINATE_dim2
0,President,99911,0,USA,100,"OBAMA, Barack",O000167,-0.358,-0.197


In [8]:
# Inspect dtypes and non-null counts per column to look for missing values.
h113m.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   chamber        550 non-null    object 
 1   icpsr_id       550 non-null    int64  
 2   district       550 non-null    int64  
 3   state          550 non-null    object 
 4   party          550 non-null    int64  
 5   name           550 non-null    object 
 6   congress_id    550 non-null    object 
 7   NOMINATE_dim1  550 non-null    float64
 8   NOMINATE_dim2  550 non-null    float64
dtypes: float64(2), int64(3), object(4)
memory usage: 38.8+ KB


In [9]:
# Persist the cleaned member table as a comma-separated, UTF-8 CSV without the index.
# NOTE(review): this file is written to the working directory, whereas the other
# exports in this notebook go under ./datasets/ - confirm which location
# downstream notebooks expect before changing it.
h113m.to_csv(
    'c113m.csv',
    encoding='UTF-8',
    sep=',',
    index=False,
)

#### Voteview Congressional Votes Dataset

In [11]:
# Voteview roll-call table for the 113th Congress.
# Each row describes one vote taken: its result and ideological parameters.
h113r = pd.read_json(path_or_buf='./datasets/HS113_rollcalls.json')

# Preview the first two roll calls.
h113r.head(n=2)

Unnamed: 0,congress,chamber,rollnumber,date,session,clerk_rollnumber,yea_count,nay_count,nominate_mid_1,nominate_mid_2,...,vote_desc,vote_question,dtl_desc,issue_codes,peltzman_codes,clausen_codes,crs_policy_area,crs_subjects,congress_url,source_documents
0,113,House,1,2013-01-03,1,2,220,192,0.061,0.358,...,,Election of the Speaker,,[Election of the Speaker of the House],[Internal Organization],[Miscellaneous Policy],,,,
1,113,House,2,2013-01-03,1,3,224,187,0.056,0.197,...,Adopting rules for the One Hundred Thirteenth ...,On Motion to Table the Motion to Refer,,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...,


In [12]:
# Inspect dtypes and non-null counts per column of the roll-call table.
h113r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1859 entries, 0 to 1858
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   congress                 1859 non-null   int64         
 1   chamber                  1859 non-null   object        
 2   rollnumber               1859 non-null   int64         
 3   date                     1859 non-null   datetime64[ns]
 4   session                  1859 non-null   int64         
 5   clerk_rollnumber         1859 non-null   int64         
 6   yea_count                1859 non-null   int64         
 7   nay_count                1859 non-null   int64         
 8   nominate_mid_1           1859 non-null   float64       
 9   nominate_mid_2           1859 non-null   float64       
 10  nominate_spread_1        1859 non-null   float64       
 11  nominate_spread_2        1859 non-null   float64       
 12  nominate_log_likelihood  1859 non-

In [13]:
# Discard roll-call fields that are not needed for this analysis.
# The result is reassigned (instead of inplace=True) and errors='ignore' is set
# so the cell can be re-run safely: a second run is a no-op rather than a
# KeyError on the already-removed columns.
h113r = h113r.drop(
    columns=[
        'source_documents',
        'dtl_desc',
        'nominate_log_likelihood',
        'congress',
        'clerk_rollnumber',
    ],
    errors='ignore',
)

In [14]:
# Preview the first two rows to see which roll-call columns remain.
h113r.head(2)

Unnamed: 0,chamber,rollnumber,date,session,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,bill_number,vote_result,vote_desc,vote_question,issue_codes,peltzman_codes,clausen_codes,crs_policy_area,crs_subjects,congress_url
0,House,1,2013-01-03,1,220,192,0.061,0.358,-0.747,0.288,,Boehner,,Election of the Speaker,[Election of the Speaker of the House],[Internal Organization],[Miscellaneous Policy],,,
1,House,2,2013-01-03,1,224,187,0.056,0.197,-0.732,0.309,HRES5,Passed,Adopting rules for the One Hundred Thirteenth ...,On Motion to Table the Motion to Refer,,[Internal Organization],[Miscellaneous Policy],Congress,"[Administrative law and regulatory procedures,...",https://www.congress.gov/bill/113th-congress/h...


In [15]:
# Persist the trimmed roll-call table as a comma-separated, UTF-8 CSV without the index.
h113r.to_csv(
    './datasets/c113r.csv',
    encoding='UTF-8',
    index=False,
    sep=',',
)

#### Voteview Member Votes Dataset

In [17]:
# Voteview member-votes table: each row pairs a member id (icpsr) with a
# roll-call number and the code of the vote that member cast.
h113v = pd.read_json(path_or_buf='./datasets/HS113_votes.json')

# Preview a single row.
h113v.head(n=1)

Unnamed: 0,congress,icpsr,cast_code,chamber,rollnumber,prob
0,113,2605,7,House,1,100.0


In [18]:
# Inspect dtypes and non-null counts per column of the member-votes table.
h113v.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586665 entries, 0 to 586664
Data columns (total 6 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   congress    586665 non-null  int64 
 1   icpsr       586665 non-null  int64 
 2   cast_code   586665 non-null  int64 
 3   chamber     586665 non-null  object
 4   rollnumber  586665 non-null  int64 
 5   prob        497716 non-null  object
dtypes: int64(4), object(2)
memory usage: 26.9+ MB


In [19]:
# Drop the 'congress' column, which is not needed for this analysis.
# The result is reassigned (instead of inplace=True) and errors='ignore' is set
# so re-running the cell is a no-op rather than a KeyError.
h113v = h113v.drop(columns=['congress'], errors='ignore')

In [20]:
# Show (rows, columns) of the member-votes table after the column drop.
h113v.shape

(586665, 5)

In [21]:
# Persist the member-votes table as a comma-separated, UTF-8 CSV without the index.
h113v.to_csv(
    './datasets/c113v.csv',
    encoding='UTF-8',
    index=False,
    sep=',',
)

#### DIME PLUS Congressional Bills

In [30]:
# Supplementary DIME PLUS dataset (Stanford): assigns ideological weights to
# each bill put forth between 2003 and 2014.
bills_dime = pd.read_csv(filepath_or_buffer='./datasets/bills_db.csv')

# Preview the first two bills.
bills_dime.head(n=2)

Unnamed: 0,bill_id,year,date,bill_str,bill_desc,congno,sponsors,cosponsors,tw_latent1,tw_abortion_and_social_conservatism,...,tw_healthcare,tw_higher_education,tw_immigration,tw_indian_affairs,tw_intelligence_and_surveillance,tw_labor,tw_law_courts_and_judges,tw_transportation,tw_veterans_affairs,tw_womens_issues
0,113_hr83,2014,12/13/2014,H.R. 83|On the Motion to Proceed H.R. 83|To re...,To require the Secretary of the Interior to as...,113,,,0.471785,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,113_pn1070,2014,12/13/2014,PN1070|On the Motion to Proceed PN1070|Christo...,"Christopher Smith, of Texas, to be an Assistan...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# Remove the leading "<congress number>_" prefix from each bill id
# (e.g. "113_hr83" -> "hr83").
# An anchored regex is used instead of a fixed four-character slice so the
# result does not depend on the congress number having exactly three digits,
# and so that re-running the cell leaves already-stripped ids untouched.
bills_dime['bill_id'] = bills_dime['bill_id'].str.replace(r'^\d+_', '', regex=True)

In [32]:
# Preview the first rows to confirm the bill ids no longer carry the congress prefix.
bills_dime.head()

Unnamed: 0,bill_id,year,date,bill_str,bill_desc,congno,sponsors,cosponsors,tw_latent1,tw_abortion_and_social_conservatism,...,tw_healthcare,tw_higher_education,tw_immigration,tw_indian_affairs,tw_intelligence_and_surveillance,tw_labor,tw_law_courts_and_judges,tw_transportation,tw_veterans_affairs,tw_womens_issues
0,hr83,2014,12/13/2014,H.R. 83|On the Motion to Proceed H.R. 83|To re...,To require the Secretary of the Interior to as...,113,,,0.471785,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,pn1070,2014,12/13/2014,PN1070|On the Motion to Proceed PN1070|Christo...,"Christopher Smith, of Texas, to be an Assistan...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,pn1099,2014,12/13/2014,PN1099|On the Motion to Proceed PN1099|Frank A...,"Frank A. Rose, of Massachusetts, to be an Assi...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,pn1160,2014,12/13/2014,PN1160|On the Motion to Proceed PN1160|Vivek H...,"Vivek Hallegere Murthy, of Massachusetts, to b...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,pn1297,2014,12/13/2014,PN1297|On the Motion to Proceed PN1297|John Ch...,"John Charles Cruden, of Virginia, to be an Ass...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# Keep only the bills whose congress number is 113.
bills_dime = bills_dime[bills_dime['congno'].eq(113)]

In [35]:
# Preview the first two rows of the filtered bills table.
bills_dime.head(2)

Unnamed: 0,bill_id,year,date,bill_str,bill_desc,congno,sponsors,cosponsors,tw_latent1,tw_abortion_and_social_conservatism,...,tw_healthcare,tw_higher_education,tw_immigration,tw_indian_affairs,tw_intelligence_and_surveillance,tw_labor,tw_law_courts_and_judges,tw_transportation,tw_veterans_affairs,tw_womens_issues
0,hr83,2014,12/13/2014,H.R. 83|On the Motion to Proceed H.R. 83|To re...,To require the Secretary of the Interior to as...,113,,,0.471785,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,pn1070,2014,12/13/2014,PN1070|On the Motion to Proceed PN1070|Christo...,"Christopher Smith, of Texas, to be an Assistan...",113,,,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
# Sanity-check the filter: tally rows per congress number (only 113 should remain).
bills_dime['congno'].value_counts()

113    618
Name: congno, dtype: int64

In [37]:
# Persist the filtered bills table as a comma-separated, UTF-8 CSV without the index.
bills_dime.to_csv(
    './datasets/bills_dime.csv',
    encoding='UTF-8',
    index=False,
    sep=',',
)