# US Representative Voting Patterns and Funding Sources

Part 3: Creating a SQLite Database

*Tables created:*

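1. DIME PLUS Congressional Bills (`bills_dime`)
2. Cast Codes (`cast_codes`)
3. FEC Committees (`committees`)
4. Party Codes (`party_codes`)
5. Congressional Member Descriptions (`members`)
6. Roll Call (`roll_call`)
7. Member Votes (`votes`)
8. Expenditures (`expenditures`)
9. Receipts (`receipts`)
10. Candidate Committees (`candidate_committees`)
11. Disbursements (`disbursements`)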

In [1]:
import sqlite3 as sq
import pandas as pd
# Open (or create) the SQLite database file; every to_sql call below writes through this connection.
conn = sq.connect('congress.db')

#### DIME PLUS Congressional Bills

In [2]:
# Load the DIME PLUS congressional bills CSV and display the resulting frame.
df = pd.read_csv('./datasets/bills_dime.csv')
df

Unnamed: 0,bill_id,bill_desc,sponsors,cosponsors,tw_latent1,tw_abortion_and_social_conservatism,tw_agriculture,tw_banking_and_finance,tw_civil_rights,tw_congress_and_procedural,...,tw_healthcare,tw_higher_education,tw_immigration,tw_indian_affairs,tw_intelligence_and_surveillance,tw_labor,tw_law_courts_and_judges,tw_transportation,tw_veterans_affairs,tw_womens_issues
0,hr83,To require the Secretary of the Interior to as...,,,0.471785,0.000000,0.000000,0.000000,0.072641,0.110686,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,hr5806,Supporting America’s Charities Act,,,0.131329,0.061381,0.013441,0.022888,0.055310,0.100501,...,0.002327,0.001538,0.009363,0.038940,0.002084,0.010166,0.015219,0.000076,0.000063,0.004749
2,hres776,Providing for consideration of the Senate amen...,,,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,hr2719,To require the Transportation Security Adminis...,cand43257,cand1307|cand953|cand1128,0.678486,0.000000,0.000000,0.000000,0.000043,0.000258,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.054827,0.000000,0.000000
4,hres775,"Providing for consideration of S. 2244, the Te...",,,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354,hres39,Providing for consideration of the bill (H.R. ...,cand1484,,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
355,hr152,,cand541,,0.533541,0.000000,0.000053,0.026818,0.019575,0.057106,...,0.000164,0.000000,0.000000,0.000000,0.000291,0.000022,0.000000,0.000106,0.000018,0.003529
356,hres23,Providing for consideration of the bill (H.R. ...,cand910,,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
357,hr219,To improve and streamline disaster assistance ...,cand1100,cand1376|cand1316|cand854|cand359|cand904|cand...,0.764401,0.000000,0.000000,0.000190,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000029,0.000000,0.000000,0.000061,0.000000,0.000000,0.000000


In [3]:
df.columns

Index(['bill_id', 'bill_desc', 'sponsors', 'cosponsors', 'tw_latent1',
       'tw_abortion_and_social_conservatism', 'tw_agriculture',
       'tw_banking_and_finance', 'tw_civil_rights',
       'tw_congress_and_procedural', 'tw_crime',
       'tw_defense_and_foreign_policy', 'tw_economy', 'tw_education',
       'tw_energy', 'tw_environment', 'tw_fair_elections',
       'tw_federal_agencies_and_gov_regulation', 'tw_guns', 'tw_healthcare',
       'tw_higher_education', 'tw_immigration', 'tw_indian_affairs',
       'tw_intelligence_and_surveillance', 'tw_labor',
       'tw_law_courts_and_judges', 'tw_transportation', 'tw_veterans_affairs',
       'tw_womens_issues'],
      dtype='object')

In [4]:
df.shape

(359, 29)

In [5]:
# Write the bills frame to table 'bills_dime', replacing any existing table of
# that name; the DataFrame index is not stored as a column.
df.to_sql(
    name='bills_dime',
    con=conn,
    index=False,
    if_exists='replace',
)

359

#### Cast Codes

In [7]:
# Load the cast codes CSV.
df2 = pd.read_csv('./datasets/cast_codes.csv')

In [9]:
df2.shape

(10, 3)

In [10]:
df2.head(1)

Unnamed: 0,cast_code,description,simplified_grouping
0,0,Not a member of the chamber when this vote was...,


In [11]:
# Write the cast codes to table 'cast_codes', replacing any existing table of
# that name; the DataFrame index is not stored as a column.
df2.to_sql(
    name='cast_codes',
    con=conn,
    index=False,
    if_exists='replace',
)

10

#### FEC Committees

In [2]:
# Load the FEC committees CSV.
df3 = pd.read_csv('./datasets/committees.csv')

In [3]:
df3.shape

(13008, 10)

In [4]:
df3.head(2)

Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,designation_type,name,organization_type,state,topic_weight,summary
0,,[],C00408286,Party - Nonqualified,Unauthorized,10TH CONGRESSIONAL DISTRICT DEMOCRATIC COMM,,MI,,
1,,[],C00486647,PAC - Nonqualified,Unauthorized,10TH CONSTITUTION PAC,,MA,,


In [5]:
def slice_string(df, column):
    """Unwrap stringified single-item lists in ``df[column]``, in place.

    Each value is rewritten according to its length:

    * 2 characters (e.g. ``"[]"``) becomes ``None``;
    * 13 characters (e.g. ``"['H6VA10134']"``) loses its first two and last
      two characters, leaving the bare id (``"H6VA10134"``);
    * anything else, including non-string values such as NaN, is kept as is.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; its ``column`` is replaced with the cleaned values.
    column : str
        Name of the column to clean.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    def _unwrap(value):
        # Missing cells are floats (NaN) and have no len(); pass them through.
        if not isinstance(value, str):
            return value
        if len(value) == 2:
            return None
        if len(value) == 13:
            return value[2:-2]
        return value

    # Assign the whole column at once rather than df[column][index] = ...:
    # chained assignment may write to a temporary copy, and an enumerate()
    # position only matches the row label on a default RangeIndex.
    df[column] = df[column].map(_unwrap)
    return df

In [6]:
# Clean the candidate_ids column of df3; the frame returned by slice_string is shown as the cell output.
slice_string(df3, 'candidate_ids')

Unnamed: 0,affiliated_committee_name,candidate_ids,committee_id,committee_type,designation_type,name,organization_type,state,topic_weight,summary
0,,,C00408286,Party - Nonqualified,Unauthorized,10TH CONGRESSIONAL DISTRICT DEMOCRATIC COMM,,MI,,
1,,,C00486647,PAC - Nonqualified,Unauthorized,10TH CONSTITUTION PAC,,MA,,
2,REPUBLICAN PARTY OF VIRGINIA INC,H6VA10134,C00005462,Party - Qualified,Unauthorized,10TH DISTRICT REPUBLICAN CONGRESSIONAL COMMITTEE,,VA,,
3,,,C90013426,Independent expenditure filer (not a committee),Unauthorized,1199SEIU UNITED HEALTHCARE WORKERS EAST,,NY,tw_labor,Labor Unions
4,1199 SEIU UNITED HEALTHCARE WORKERS EAST,,C00348540,PAC - Qualified,Unauthorized,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,Labor Organization,NY,tw_labor,Labor Unions
...,...,...,...,...,...,...,...,...,...,...
13003,NONE,,C00462754,PAC - Nonqualified,Unauthorized,ZR FPAC,,MN,,
13004,ENDEAVOR ACTION,,C00459693,PAC - Qualified,Unauthorized,ZUFFA POLITICAL ACTION COMMMITTEE,,DC,,
13005,,H0OH12088,C00472332,House,Principal campaign committee,ZUKOWSKI FOR CONGRESS,,OH,,
13006,"ZURICH HOLDING COMPANY OF AMERICA, INC.",,C00235036,PAC - Qualified,Lobbyist/Registrant PAC,"ZURICH HOLDING COMPANY OF AMERICA, INC. COMMIT...",Corporation,DC,,


In [7]:
df3['candidate_ids'].value_counts()

P80003338    15
P80002801    11
P80000748     8
S2CA00286     6
P80003353     6
             ..
S0CA00330     1
H0IL19048     1
S2NE00094     1
H2OH12068     1
H0OH12088     1
Name: candidate_ids, Length: 3810, dtype: int64

In [8]:
# Write the committees frame to table 'committees', replacing any existing
# table of that name; the DataFrame index is not stored as a column.
df3.to_sql(
    name='committees',
    con=conn,
    index=False,
    if_exists='replace',
)

13008

#### Party Codes

In [18]:
# Load the party codes CSV.
df4 = pd.read_csv('./datasets/party_codes.csv')

In [19]:
df4.shape

(53, 2)

In [22]:
df4.head(2)

Unnamed: 0,Party Code,Party Name
0,100,Democratic Party
1,200,Republican Party


In [23]:
# Write the party codes to table 'party_codes', replacing any existing table
# of that name; the DataFrame index is not stored as a column.
df4.to_sql(
    name='party_codes',
    con=conn,
    index=False,
    if_exists='replace',
)

53

#### Congressional Member Descriptions

In [38]:
# Load the congressional member descriptions CSV.
df5 = pd.read_csv('./datasets/congress.csv')

In [39]:
df5.shape

(433, 23)

In [40]:
df5.head(2)

Unnamed: 0,icpsr,district,state,party,congress_id,NOMINATE_dim1,NOMINATE_dim2,candidate_id,name,ici,...,comm_disb,start_cash,end_cash,cand_contrib,cand_loans,ind_contrib,district.1,state.1,other_comm_contrib,pty_contrib
0,20300,1,AL,200,B001244,0.367,0.513,H2AL01077,"bonner, josiah robias",I,...,0.0,279810.87,157209.22,0.0,0.0,564463.0,AL,1,523034.4,0.0
1,20301,3,AL,200,R000575,0.363,0.455,H2AL03032,"rogers, michael dennis",I,...,270000.0,204259.44,292048.29,0.0,0.0,474560.95,AL,3,588500.0,2700.0


In [41]:
# Correcting district errors for the Tableau visualizations:
# list the row label(s) whose name is 'daines, steven'.
df5.index[df5['name'] == 'daines, steven'].tolist()

[231]

In [42]:
# Set the district to 1 for 'daines, steven' (district correction for the
# Tableau visualizations). The row is selected by name rather than by a
# hard-coded row label, so the fix still lands on the right record if the CSV
# row order changes. The match is case-insensitive so the cell remains
# re-runnable after the names have been converted to title case.
df5.loc[df5['name'].str.lower() == 'daines, steven', 'district'] = 1

In [43]:
# Convert member names to title case (first letter of each word capitalized).
df5['name'] = df5['name'].str.title()

In [44]:
# Write the member descriptions to table 'members', replacing any existing
# table of that name; the DataFrame index is not stored as a column.
df5.to_sql(
    name='members',
    con=conn,
    index=False,
    if_exists='replace',
)

433

#### Roll Call

In [2]:
# Load the roll call CSV.
df6 = pd.read_csv('./datasets/c113r.csv')

In [3]:
df6.shape

(1202, 20)

In [4]:
df6.head(2)

Unnamed: 0,chamber,rollnumber,date,session,yea_count,nay_count,nominate_mid_1,nominate_mid_2,nominate_spread_1,nominate_spread_2,bill_number,vote_result,vote_desc,vote_question,issue_codes,peltzman_codes,clausen_codes,crs_policy_area,crs_subjects,congress_url
0,House,1,2013-01-03,1,220,192,0.061,0.358,-0.747,0.288,,Boehner,,Election of the Speaker,['Election of the Speaker of the House'],['Internal Organization'],['Miscellaneous Policy'],,,
1,House,2,2013-01-03,1,224,187,0.056,0.197,-0.732,0.309,hres5,Passed,Adopting rules for the One Hundred Thirteenth ...,On Motion to Table the Motion to Refer,,['Internal Organization'],['Miscellaneous Policy'],Congress,['Administrative law and regulatory procedures...,https://www.congress.gov/bill/113th-congress/h...


In [5]:
# Write the roll call frame to table 'roll_call', replacing any existing table
# of that name; the DataFrame index is not stored as a column.
df6.to_sql(
    name='roll_call',
    con=conn,
    index=False,
    if_exists='replace',
)

1202

#### Member Votes

In [16]:
# Load the member votes CSV.
df7 = pd.read_csv('./datasets/c113v.csv')

In [17]:
df7.shape

(520314, 5)

In [18]:
df7.head(2)

Unnamed: 0,icpsr,cast_code,chamber,rollnumber,prob
0,2605,7,House,1,100.0
1,10713,6,House,1,100.0


In [19]:
# Write the member votes to table 'votes', replacing any existing table of
# that name; the DataFrame index is not stored as a column.
df7.to_sql(
    name='votes',
    con=conn,
    index=False,
    if_exists='replace',
)

520314

#### Expenditures

In [8]:
# Load the expenditures CSV and preview its first two rows.
df8 = pd.read_csv('./datasets/expenditures.csv')
df8.head(2)

Unnamed: 0,candidate_id,candidate_name,committee_id,committee_name,count,cycle,support_oppose_indicator,total
0,H2AL01077,"BONNER, JOSIAH ROBIAS",C00502849,CAMPAIGN FOR PRIMARY ACCOUNTABILITY INC,13,2012,O,123679.84
1,H0AL02087,"ROBY, MARTHA",C00000935,DEMOCRATIC CONGRESSIONAL CAMPAIGN COMMITTEE,3,2012,O,11676.41


In [9]:
# Write the expenditures frame to table 'expenditures', replacing any existing
# table of that name; the DataFrame index is not stored as a column.
df8.to_sql(
    name='expenditures',
    con=conn,
    index=False,
    if_exists='replace',
)

1619

#### Receipts (not used)

In [22]:
# Load the receipts CSV and preview its first two rows.
df9 = pd.read_csv('./datasets/receipts.csv')
df9.head(2)

Unnamed: 0,committee_id,committee_name,count,cycle,memo_count,memo_total,recipient_id,recipient_name,total
0,C00004036,SEIU COPE (SERVICE EMPLOYEES INTERNATIONAL UNI...,1,2012,0,0.0,C00348540,1199 SERVICE EMPLOYEES INT'L UNION FEDERAL POL...,125000.0
1,C00348540,1199 SEIU UNITED HEALTHCARE WORKERS EAST FEDER...,1,2012,0,0.0,C00344531,1199 32BJ/144 SERVICE EMPLOYEES INTERNATIONAL ...,3546.0


In [23]:
# Write the receipts frame to table 'receipts', replacing any existing table
# of that name; the DataFrame index is not stored as a column.
df9.to_sql(
    name='receipts',
    con=conn,
    index=False,
    if_exists='replace',
)

2712

#### Candidate Committees (not used)

In [13]:
# Load the candidate committees CSV and preview its first two rows.
df10 = pd.read_csv('./datasets/candidate_committees.csv')
df10.head(2)

Unnamed: 0,affiliated_org,candidate_id,committee_id,committee_type,designation,name,org_type
0,GULF COAST VICTORY FUND,H2AL01077,C00375220,H,P,JO BONNER FOR CONGRESS COMMITTEE,
1,NONE,H2AL03032,C00367862,H,P,MIKE ROGERS FOR CONGRESS,


In [14]:
# Write the candidate committees to table 'candidate_committees', replacing
# any existing table of that name; the DataFrame index is not stored as a column.
df10.to_sql(
    name='candidate_committees',
    con=conn,
    index=False,
    if_exists='replace',
)

513

#### Disbursements

In [25]:
# Load the disbursements CSV and preview its first two rows.
df11 = pd.read_csv('./datasets/disbursements.csv')
df11.head(2)

Unnamed: 0,committee_id,committee_name,count,recipient_id,recipient_name,total
0,C00277335,FAIRFAX COUNTY REPUBLICAN COMMITTEE (FEDERAL),1,C00005462,10TH DISTRICT REPUBLICAN CONGRESSIONAL COMMITTEE,4940.0
1,C00004036,SEIU COPE (SERVICE EMPLOYEES INTERNATIONAL UNI...,1,C00348540,1199 SERVICE EMPLOYEES INT'L UNION FEDERAL POL...,125000.0


In [26]:
# Write the disbursements frame to table 'disbursements', replacing any
# existing table of that name; the DataFrame index is not stored as a column.
df11.to_sql(
    name='disbursements',
    con=conn,
    index=False,
    if_exists='replace',
)

139263

## Close Connection

In [43]:
# Close the SQLite connection opened at the top of the notebook.
conn.close()