# Instructions

This is a run-through of the cleaning functions in `create_staging_tables`. The functions below clean and reorganize data collected from the API and scraped from the web. The data was initially collected and loaded into a Postgres database named "wa_leg_raw". The API and scraping functions can be found in the data_aquisition directory.

There are seven steps to creating the necessary staging tables. Only the following tables need to be saved to the wa_leg_staging database:
* legislator_df
* rep_score_df
* bill_text_df
* merged_final_df

In [None]:
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
from create_staging_tables import create_staging_legislator_df_STEP_ONE, create_staging_vote_df_STEP_TWO, create_staging_bill_df_STEP_THREE, create_staging_merged_initial_df_STEP_FOUR, create_staging_bill_text_df_STEP_FIVE, clean_merged_final_STEP_SEVEN, create_rep_score_STEP_EIGHT, load_and_clean_party_minority_history_df

In [None]:
# SQLAlchemy engine for the local Postgres database holding the raw tables.
RAW_DB_URL = 'postgresql://localhost:5432/wa_leg_raw'
engine = create_engine(RAW_DB_URL)

In [None]:
# Raw API tables, each read in full from the database behind `engine`.
raw_vote_df, raw_committee_member_df, raw_bill_df, raw_sponsor_df = (
    pd.read_sql_query(query, con=engine)
    for query in (
        'select * from "vote_api"',
        'select * from "committee_member_api"',
        'select * from "bill_api"',
        'select * from "sponsor_api"',
    )
)

# Supplemental legislator info from a pipe-delimited CSV
# (presumably legislators absent from the API data -- TODO confirm).
missing_leg_info_df = pd.read_csv('../data/missing_legislators.csv', sep='|')

In [None]:
# Step one: build the legislator staging table from the raw votes, the raw
# committee membership, and the supplemental legislator CSV.
# legislator_df is one of the tables to be saved to wa_leg_staging.
legislator_df = create_staging_legislator_df_STEP_ONE(
    raw_vote_df,
    raw_committee_member_df,
    missing_leg_info_df,
)

In [None]:
# Step two: build the vote staging table from the raw vote records.
# This is an intermediate table; it is an input to step four below.
staging_vote_df = create_staging_vote_df_STEP_TWO(raw_vote_df)

In [None]:
# Step three: build the bill staging table from the raw bill and sponsor tables.
staging_bill_df = create_staging_bill_df_STEP_THREE(
    raw_bill_df,
    raw_sponsor_df,
)

In [7]:
# Preview the first rows of the bill staging table as a sanity check.
staging_bill_df.head()

Unnamed: 0,biennium,bill_id,class,description,htm_create_date,htm_last_modified_date,htm_url,long_friendly_name,name,type,bill_unique,bill_num,bill_num_unique,sponsor_agency,primary_sponsor_id,secondary_sponsors
0,1991-92,HB 1001,Bills,,1991-08-30T00:00:00,2006-07-10T17:13:53.543,http://app.leg.wa.gov/documents/billdocs/1991-...,House Bill 1001,1001,House Bills,1991-92 HB 1001,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
1,1991-92,SHB 1001,Bills,,1991-02-01T00:00:00,2006-07-10T17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1991-92 SHB 1001,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
2,1991-92,HB 1002,Bills,,1991-01-14T00:00:00,2006-07-10T17:13:11.637,http://app.leg.wa.gov/documents/billdocs/1991-...,House Bill 1002,1002,House Bills,1991-92 HB 1002,1002,1991-92 1002,House,251,"[7, 475, 332, 484, 180, 110, 481, 394, 472, 44..."
3,1991-92,HB 1003,Bills,,1991-01-14T00:00:00,2006-07-10T17:13:11.747,http://app.leg.wa.gov/documents/billdocs/1991-...,House Bill 1003,1003,House Bills,1991-92 HB 1003,1003,1991-92 1003,House,311,"[110, 54, 474]"
4,1991-92,SHB 1003,Bills,,1991-02-21T00:00:00,2006-07-10T17:14:07.357,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1003,1003-S,House Bills,1991-92 SHB 1003,1003,1991-92 1003,House,311,"[110, 54, 474]"


In [8]:
# Step four: build the initial merged frame from the vote, bill, and
# legislator staging tables.
# NOTE(review): the recorded run of this cell emitted a pandas
# SettingWithCopyWarning from `unique_vote_dates.drop_duplicates(keep='first',
# inplace=True)` inside the helper; that should be fixed in
# create_staging_tables, not in this notebook.
merged_initial_df = create_staging_merged_initial_df_STEP_FOUR(
    staging_vote_df,
    staging_bill_df,
    legislator_df,
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  unique_vote_dates.drop_duplicates(keep='first', inplace=True)


In [12]:
# Preview only the first ten rows: the merged frame has one row per recorded
# vote, so displaying the whole object bloats the notebook output.
merged_initial_df.head(10)

Unnamed: 0,sequence_number,vote,vote_date,voter_id,voter_name,voting_agency,bill_unique,year,unique_id,biennium,...,htm_last_modified_date,htm_url,long_friendly_name,name,type,bill_num,bill_num_unique,sponsor_agency,primary_sponsor_id,secondary_sponsors
0,4,1,1991-02-22,7,Anderson,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
1,4,1,1991-02-22,11,Appelwick,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
2,4,1,1991-02-22,17,Ballard,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
3,4,1,1991-02-22,23,Basich,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
4,4,1,1991-02-22,32,Beck,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
5,4,1,1991-02-22,34,Belcher,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
6,4,2,1991-02-22,40,Betrozoff,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
7,4,1,1991-02-22,45,Bowman,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
8,4,0,1991-02-22,46,Braddock,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."
9,4,1,1991-02-22,47,Bray,0,1991-92 SHB 1001,1991,2.0,1991-92,...,2006-07-10 17:13:54.903,http://app.leg.wa.gov/documents/billdocs/1991-...,Substitute House Bill 1001,1001-S,House Bills,1001,1991-92 1001,House,251,"[250, 7, 123, 54, 188, 396, 264, 339, 213, 481..."


In [11]:
# Step seven: produce the final merged table from the initial merged frame and
# the legislator table. merged_final_df is one of the tables to be saved to
# wa_leg_staging.
# NOTE(review): the recorded run of this cell failed with
#   KeyError: "labels ['first_name' 'last_name' 'id' 'agency'] not contained in axis"
# and the execution counts around it are out of order, so the failure may come
# from stale kernel state (e.g. the step being re-run on an already-modified
# frame) or from a skipped intermediate step -- TODO confirm by running
# Restart Kernel -> Run All and checking which frame lacks those columns.
merged_final_df = clean_merged_final_STEP_SEVEN(merged_initial_df, legislator_df)

KeyError: "labels ['first_name' 'last_name' 'id' 'agency'] not contained in axis"