# Instructions

This is a run-through of the cleaning functions in create_staging_tables. The functions below clean and reorganize data collected from the API and scraped from the web. The data was first collected and stored in a postgres database named "wa_leg_raw". The API and scraping functions can be found in the data_aquisition directory. 

There are seven steps to creating the necessary staging tables. Only the following tables need to be saved to the wa_leg_staging database:
* legislator_df
* rep_score_df
* bill_text_df
* merged_final_df

In [None]:
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
from create_staging_tables import (create_staging_legislator_df_STEP_ONE, 
                                   create_staging_vote_df_STEP_TWO, 
                                   create_staging_bill_df_STEP_THREE, 
                                   create_staging_merged_initial_df_STEP_FOUR, 
                                   create_staging_bill_text_df_STEP_FIVE, 
                                   clean_merged_final_STEP_SIX, 
                                   create_rep_score_STEP_SEVEN)

In [None]:
# Connection to the local postgres database that the raw tables are read from.
engine = create_engine(
    'postgresql://localhost:5432/wa_leg_raw'
)

In [None]:
# Load every raw input consumed by the staging steps below. The *_api tables
# are read in full through `engine`; the missing-legislator file is a
# pipe-delimited CSV on disk.
raw_vote_df = pd.read_sql_query(
    'select * from "vote_api"',
    con=engine,
)
raw_committee_member_df = pd.read_sql_query(
    'select * from "committee_member_api"',
    con=engine,
)
missing_leg_info_df = pd.read_csv(
    '../data/missing_legislators.csv',
    sep='|',
)
raw_bill_df = pd.read_sql_query(
    'select * from "bill_api"',
    con=engine,
)
raw_sponsor_df = pd.read_sql_query(
    'select * from "sponsor_api"',
    con=engine,
)

In [None]:
# Step one: build legislator_df from the raw votes, the raw committee
# membership table, and the missing-legislator CSV.
legislator_df = create_staging_legislator_df_STEP_ONE(
    raw_vote_df,
    raw_committee_member_df,
    missing_leg_info_df,
)

In [None]:
# Step two: build the staging vote table from the raw vote data.
staging_vote_df = create_staging_vote_df_STEP_TWO(raw_vote_df)

In [None]:
# Step three: build the staging bill table from the raw bill and sponsor data.
staging_bill_df = create_staging_bill_df_STEP_THREE(
    raw_bill_df,
    raw_sponsor_df,
)

In [None]:
# Step four: combine the staging vote, staging bill, and legislator tables
# into the initial merged table.
merged_initial_df = create_staging_merged_initial_df_STEP_FOUR(
    staging_vote_df,
    staging_bill_df,
    legislator_df,
)

In [None]:
# Step five: build the staging bill-text table from the initial merged table.
staging_bill_text_df = create_staging_bill_text_df_STEP_FIVE(merged_initial_df)

In [None]:
# Step six: clean the initial merged table into merged_final_df.
# The helper imported from create_staging_tables for this step is
# clean_merged_final_STEP_SIX. No clean_merged_final_STEP_SEVEN is imported,
# so calling that name would raise NameError.
merged_final_df = clean_merged_final_STEP_SIX(merged_initial_df, legislator_df)

In [None]:
# Step seven: build rep_score_df from the staging bill table and legislator_df.
rep_score_df = create_rep_score_STEP_SEVEN(
    staging_bill_df,
    legislator_df,
)