In [None]:
import os
from functools import reduce

import pandas as pd
import psycopg2 as pg2
import sqlalchemy

from web_scrape_functions import scrape_bill_topic_table
from WA_state_API_functions import (
    get_bill_data,
    get_sponsor_data,
    get_committee_data,
    get_committee_member_data,
    get_and_reorganize_rollcall_data,
)

# Load bill data for all bienniums

In [None]:
# Two-year legislative sessions from 1991-92 through 2017-18, labelled
# 'YYYY-YY' (start year, then the last two digits of the following year).
bienniums = ['{}-{:02d}'.format(start, (start + 1) % 100)
             for start in range(1991, 2019, 2)]

In [None]:
# Fetch the 'Bills' data once per biennium, keeping the results in the same
# order as `bienniums`.
bill_dfs = []
for biennium in bienniums:
    bill_dfs.append(get_bill_data(biennium, 'Bills'))

In [None]:
# Stack the per-biennium bill frames into a single DataFrame.
# pd.concat builds the result in one pass and keeps each frame's original
# index labels, which is what chaining DataFrame.append calls produced.
bill_data_all_bienniums = pd.concat(bill_dfs)

# Write the combined bill dataframe to the bill_api table in wa_leg_raw

In [None]:
# Connection URL for the raw-data Postgres database. Set the
# WA_LEG_RAW_DB_URL environment variable to point the notebook at a different
# account or host; when it is unset the original local URL is used.
DB_URL = os.environ.get(
    'WA_LEG_RAW_DB_URL',
    'postgresql://emilykarboski@localhost:5432/wa_leg_raw',
)
engine = sqlalchemy.create_engine(DB_URL)
# A single connection shared by every to_sql call below.
con = engine.connect()

In [None]:
# Persist the combined bill frame as the `bill_api` table, replacing any
# existing table of that name and leaving the DataFrame index out.
bill_data_all_bienniums.to_sql(
    name='bill_api',
    con=con,
    if_exists='replace',
    index=False,
)

# Load sponsor data for all bills and put in raw table

In [None]:
# Fetch the 1991-92 'Bills' data on its own so it can be inspected in isolation.
bill_1991 = get_bill_data('1991-92', 'Bills')

In [None]:
# Show entries 3100-3149 of the array of unique bill ids in the 1991-92 frame.
bill_1991['bill_id'].unique()[3100: 3150]

In [None]:
# Fetch sponsor data for every (bill_id, biennium) pair and tag each returned
# frame with the bill it belongs to.
# The fetch is best-effort: a bill whose lookup fails is skipped, but the
# failure is recorded in `sponsor_failures` rather than silently discarded.
sponsor_dfs = []
sponsor_failures = []
for bill_id, biennium in zip(bill_data_all_bienniums['bill_id'],
                             bill_data_all_bienniums['biennium']):
    try:
        sponsor_df = get_sponsor_data(biennium, bill_id)
        sponsor_df['biennium'] = biennium
        sponsor_df['bill_id'] = bill_id
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt can still stop this long-running loop.
        sponsor_failures.append((biennium, bill_id, repr(exc)))
        continue
    sponsor_dfs.append(sponsor_df)

print('sponsor fetch: {} succeeded, {} failed'.format(
    len(sponsor_dfs), len(sponsor_failures)))

In [None]:
# Number of sponsor frames collected (bills whose fetch raised are not included).
len(sponsor_dfs)

### Pushing to postgres

In [None]:
# Create (or overwrite) the `sponsor_api` table from the first sponsor frame;
# the remaining frames are appended to it afterwards.
sponsor_dfs[0].to_sql(
    name='sponsor_api',
    con=con,
    if_exists='replace',
    index=False,
)

In [None]:
# Append every sponsor frame after the first to the `sponsor_api` table.
# To time this cell, put the `%%time` cell magic on its first line; a magic
# placed after the loop has nothing to measure.
for df in sponsor_dfs[1:]:
    df.to_sql('sponsor_api', con, if_exists='append', index=False)

# Scrape topic data for all years and put in raw postgres table

In [None]:
# Every calendar year to scrape, 1991 through 2018 inclusive, as strings.
# Generated rather than hand-typed so that no entry can be merged with its
# neighbour by a missing comma.
years = [str(year) for year in range(1991, 2019)]

In [None]:
def _scrape_topics_for(year):
    """Scrape the bill-topic table for `year` and label its rows with that year."""
    topic_df = scrape_bill_topic_table(year)
    topic_df['year'] = year
    return topic_df


# One scraped topic frame per entry in `years`, in the same order.
topic_dfs = [_scrape_topics_for(year) for year in years]

In [None]:
# Preview the first rows of the topic frame scraped for the first year.
topic_dfs[0].head()

In [None]:
# Create (or overwrite) the `topic_scrape` table from the first year's frame;
# the other years are appended to it afterwards.
topic_dfs[0].to_sql(
    name='topic_scrape',
    con=con,
    if_exists='replace',
    index=False,
)

In [None]:
# Append every topic frame after the first to the `topic_scrape` table.
# To time this cell, put the `%%time` cell magic on its first line; a magic
# placed after the loop has nothing to measure.
for df in topic_dfs[1:]:
    df.to_sql('topic_scrape', con, if_exists='append', index=False)

# Load committee data and put in raw postgres table

In [None]:
# Biennium labels used for the committee queries: 1991-92 through 2017-18.
bienniums = (
    '1991-92 1993-94 1995-96 1997-98 1999-00 '
    '2001-02 2003-04 2005-06 2007-08 2009-10 '
    '2011-12 2013-14 2015-16 2017-18'
).split()

In [None]:
# Fetch the committee data once per biennium, in the order of `bienniums`.
committee_dfs = list(map(get_committee_data, bienniums))

In [None]:
# Preview the first rows of the committee frame for the first biennium.
committee_dfs[0].head()

In [None]:
# Stack the per-biennium committee frames into a single DataFrame.
# pd.concat replaces the chained DataFrame.append calls: it builds the result
# in one pass and keeps each frame's original index labels, as append did.
committee_data_all_bienniums = pd.concat(committee_dfs)

In [None]:
# Persist the combined committee frame as the `committee_api` table, replacing
# any existing table of that name and leaving the DataFrame index out.
committee_data_all_bienniums.to_sql(
    name='committee_api',
    con=con,
    if_exists='replace',
    index=False,
)

# Load committee member data and put in raw postgres table

In [None]:
# Fetch the member list for every (biennium, agency, name) committee row.
# The fetch is best-effort: a committee whose lookup fails is skipped, but the
# failure is recorded in `com_member_failures` rather than silently discarded.
com_member_dfs = []
com_member_failures = []
for biennium, agency, name in zip(committee_data_all_bienniums['biennium'],
                                  committee_data_all_bienniums['agency'],
                                  committee_data_all_bienniums['name']):
    try:
        com_member_df = get_committee_member_data(biennium, agency, name)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt can still stop this long-running loop.
        com_member_failures.append((biennium, agency, name, repr(exc)))
        continue
    com_member_dfs.append(com_member_df)

print('committee member fetch: {} succeeded, {} failed'.format(
    len(com_member_dfs), len(com_member_failures)))

In [None]:
# Create (or overwrite) the `committee_member_api` table from the first member
# frame; the remaining frames are appended to it afterwards.
com_member_dfs[0].to_sql(
    name='committee_member_api',
    con=con,
    if_exists='replace',
    index=False,
)

In [None]:
# Append every committee-member frame after the first to the
# `committee_member_api` table.
# To time this cell, put the `%%time` cell magic on its first line; a magic
# placed after the loop has nothing to measure.
for df in com_member_dfs[1:]:
    df.to_sql('committee_member_api', con, if_exists='append', index=False)

# Load voting data and put in raw postgres table

In [None]:
# Add a `bill_num` column holding the last four characters of each bill id.
bill_data_all_bienniums['bill_num'] = (
    bill_data_all_bienniums['bill_id'].str.slice(start=-4)
)

In [None]:
# Preview the first five rows (all columns) to check the new column.
bill_data_all_bienniums.head(5)

In [None]:
# Fetch and reorganize the roll-call data for every (biennium, bill_num) pair.
# The fetch is best-effort: a bill whose lookup fails is skipped, but the
# failure is recorded in `vote_failures` rather than silently discarded.
vote_dfs = []
vote_failures = []
for biennium, bill_num in zip(bill_data_all_bienniums['biennium'],
                              bill_data_all_bienniums['bill_num']):
    try:
        vote_df = get_and_reorganize_rollcall_data(biennium, bill_num)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt can still stop this long-running loop.
        vote_failures.append((biennium, bill_num, repr(exc)))
        continue
    vote_dfs.append(vote_df)

print('roll-call fetch: {} succeeded, {} failed'.format(
    len(vote_dfs), len(vote_failures)))

In [None]:
# Create (or overwrite) the `vote_api` table from the first roll-call frame;
# the remaining frames are appended to it afterwards.
vote_dfs[0].to_sql(
    name='vote_api',
    con=con,
    if_exists='replace',
    index=False,
)

In [None]:
# Append every roll-call frame after the first to the `vote_api` table.
# To time this cell, put the `%%time` cell magic on its first line; a magic
# placed after the loop has nothing to measure.
for df in vote_dfs[1:]:
    df.to_sql('vote_api', con, if_exists='append', index=False)