In [None]:
!pip install sqlalchemy
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

In [None]:
# Load the minimum-wage CSV; the path is relative to the notebook's working directory.
min_wage_file = "Raw_Data/Minimum_Wage_Data.csv"
min_wage_df = pd.read_csv(
    min_wage_file,
    encoding="utf-8",
)
# Preview the first rows to sanity-check the load.
min_wage_df.head()

In [None]:
# Keep only the rows whose Year is 2010 through 2016 inclusive.
# This drops the years before 2010 *and* any years after 2016, so a bare
# `Year > 2009` filter would not be equivalent if later years are present.
# `isin` matches exactly the same values as the chained `==` comparisons did.
min_year_df = min_wage_df.loc[min_wage_df['Year'].isin(list(range(2010, 2017)))]

min_year_df

In [None]:
# Discard the columns that are not carried forward into the cleaned table.
unused_min_wage_columns = ['Footnote', 'Table_Data', 'High.2018', 'Low.2018', 'CPI.Average']
min_wage_df2 = min_year_df.drop(columns=unused_min_wage_columns)
min_wage_df2.head()

In [None]:
# Relabel the remaining columns with lower-case snake_case names.
# The assignment is positional: it relies on exactly four columns being left,
# in this order, after the drop in the previous cell.
min_wage_df2.columns = [
    "year",
    "state",
    "high_value",
    "low_value",
]
min_wage_df2.head()

In [None]:
# Load the second CSV; the path is relative to the notebook's working directory.
# NOTE(review): the read assumes the file is UTF-8 encoded -- confirm for this copy of the data.
pub_assistance_file = "Raw_Data/sub-est2016_all.csv"
pub_assistance_df = pd.read_csv(
    pub_assistance_file,
    encoding='utf-8',
)
# Preview the first rows to sanity-check the load.
pub_assistance_df.head()

In [None]:
# Replace the original headers with readable snake_case names.
# The assignment is positional, so the list must contain one name per column
# of the loaded file (19 names), in the file's column order.
pub_assistance_df.columns = [
    "geographic_summary_level",
    "state_FIPS_code",
    "county_FIPS_code",
    "place_FIPS_code",
    "minor_civil_div_FIPS_code",
    "consolidated_city_FIPS_code",
    "primitive_geography_flag",
    "functional_status_code",
    "city",
    "state",
    "census_pop_2010",
    "est_base_2010",
    "est_pop_2010",
    "est_pop_2011",
    "est_pop_2012",
    "est_pop_2013",
    "est_pop_2014",
    "est_pop_2015",
    "est_pop_2016",
]

pub_assistance_df.head()

In [None]:
# Lookup table that pairs each numeric summary-level code with its description.
# Kept as (code, description) pairs so each code sits next to its label.
_sumlev_pairs = [
    (40, "State"),
    (50, "County"),
    (61, "Minor Civil Division"),
    (71, "Minor Civil Division place part"),
    (157, "County place part"),
    (162, "Incorporated place"),
    (170, "Consolidated city"),
    (172, "Consolidated city -- place within consolidated city"),
]

# Column-oriented dict: one list of codes and one parallel list of descriptions.
SUMLEV_dict = {
    "geographic_summary_level": [code for code, _ in _sumlev_pairs],
    "geographic_level": [description for _, description in _sumlev_pairs],
}

SUMLEV_df = pd.DataFrame(SUMLEV_dict)

In [None]:
# Attach the summary-level description as a new column.
# A left join keeps every row of pub_assistance_df; a code with no match in
# SUMLEV_df gets a missing geographic_level instead of being dropped.
new_pub_assist_df = pub_assistance_df.merge(
    SUMLEV_df,
    how='left',
    on='geographic_summary_level',
)
new_pub_assist_df.head()

In [None]:
# Keep only the rows whose summary-level code is 40 (labelled "State" in SUMLEV_dict).
is_state_row = new_pub_assist_df['geographic_summary_level'] == 40
state_pub_assist_df = new_pub_assist_df[is_state_row]
state_pub_assist_df.head()

In [None]:
# Drop everything except the state name and the seven yearly estimate columns.
unneeded_state_columns = [
    "census_pop_2010",
    "est_base_2010",
    "geographic_summary_level",
    "state_FIPS_code",
    "county_FIPS_code",
    "place_FIPS_code",
    "minor_civil_div_FIPS_code",
    "consolidated_city_FIPS_code",
    "primitive_geography_flag",
    "functional_status_code",
    "city",
    "geographic_level",
]
state_pub_assist_df2 = state_pub_assist_df.drop(columns=unneeded_state_columns)
state_pub_assist_df2

In [None]:
# Shorten the yearly estimate headers to the bare year, as strings ('2010' .. '2016').
# Positional assignment: expects 'state' followed by the seven estimate columns.
state_pub_assist_df2.columns = ['state'] + [str(year) for year in range(2010, 2017)]
state_pub_assist_df2

In [None]:
# Reshape wide -> long: one row per (state, year) instead of one column per year.
# NOTE(review): the year labels come from the column headers, so they are
# strings here -- confirm that is what downstream consumers expect.
year_columns = ['2010', '2011', '2012', '2013', '2014', '2015', '2016']
something_new_df = pd.melt(
    state_pub_assist_df2,
    id_vars=['state'],
    value_vars=year_columns,
)
something_new_df

In [None]:
# Give the melted columns their final names (positional: id column, former
# header, value).
something_new_df.columns = [
    'state',
    'year',
    'est_population',
]
something_new_df

In [None]:
# Add an empty placeholder column for WIC usage.
# Use a real missing value rather than the text 'NaN': the string would be
# stored as three literal characters, would not be recognised by isna(), and
# would be written to the database as text.
something_new_df["WIC_usage"] = float("nan")
something_new_df

In [None]:
# Build the MySQL connection for the local demographics_db database.
# Credentials are read from the environment so a real password never has to be
# saved in the notebook; when the variables are unset, the values fall back to
# the original ones, so the resulting URL is unchanged.
# Characters such as '@' or '/' in a password must be URL-escaped before being
# placed in the connection string.
db_user = os.environ.get("DEMOGRAPHICS_DB_USER", "root")
db_password = os.environ.get("DEMOGRAPHICS_DB_PASSWORD", "<password>")

rds_connection_string = f"{db_user}:{db_password}@127.0.0.1/demographics_db"
engine = create_engine(f'mysql://{rds_connection_string}')

In [None]:
# Confirm which tables exist in the target database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names on both old and new
# versions.
inspect(engine).get_table_names()

In [None]:
# Write both cleaned frames to the database using the same write options.
# NOTE(review): if_exists='append' adds rows to an existing table, so re-running
# this cell inserts the same rows again -- confirm that is intended.
# NOTE(review): index=True also writes each frame's index as a column --
# confirm the target tables expect it.
to_sql_options = dict(con=engine, if_exists='append', index=True)

something_new_df.to_sql(name='pub_assist', **to_sql_options)
min_wage_df2.to_sql(name='min_wage', **to_sql_options)