## Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Set Matplotlib defaults
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True, figsize=(9, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
%config InlineBackend.figure_format = 'retina'

### Data Wrangling

In [3]:
def _ffill_within_parameter(frame):
    """Forward-fill missing values across years, one parameter at a time.

    Columns are expected to be named ``<parameter>_<year>`` and to appear in
    chronological order within each parameter. Columns are grouped by the
    name with the trailing ``_<4-digit year>`` removed, and a gap is filled
    only from an earlier year of the *same* parameter.

    A plain ``frame.ffill(axis=1)`` would carry the last value of one
    parameter (e.g. ``pct_bb_2021``) into a missing first year of the next
    one (e.g. ``pct_college_2017``), silently mixing unrelated quantities.
    With this helper such leading gaps stay NaN instead.

    Returns a new frame with the same index and columns as ``frame``.
    """
    parameter_of_column = frame.columns.str.replace(r'_\d{4}$', '', regex=True)
    return (frame
            .T
            .groupby(parameter_of_column.to_numpy(), sort=False)
            .ffill()
            .T
    )


census = pd.read_csv('./files/census_starter.csv')

census = (census
          # cfips becomes a string key; the two income columns are cast to
          # float64 so every income year shares one dtype
          .assign(cfips = census.cfips.astype(str),
                  median_hh_inc_2017 = census.median_hh_inc_2017.astype(np.float64),
                  median_hh_inc_2019 = census.median_hh_inc_2019.astype(np.float64),
                  )
          .set_index('cfips')
          # fill gaps from the previous year of the same parameter only
          .pipe(_ffill_within_parameter)
)
census.head(3)

Unnamed: 0_level_0,pct_bb_2017,pct_bb_2018,pct_bb_2019,pct_bb_2020,pct_bb_2021,pct_college_2017,pct_college_2018,pct_college_2019,pct_college_2020,pct_college_2021,...,pct_it_workers_2017,pct_it_workers_2018,pct_it_workers_2019,pct_it_workers_2020,pct_it_workers_2021,median_hh_inc_2017,median_hh_inc_2018,median_hh_inc_2019,median_hh_inc_2020,median_hh_inc_2021
cfips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,76.6,78.9,80.6,82.7,85.5,14.5,15.9,16.1,16.7,16.4,...,1.3,1.1,0.7,0.6,1.1,55317.0,58786.0,58731.0,57982.0,62660.0
1003,74.5,78.1,81.8,85.1,87.9,20.4,20.7,21.0,20.2,20.6,...,1.4,1.3,1.4,1.0,1.3,52562.0,55962.0,58320.0,61756.0,64346.0
1005,57.2,60.4,60.5,64.6,64.6,7.6,7.8,7.6,7.3,6.7,...,0.5,0.3,0.8,1.1,0.8,33368.0,34186.0,32525.0,34990.0,36422.0


In [None]:
# Chunk the census columns into consecutive runs of five names. In the frame
# displayed above the columns are laid out as <parameter>_2017 ... <parameter>_2021,
# so each run holds the yearly columns of one parameter.
parameters = [list(census.columns[start:start + 5])
              for start in range(0, census.shape[1], 5)]

# Reshape the second run of columns from wide to long: one row per
# (cfips, column name) pair, with the cell value stored in 'data'.
df = (census[parameters[1]]
      .stack()
      .reset_index()
      .rename(columns={'level_1': 'parameter', 0: 'data'}))

# Lag between census data and train data: read the year from the last four
# characters of the column name, shift it by two, and keep it as text so it
# can be concatenated into the mapping key.
df['year'] = (pd.to_numeric(df['parameter'].str[-4:]) + 2).astype(str)

# Build the '<cfips>_<year>' key used for mapping
df['cfips_year'] = df['cfips'].str.cat(df['year'], sep='_')

# Mapping from key to parameter value
dt = df.set_index('cfips_year')['data'].to_dict()


In [None]:
# df

In [None]:
# dt

In [None]:
# Display the column groups built earlier: consecutive runs of up to five
# census column names each.
parameters