### Changing Directory.

In [None]:
cd ..

### Importing Modules.

In [None]:
# Import sets up ipynb's to be accessible through "ipynb.fs.full/def".
import ipynb
# ipynb.fs.full allows access to all functions and definitions.
from ipynb.fs.full import covid_data as cd
# Access to DataFrame objects and manipulators.
import pandas as pd
# Numpy will be used to generate ranges of numbers.
import numpy as np

### Setting Pandas Display Options.

In [None]:
# Render DataFrames as plain text instead of HTML tables.
pd.options.display.notebook_repr_html = False
# Cap how much of a frame is printed: at most 12 columns and 10 rows.
pd.options.display.max_columns = 12
pd.options.display.max_rows = 10

### Specifying the Names of the Attributes we will be using.

In [None]:
# Each pair is (raw column code in the housing CSV, readable name to use for it).
_census_fields = (
    ("GEO_ID", "GEO_ID"),
    ("DP05_0002E", "population_male"),
    ("DP05_0003E", "population_female"),
    ("DP05_0004E", "sex_ratio"),
    ("DP05_0019E", "under_18"),
    ("DP05_0021E", "over_18"),
    ("DP05_0023E", "over_or_62"),
    ("DP05_0018E", "median_age"),
    ("DP05_0086E", "total_housing"),
)

# Both lists are stored in REVERSE order (total_housing first, GEO_ID last);
# the cell that builds the DataFrame relies on this to produce the desired
# column order.
attributes_DP = [code for code, _ in reversed(_census_fields)]
attribute_names = [label for _, label in reversed(_census_fields)]

### Creating our Dataframe.

In [None]:
# Reading in general data.
df_housing = pd.read_csv(cd._housing_demographics_path)

# Map each raw column code to its readable name. Both lists were reversed by
# the cell above, so walking them backwards yields the intended left-to-right
# column order (GEO_ID first, total_housing last).
housing_columns = dict(zip(reversed(attributes_DP), reversed(attribute_names)))

# Keep only the requested columns, under their readable names.
# Row 0 is dropped because it is not treated as a data record here (presumably
# the descriptive second header row of the export -- TODO confirm against the CSV).
# Selecting and renaming directly avoids writing names into row 0 via chained
# assignment and avoids Series.append, neither of which works on current pandas.
housing_named = (
    df_housing.drop(index=0)[list(housing_columns)]
    .rename(columns=housing_columns)
)

# countyFIPS is the integer value of the last five characters of each GEO_ID.
# It is stored as an integer column (not object) so it can serve as a join key.
county_fips = housing_named["GEO_ID"].str[-5:].astype(int)

# GEO_ID is no longer needed once countyFIPS exists; countyFIPS goes first.
df_housing_modified = housing_named.drop(columns="GEO_ID")
df_housing_modified.insert(0, "countyFIPS", county_fips)

In [None]:
# Preview the reshaped housing frame (printed size is capped by the
# display.max_rows / display.max_columns options set earlier).
df_housing_modified

In [None]:
# Load the primary COVID table and attach the housing demographics to it.
# The join key is named explicitly so the merge cannot silently widen to any
# other column name the two frames might come to share. The inner join keeps
# only counties that appear in both frames.
df_covid = pd.merge(
    cd.get_covid_primary_data(),
    df_housing_modified,
    on="countyFIPS",
    how="inner",
)

In [8]:
# Preview the merged COVID + housing frame.
df_covid

    countyFIPS       County Name State  stateFIPS  cases - 1/22/20  \
0         1003    Baldwin County    AL          1                0   
1         1015    Calhoun County    AL          1                0   
2         1043    Cullman County    AL          1                0   
3         1049     DeKalb County    AL          1                0   
4         1051     Elmore County    AL          1                0   
..         ...               ...   ...        ...              ...   
813      55133   Waukesha County    WI         55                0   
814      55139  Winnebago County    WI         55                0   
815      55141       Wood County    WI         55                0   
816      56021    Laramie County    WY         56                0   
817      56025    Natrona County    WY         56                0   

     cases - 1/23/20  ...  sex_ratio  under_18  over_18  over_or_62  \
0                  0  ...       93.7     47156   170866       53676   
1                