In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
from src.cleaning import DataCleaner

In [2]:
data_cleaner = DataCleaner('data/raw/hpi_po_metro.csv', 'data/raw/ssamatab1.csv')
data_cleaner.clean_unemployment_data
clean_housing_data = data_cleaner.clean_housing_data
clean_unemployment_data = data_cleaner.clean_unemployment_data

In [3]:
clean_unemployment_data.columns

Index(['LAUS_Code', 'State_FIPS_Code', 'FIPS_Code', 'Area', 'Year', 'Month',
       'Civilian_Labor_Force', 'Employment', 'Unemployment',
       'Unemployment_Rate'],
      dtype='object')

In [4]:
# Group the data to create to make it relatable to each other.

grouped_housing_data = clean_housing_data.drop(columns=['Quarter','Not_Seasonally_Adjusted_Index'] ).groupby(['CBSA', 'Metro_Name', 'Year']).agg('mean').reset_index()

grouped_unemployment_data = clean_unemployment_data.drop(columns="Month").groupby(
    ['LAUS_Code', 'State_FIPS_Code','FIPS_Code', 'Area', 'Year']).agg('mean').round(2).reset_index()
grouped_unemployment_data["Unemployment_Rate"] = grouped_unemployment_data["Unemployment_Rate"] / 10

grouped_unemployment_data

Unnamed: 0,LAUS_Code,State_FIPS_Code,FIPS_Code,Area,Year,Civilian_Labor_Force,Employment,Unemployment,Unemployment_Rate
0,MT0111500000000,1,11500,"Anniston-Oxford, AL MSA",1990,51473.92,47727.33,3746.58,7.267
1,MT0111500000000,1,11500,"Anniston-Oxford, AL MSA",1991,52168.58,48221.42,3947.17,7.567
2,MT0111500000000,1,11500,"Anniston-Oxford, AL MSA",1992,53110.00,48424.08,4685.92,8.808
3,MT0111500000000,1,11500,"Anniston-Oxford, AL MSA",1993,52674.58,48100.67,4573.92,8.683
4,MT0111500000000,1,11500,"Anniston-Oxford, AL MSA",1994,52471.75,48392.33,4079.42,7.767
...,...,...,...,...,...,...,...,...,...
14135,MT7241980000000,72,41980,"San Juan-Bayamon-Caguas, PR MSA",2021,786984.58,729661.83,57322.75,7.300
14136,MT7241980000000,72,41980,"San Juan-Bayamon-Caguas, PR MSA",2022,798661.42,757138.67,41522.75,5.200
14137,MT7241980000000,72,41980,"San Juan-Bayamon-Caguas, PR MSA",2023,805597.33,765487.42,40109.92,4.983
14138,MT7241980000000,72,41980,"San Juan-Bayamon-Caguas, PR MSA",2024,816319.50,777505.50,38814.00,4.758


In [5]:
# make SQL database with tables "Unemployment" and "Housing_value"

with sqlite3.connect("database.db") as conn:
    grouped_unemployment_data.to_sql("Unemployment", conn, if_exists='replace', index=False)
    grouped_housing_data.to_sql("Housing_value", conn, if_exists='replace', index=False)

In [6]:
grouped_housing_data.columns

Index(['CBSA', 'Metro_Name', 'Year', 'Seasonally_Adjusted_Index'], dtype='object')

In [7]:
grouped_unemployment_data.columns

Index(['LAUS_Code', 'State_FIPS_Code', 'FIPS_Code', 'Area', 'Year',
       'Civilian_Labor_Force', 'Employment', 'Unemployment',
       'Unemployment_Rate'],
      dtype='object')

In [8]:
combined_tables = """

SELECT 
    Unemployment.LAUS_Code AS Unemployment_LAUS_Code,
    Unemployment.Year AS Unemployment_Year,
    Unemployment.Area AS Unemployment_Area,
    Unemployment.Employment AS Employment,
    Unemployment.Unemployment AS Unemployment,
    Unemployment.Unemployment_Rate AS Unemployment_Rate,
    Housing_value.Metro_Name AS Housing_value_Metro_Name,
    Housing_value.Seasonally_Adjusted_Index AS Housing_value_Seasonally_Adjusted_Index
FROM Unemployment
INNER JOIN Housing_value
    ON Unemployment.FIPS_Code = Housing_value.CBSA
    
"""

combined_tables_result = pd.read_sql(combined_tables, conn)
combined_tables_result


Unnamed: 0,Unemployment_LAUS_Code,Unemployment_Year,Unemployment_Area,Employment,Unemployment,Unemployment_Rate,Housing_value_Metro_Name,Housing_value_Seasonally_Adjusted_Index
0,MT0113820000000,1990,"Birmingham, AL MSA",434440.58,24961.33,5.425,"Birmingham, AL",101.3800
1,MT0113820000000,1990,"Birmingham, AL MSA",434440.58,24961.33,5.425,"Birmingham, AL",106.0900
2,MT0113820000000,1990,"Birmingham, AL MSA",434440.58,24961.33,5.425,"Birmingham, AL",111.7600
3,MT0113820000000,1990,"Birmingham, AL MSA",434440.58,24961.33,5.425,"Birmingham, AL",118.6525
4,MT0113820000000,1990,"Birmingham, AL MSA",434440.58,24961.33,5.425,"Birmingham, AL",123.9525
...,...,...,...,...,...,...,...,...
84275,MT5533340000000,2025,"Milwaukee-Waukesha, WI MSA",788726.38,28328.62,3.475,"Milwaukee-Waukesha, WI",322.5025
84276,MT5533340000000,2025,"Milwaukee-Waukesha, WI MSA",788726.38,28328.62,3.475,"Milwaukee-Waukesha, WI",357.3625
84277,MT5533340000000,2025,"Milwaukee-Waukesha, WI MSA",788726.38,28328.62,3.475,"Milwaukee-Waukesha, WI",385.8775
84278,MT5533340000000,2025,"Milwaukee-Waukesha, WI MSA",788726.38,28328.62,3.475,"Milwaukee-Waukesha, WI",415.7175


In [14]:
correlation = combined_tables_result["Unemployment_Rate"].corr(combined_tables_result["Housing_value_Seasonally_Adjusted_Index"]).round(2)
correlation

np.float64(-0.07)

In [10]:
query1 = """
SELECT * FROM Unemployment
ORDER BY Area
"""
result1 = pd.read_sql(query1, conn)
result1

Unnamed: 0,LAUS_Code,State_FIPS_Code,FIPS_Code,Area,Year,Civilian_Labor_Force,Employment,Unemployment,Unemployment_Rate
0,MT4810180000000,48,10180,"Abilene, TX MSA",1990,68715.50,64637.42,4078.08,5.933
1,MT4810180000000,48,10180,"Abilene, TX MSA",1991,69444.17,65741.25,3702.92,5.325
2,MT4810180000000,48,10180,"Abilene, TX MSA",1992,71049.92,67113.50,3936.42,5.550
3,MT4810180000000,48,10180,"Abilene, TX MSA",1993,72534.25,68599.17,3935.08,5.408
4,MT4810180000000,48,10180,"Abilene, TX MSA",1994,73953.08,70277.17,3675.92,4.958
...,...,...,...,...,...,...,...,...,...
14135,MT0449740000000,4,49740,"Yuma, AZ MSA",2021,93947.92,81840.08,12107.83,12.892
14136,MT0449740000000,4,49740,"Yuma, AZ MSA",2022,97012.58,85258.58,11754.00,12.125
14137,MT0449740000000,4,49740,"Yuma, AZ MSA",2023,98531.75,86537.17,11994.58,12.183
14138,MT0449740000000,4,49740,"Yuma, AZ MSA",2024,98665.08,86622.50,12042.58,12.200


In [11]:
query2 = """
SELECT * FROM Housing_value
"""
result2 = pd.read_sql(query2, conn)
result2

Unnamed: 0,CBSA,Metro_Name,Year,Seasonally_Adjusted_Index
0,10580,"Albany-Schenectady-Troy, NY",1991,99.8700
1,10580,"Albany-Schenectady-Troy, NY",1992,101.9125
2,10580,"Albany-Schenectady-Troy, NY",1993,100.1800
3,10580,"Albany-Schenectady-Troy, NY",1994,97.8225
4,10580,"Albany-Schenectady-Troy, NY",1995,94.9350
...,...,...,...,...
3495,49340,"Worcester, MA",2021,308.8025
3496,49340,"Worcester, MA",2022,345.0725
3497,49340,"Worcester, MA",2023,365.6825
3498,49340,"Worcester, MA",2024,390.6775
