In [1]:
import sys, os
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import psycopg2
from sqlalchemy import create_engine
import datetime as dt
from config import db_password

In [2]:
# Create an engine instance
alchemyEngine = create_engine(f"postgresql://postgres:{db_password}@127.0.0.1:5432/Unemployement_db", pool_recycle=3600);
# Connect to PostgreSQL server
dbConnection = alchemyEngine.connect();
# Read data from PostgreSQL database table and load into a DataFrame instance
ue_df = pd.read_sql("select * from \"unemployment_table\"", dbConnection);
pd.set_option('display.expand_frame_repr', False);
# Print the DataFrame
print(ue_df);
# Close the database connection
dbConnection.close();

Unnamed: 0,DATE,Unemployment_rate,GDP,PPI,Inflation_Rate,CPI,Interest Rates
0,1/1/2001,4.2,10470.231,140.0,0.01,103.3,5.98
1,2/1/2001,4.2,10470.231,137.4,0.02,103.7,5.49
2,3/1/2001,4.3,10470.231,135.9,0.03,103.9,5.31
3,4/1/2001,4.4,10599.0,136.4,0.04,104.2,4.8
4,5/1/2001,4.3,10599.0,136.8,0.05,104.6,4.21


In [4]:
## Changes "DATE" to integer
ue_df['date'] = ue_df.date.replace('/', '', regex=True).astype(int)
result = ue_df.dtypes
print(result)

DATE                   int32
Unemployment_rate    float64
GDP                  float64
PPI                  float64
Inflation_Rate       float64
CPI                  float64
Interest Rates       float64
dtype: object


In [5]:
# Scale database to 0 to 1 & Create new DataFrame
mms = MinMaxScaler()
unemployment_mms = mms.fit_transform(ue_df)
ue_mms_df = pd.DataFrame(unemployment_mms, 
                                   columns = ue_df.columns)
ue_mms_df.head()

Unnamed: 0,DATE,Unemployment_rate,GDP,PPI,Inflation_Rate,CPI,Interest Rates
0,0.0,0.0625,0.0,0.10331,0.333333,0.0,1.0
1,0.09090744,0.0625,0.0,0.080738,0.444444,0.007573,0.917369
2,0.1818149,0.071429,0.0,0.067716,0.555556,0.011359,0.887015
3,0.2727223,0.080357,0.009278,0.072057,0.666667,0.017039,0.801012
4,0.3636298,0.071429,0.009278,0.075529,0.777778,0.024612,0.701518
5,0.4545372,0.089286,0.009278,0.064243,0.777778,0.028398,0.661046
6,0.5454446,0.098214,0.009207,0.046012,0.777778,0.022719,0.627319
7,0.6363521,0.125,0.009207,0.046012,0.777778,0.024612,0.607083
8,0.7272595,0.133929,0.009207,0.045144,0.777778,0.030292,0.509275
9,0.8181669,0.160714,0.013707,0.019099,0.888889,0.026505,0.411467


In [6]:
# Select Features & Target
X = ue_mms_df[['federal_ir', 'cpi', 'gdp', 'ppi', 'inflation_rate']]
y = ue_mms_df['unemployment']

In [7]:
model = LinearRegression()

In [8]:
ue_mms_model = model.fit(X,y)
print(ue_mms_model.coef_)

[-0.21302425  2.87352719 -2.64780148 -0.46415895 -0.14674679]


In [9]:
# Calculate y-intercept
print(model.intercept_)

0.33107670089272057


In [10]:
# Calculate Goodness of Fit
r2 = model.score(X, y)
print(r2)

0.6828473881569777


In [None]:
# Plot Relationship between Federal Interest Rate & Unemployment Rate
plt.scatter(ue_df['federal_ir'], ue_df['unemployment'], color='green')
plt.title('Interest Rates Vs Unemployment Rate', fontsize=14)
plt.xlabel('Interest Rates', fontsize=14)
plt.ylabel('Unemployment Rate', fontsize=14)
plt.grid(True)
plt.show()

In [None]:
# Plot Relationship between Consumer Price Index & Unemployment Rate
plt.scatter(ue_df['cpi'], ue_df['unemployment'], color='red')
plt.title('Consumer Price Indices Vs Unemployment Rate', fontsize=14)
plt.xlabel('Consumer Price Index', fontsize=14)
plt.ylabel('Unemployment Rate', fontsize=14)
plt.grid(True)
plt.show()

In [None]:
# Plot Relationship between Gross Domestic Product & Unemployment Rate
plt.scatter(ue_df['gdp'], ue_df['unemployment'], color='purple')
plt.title('GDP Vs Unemployment Rate', fontsize=14)
plt.xlabel('Gross Domestic Product', fontsize=14)
plt.ylabel('Unemployment Rate', fontsize=14)
plt.grid(True)
plt.show()

In [None]:
# Plot Relationship between Producer Price Product & Unemployment Rate
plt.scatter(ue_df['ppi'], ue_df['unemployment'], color='orange')
plt.title('Producer Price Index Vs Unemployment Rate', fontsize=14)
plt.xlabel('Producer Price Index', fontsize=14)
plt.ylabel('Unemployment Rate', fontsize=14)
plt.grid(True)
plt.show()

In [None]:
# Plot Relationship between Inflation Rate & Unemployment Rate
plt.scatter(ue_df['inflation_rate'], ue_df['unemployment'], color='blue')
plt.title('Inflation Rate Vs Unemployment Rate', fontsize=14)
plt.xlabel('Inflation Rate', fontsize=14)
plt.ylabel('Unemployment Rate', fontsize=14)
plt.grid(True)
plt.show()