# Multivariate Regression - all five features for X & Unemployment Rate for y

In [1]:
import sys, os
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import psycopg2
from sqlalchemy import create_engine
from config import db_password

  """)


In [2]:
# Load the whole "unemployment_table" from the local PostgreSQL database into a DataFrame.
# db_password is imported from the local config module in the import cell.
alchemyEngine = create_engine(
    f"postgresql://postgres:{db_password}@127.0.0.1:5432/Unemployement_db",
    pool_recycle=3600,
)

# Open the connection as a context manager so it is closed even if the query raises;
# the original closed it manually, which leaked the connection on any read error.
with alchemyEngine.connect() as dbConnection:
    unemployment_dataFrame = pd.read_sql('select * from "unemployment_table"', dbConnection)

# Keep wide frames on one line when printed instead of wrapping the columns
pd.set_option('display.expand_frame_repr', False)

# Print the DataFrame
print(unemployment_dataFrame)

           date  unemployment  federal_ir      cpi        gdp      ppi  inflation_rate
0    2001-01-01           4.2        5.98  103.300  10470.231  140.000          0.0140
1    2001-02-01           4.2        5.49  103.700  10470.231  137.400          0.0168
2    2001-03-01           4.3        5.31  103.900  10470.231  135.900          0.0262
3    2001-04-01           4.4        4.80  104.200  10599.000  136.400          0.0416
4    2001-05-01           4.3        4.21  104.600  10599.000  136.800          0.0499
..          ...           ...         ...      ...        ...      ...             ...
247  2021-08-01           5.2        0.09  153.301  23550.420  233.415          0.0272
248  2021-09-01           4.7        0.08  153.734  23550.420  235.678          0.0265
249  2021-10-01           4.6        0.08  154.973  24349.121  240.465          0.0213
250  2021-11-01           4.2        0.08  155.687  24349.121  243.287          0.0190
251  2021-12-01           3.9        0.08  

In [11]:
# Remove the date column; the remaining columns are the target and the five indicators
ue_df = unemployment_dataFrame.drop(columns=['date'])
ue_df

Unnamed: 0,unemployment,federal_ir,cpi,gdp,ppi,inflation_rate
0,4.2,5.98,103.300,10470.231,140.000,0.0140
1,4.2,5.49,103.700,10470.231,137.400,0.0168
2,4.3,5.31,103.900,10470.231,135.900,0.0262
3,4.4,4.80,104.200,10599.000,136.400,0.0416
4,4.3,4.21,104.600,10599.000,136.800,0.0499
...,...,...,...,...,...,...
247,5.2,0.09,153.301,23550.420,233.415,0.0272
248,4.7,0.08,153.734,23550.420,235.678,0.0265
249,4.6,0.08,154.973,24349.121,240.465,0.0213
250,4.2,0.08,155.687,24349.121,243.287,0.0190


In [12]:
# Min-max scale every column to the 0-1 range, then wrap the scaled array
# in a new DataFrame that reuses the original column names
mms = MinMaxScaler()
mms.fit(ue_df)
unemployment_mms = mms.transform(ue_df)
ue_mms_df = pd.DataFrame(data=unemployment_mms, columns=ue_df.columns)
ue_mms_df.head()

Unnamed: 0,unemployment,federal_ir,cpi,gdp,ppi,inflation_rate
0,0.0625,1.0,0.0,0.0,0.10331,0.382932
1,0.0625,0.917369,0.007573,0.0,0.080738,0.413567
2,0.071429,0.887015,0.01136,0.0,0.067716,0.516411
3,0.080357,0.801012,0.01704,0.009278,0.072057,0.684902
4,0.071429,0.701518,0.024614,0.009278,0.075529,0.775711


In [14]:
# Features (X): the five scaled indicator columns; target (y): the scaled unemployment column
X = ue_mms_df.loc[:, ['federal_ir', 'cpi', 'gdp', 'ppi', 'inflation_rate']]
y = ue_mms_df.loc[:, 'unemployment']

In [18]:
# Create the scikit-learn LinearRegression estimator with its constructor defaults;
# it is fitted on X and y in a later cell.
model = LinearRegression()

In [24]:
# Fit the regression on the scaled features and target.
model.fit(X, y)
ue_mms_model = model  # fit() returns the estimator itself, so both names refer to the same fitted object

# Print the array of fitted coefficients, one per feature in the column order of X
# (federal_ir, cpi, gdp, ppi, inflation_rate); the largest magnitudes indicate the largest impact.
print(ue_mms_model.coef_)

[-0.21277647  2.87903701 -2.64676178 -0.47340552 -0.15599472]


In [None]:
# Report the y-intercept of the fitted model.
# y was taken from the min-max scaled frame, so this value is on the scaled unemployment axis.
print(model.intercept_)

In [None]:
# Goodness of fit: score the fitted model against the same X and y it was trained on
r2 = model.score(X=X, y=y)
print(r2)

In [None]:
# Plot Relationship between Interest Rate & Unemployment Rate
# Reads from ue_df (the unscaled frame without the date column); the original used `df`,
# which is not defined anywhere in the visible notebook and raised NameError on a fresh kernel.
fig, ax = plt.subplots()
ax.scatter(ue_df['federal_ir'], ue_df['unemployment'], color='green')
ax.set_title('Interest Rates Vs Unemployment Rate', fontsize=14)
ax.set_xlabel('Interest Rates', fontsize=14)
ax.set_ylabel('Unemployment Rate', fontsize=14)
ax.grid(True)
plt.show()

In [None]:
# Plot Relationship between Consumer Price Index & Unemployment Rate
# Reads from ue_df (the unscaled frame without the date column); the original used `df`,
# which is not defined anywhere in the visible notebook and raised NameError on a fresh kernel.
fig, ax = plt.subplots()
ax.scatter(ue_df['cpi'], ue_df['unemployment'], color='red')
ax.set_title('Consumer Price Indices Vs Unemployment Rate', fontsize=14)
ax.set_xlabel('Consumer Price Index', fontsize=14)
ax.set_ylabel('Unemployment Rate', fontsize=14)
ax.grid(True)
plt.show()

In [None]:
# Plot Relationship between Gross Domestic Product & Unemployment Rate
# Reads from ue_df (the unscaled frame without the date column); the original used `df`,
# which is not defined anywhere in the visible notebook and raised NameError on a fresh kernel.
fig, ax = plt.subplots()
ax.scatter(ue_df['gdp'], ue_df['unemployment'], color='purple')
# Title corrected: it was copy-pasted from the interest-rate plot
ax.set_title('Gross Domestic Product Vs Unemployment Rate', fontsize=14)
ax.set_xlabel('Gross Domestic Product', fontsize=14)
ax.set_ylabel('Unemployment Rate', fontsize=14)
ax.grid(True)
plt.show()

In [None]:
# Plot Relationship between Producer Price Index & Unemployment Rate
# Reads from ue_df (the unscaled frame without the date column); the original used `df`,
# which is not defined anywhere in the visible notebook and raised NameError on a fresh kernel.
fig, ax = plt.subplots()
ax.scatter(ue_df['ppi'], ue_df['unemployment'], color='orange')
ax.set_title('Producer Price Index Vs Unemployment Rate', fontsize=14)
ax.set_xlabel('Producer Price Index', fontsize=14)
ax.set_ylabel('Unemployment Rate', fontsize=14)
ax.grid(True)
plt.show()

In [None]:
# Plot Relationship between Inflation Rate & Unemployment Rate
# Reads from ue_df (the unscaled frame without the date column); the original used `df`,
# which is not defined anywhere in the visible notebook and raised NameError on a fresh kernel.
fig, ax = plt.subplots()
# x data corrected: the original plotted the 'gdp' column under an inflation-rate title and label
ax.scatter(ue_df['inflation_rate'], ue_df['unemployment'], color='blue')
ax.set_title('Inflation Rate Vs Unemployment Rate', fontsize=14)
ax.set_xlabel('Inflation Rate', fontsize=14)
ax.set_ylabel('Unemployment Rate', fontsize=14)
ax.grid(True)
plt.show()