In [None]:
import sys, os
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import psycopg2
from sqlalchemy import create_engine
import datetime as dt
from config import db_password
from sklearn.model_selection import train_test_split
from scipy.stats import linregress

In [None]:
# Build the SQLAlchemy engine for the local PostgreSQL server; the password is
# imported from `config` rather than written into the notebook.
alchemyEngine = create_engine(
    f"postgresql://postgres:{db_password}@127.0.0.1:5432/Unemployement_db",
    pool_recycle=3600,
)
# Read the full table into a DataFrame. The context manager closes the
# connection even when the query raises, so no connection is leaked.
with alchemyEngine.connect() as dbConnection:
    ue_df = pd.read_sql('select * from "unemployment_table"', dbConnection)
# Keep wide frames on a single line instead of wrapping columns when printed.
pd.set_option('display.expand_frame_repr', False)
# Print the DataFrame
print(ue_df)

In [None]:
# Alternative (disabled): load the dataset from a CSV file instead of the database
#file_path = "../Fed_Interest_Rate_Analysis/unemployment_table.csv"
#ue_df = pd.read_csv(file_path)
#ue_df

In [None]:
# Turn the "date" column into integers: stringify it, strip every '-' and cast.
# Assumes the stringified values contain only digits and dashes — TODO confirm.
date_text = ue_df['date'].astype(str)
ue_df['date'] = date_text.replace(to_replace='-', value='', regex=True).astype(int)
# Show the resulting column dtypes to confirm the conversion.
result = ue_df.dtypes
print(result)


In [None]:
# Preview the first five rows after the date conversion
ue_df.head(n=5)

In [None]:
# Predictor columns (features) and the column to be predicted (target)
feature_columns = ['federal_ir', 'cpi', 'gdp', 'ppi', 'inflation_rate']
X = ue_df[feature_columns]
y = ue_df['unemployment']

In [None]:
# Partition features and target into train/test subsets; random_state=1 pins
# the shuffle so the same split is produced on every run.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)
X_train.shape

In [None]:
# Rescale every column of ue_df to the 0-1 range and wrap the result in a new
# DataFrame that reuses the original column names.
# NOTE(review): ue_mms_df is not referenced by any later cell visible here —
# the regression below is fitted on the unscaled X_train.
mms = MinMaxScaler()
unemployment_mms = mms.fit(ue_df).transform(ue_df)
ue_mms_df = pd.DataFrame(data=unemployment_mms, columns=ue_df.columns)
ue_mms_df.head()

In [None]:
# Helper that performs a one-predictor linear regression on the unemployment
# data and plots the points, the regression line and the line's equation.
def plot_linear_regression(x_values, y_values, title, x_label, y_label, text_coordinates):
    """Scatter-plot ``y_values`` against ``x_values`` with a fitted regression line.

    Parameters
    ----------
    x_values, y_values : array-like
        Paired observations handed to ``scipy.stats.linregress``. ``x_values``
        must support ``x_values * slope + intercept`` (e.g. a pandas Series or
        NumPy array).
    title, x_label, y_label : str
        Figure title and axis labels.
    text_coordinates : tuple
        ``xy`` position passed to ``plt.annotate`` for the equation text
        (data coordinates by matplotlib's default).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Least-squares fit; only slope and intercept are needed for the plot.
    fit = linregress(x_values, y_values)
    slope, intercept = fit.slope, fit.intercept

    # Points on the fitted line, one per observed x.
    regress_values = x_values * slope + intercept

    # Equation label rounded to two decimals. The sign is rendered explicitly so
    # a negative intercept reads "x - 3.0" instead of "x + -3.0".
    slope_rounded = round(slope, 2)
    intercept_rounded = round(intercept, 2)
    sign = "-" if intercept_rounded < 0 else "+"
    line_eq = f"y = {slope_rounded!s}x {sign} {abs(intercept_rounded)!s}"

    # Scatter of the raw data with the regression line drawn in red.
    plt.scatter(x_values, y_values)
    plt.plot(x_values, regress_values, "r")
    # Annotate the plot with the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Unemployment rate regressed on the federal interest rate
x_values = X['federal_ir']
y_values = y
# Draw the scatter plot with its fitted line.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Federal Interest Rate for Unemployment',
    x_label='Federal Interest Rate',
    y_label='Unemployment Rate',
    text_coordinates=(3, 13),
)

In [None]:
# Unemployment rate regressed on gross domestic product
x_values = X['gdp']
y_values = y
# Draw the scatter plot with its fitted line.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Gross Domestic Product for Unemployment',
    x_label='Gross Domestic Product',
    y_label='Unemployment Rate',
    text_coordinates=(12000, 13),
)

In [None]:
# Unemployment rate regressed on the producer price index
x_values = X['ppi']
y_values = y
# Draw the scatter plot with its fitted line.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Producer Price Index for Unemployment',
    x_label='Producer Price Index',
    y_label='Unemployment Rate',
    text_coordinates=(200, 13),
)

In [None]:
# Unemployment rate regressed on the consumer price index
x_values = X['cpi']
y_values = y
# Draw the scatter plot with its fitted line.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Consumer Price Index for Unemployment',
    x_label='Consumer Price Index',
    y_label='Unemployment Rate',
    text_coordinates=(120, 13),
)

In [None]:
# Unemployment rate regressed on the inflation rate
x_values = X['inflation_rate']
y_values = y
# Draw the scatter plot with its fitted line.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Inflation Rate for Unemployment',
    x_label='Inflation Rate',
    y_label='Unemployment Rate',
    text_coordinates=(0.03, 13),
)

In [None]:
# Relationship between two predictors: consumer price index regressed on GDP
x_values = X['gdp']
y_values = X['cpi']
# Draw the scatter plot with its fitted line.
plot_linear_regression(
    x_values,
    y_values,
    title='Gross Domestic Product vs. Consumer Price Index',
    x_label='Gross Domestic Product',
    y_label='Consumer Price Index',
    text_coordinates=(10000, 155),
)

In [None]:
# Instantiate the estimator in this cell: no earlier cell defines `model`, so
# without this line the notebook raises NameError on Restart & Run All.
model = LinearRegression()
# Fit on the training split. NOTE(review): despite its name, ue_mms_model is
# fitted on the unscaled X_train, not on the MinMax-scaled frame; fit() returns
# the estimator itself, so ue_mms_model and model are the same object.
ue_mms_model = model.fit(X_train, y_train)
# One coefficient per feature column of X_train.
print(ue_mms_model.coef_)

In [None]:
# Intercept (constant term) of the fitted linear model
y_intercept = model.intercept_
print(y_intercept)

In [None]:
# Goodness of fit over the complete dataset (every row of X and y, which
# includes the rows the model was trained on)
r2 = model.score(X=X, y=y)
print(r2)

In [None]:
# Goodness of fit on the training split only
r2 = model.score(X=X_train, y=y_train)
print(r2)

In [None]:
# Goodness of fit on the held-out test split only
r2 = model.score(X=X_test, y=y_test)
print(r2)