In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Load the raw dataset; the relative path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="Data.csv")

In [None]:
# Predictor columns and response column used by the regression.
feature_columns = [
    "Personal Consumption Expenditure (billions of usd)",
    "Unemployment (%)",
    "Supply (thousand barrels per day)",
    "Imports (thousand barrels per day)",
    "Production (thousand barrels per day)",
]
target_column = "Staggered Price of Barrel"

X = df[feature_columns]
# Keep the target as a single-column 2-D array of shape (n_samples, 1).
y = df[target_column].values.reshape(-1, 1)

# Unfitted estimator; it is trained in a later cell.
model = LinearRegression()

In [None]:
# Hold out a test partition (train_test_split's default size); the seed makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Standardise features and target using statistics learned from the training
# rows only, then apply those same statistics to the held-out rows.
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
y_train_scaled = y_scaler.fit_transform(y_train)
X_test_scaled = X_scaler.transform(X_test)
y_test_scaled = y_scaler.transform(y_test)

# Fit the regression on the standardised training data.
model.fit(X_train_scaled, y_train_scaled)

# Report the model's score on both partitions.
training_score = model.score(X_train_scaled, y_train_scaled)
testing_score = model.score(X_test_scaled, y_test_scaled)
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

In [None]:
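# StandardScaler not only scales the data but also centers it around zero.
# Centering removes the collinearity between each feature and the (implicit)
# intercept, so it typically lowers VIFs computed on a design matrix that has
# no constant column. It does not change the correlations among the features themselves.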

In [None]:
# Variance inflation factor of each standardised training feature,
# one row per predictor, labelled with the original column name.
train_vif = pd.DataFrame(
    {
        "VIF Factor": [
            variance_inflation_factor(X_train_scaled, col_idx)
            for col_idx in range(X_train_scaled.shape[1])
        ],
        "features": X.columns,
    }
)
# Display a rounded copy; train_vif itself keeps full precision.
train_vif.round(1)

In [None]:
# Variance inflation factor of each raw (unscaled) training feature.
#
# variance_inflation_factor() regresses one column on the others exactly as
# given and does not add an intercept. On raw data with non-zero means that
# yields "uncentered" VIFs that are inflated by the column means rather than
# by collinearity among the features, so an explicit constant column is
# prepended to the design matrix and skipped when reporting.
X_train_design = np.asarray(sm.add_constant(X_train, has_constant="add"))

train_vif1 = pd.DataFrame()
train_vif1["VIF Factor"] = [
    variance_inflation_factor(X_train_design, col_idx)
    for col_idx in range(1, X_train_design.shape[1])  # column 0 is the constant
]
train_vif1["features"] = X.columns
# Display a rounded copy; train_vif1 itself keeps full precision.
train_vif1.round(1)

In [None]:
# Variance inflation factor of each standardised *test* feature.
#
# Stored under its own name so the training-set table (train_vif) is not
# overwritten by test-set values.
#
# The test rows were scaled with the training mean/std, so their columns are
# not exactly zero-mean. variance_inflation_factor() does not add an
# intercept, so a constant column is prepended to the design matrix and
# skipped when reporting.
X_test_scaled_design = sm.add_constant(X_test_scaled, has_constant="add")

test_vif = pd.DataFrame()
test_vif["VIF Factor"] = [
    variance_inflation_factor(X_test_scaled_design, col_idx)
    for col_idx in range(1, X_test_scaled_design.shape[1])  # column 0 is the constant
]
test_vif["features"] = X.columns
# Rounded for display, matching the other VIF tables.
test_vif.round(1)

In [None]:
# Variance inflation factor of each raw (unscaled) *test* feature.
#
# Stored under its own name so the training-set table (train_vif1) is not
# overwritten by test-set values.
#
# variance_inflation_factor() does not add an intercept; on raw data with
# non-zero means that inflates every VIF, so a constant column is prepended
# to the design matrix and skipped when reporting.
X_test_design = np.asarray(sm.add_constant(X_test, has_constant="add"))

test_vif1 = pd.DataFrame()
test_vif1["VIF Factor"] = [
    variance_inflation_factor(X_test_design, col_idx)
    for col_idx in range(1, X_test_design.shape[1])  # column 0 is the constant
]
test_vif1["features"] = X.columns
# Display a rounded copy; test_vif1 itself keeps full precision.
test_vif1.round(1)