### Using Toy Data for Code Correctness Checking

In [1]:
import sys
import os
sys.path.append(os.path.abspath("D:\\Python\\commodity_futures_price"))
import math

import numpy as np
import pandas as pd
# %load_ext autoreload
# %autoreload 2
%reload_ext autoreload
from model.savvy_sh import SavvySh

# Fix random seed
np.random.seed(223)

# Generate test data
n_obs = 100   # number of observations
n_features = 5  # number of predictors

# Random predictors
X = np.random.randn(n_obs, n_features)
# True coefficients
beta_true = np.array([1.5, -2.0, 0.5, 0.0, 1.0])
# Noise
sigma = 0.5
epsilon = np.random.randn(n_obs) * sigma
# Response
y = X @ beta_true + epsilon

test_X = np.random.randn(50, 5)
# True coefficients
beta_true = np.array([1.5, -2.0, 0.5, 0.0, 1.0])
# Noise
sigma = 0.5
epsilon = np.random.randn(50) * sigma
# Response
test_y = test_X @ beta_true + epsilon

In [2]:
from sklearn.linear_model import LinearRegression

base_model = LinearRegression()
base_model.fit(X, y)
print("OLS: ", base_model.intercept_, base_model.coef_)
rmse_ols = np.sqrt(np.mean((base_model.predict(test_X) - test_y) ** 2))

OLS:  0.0018617194173899954 [ 1.46302069 -1.96949052  0.52502162  0.02515925  1.10123564]


In [3]:
# Ridge regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, KFold

kf = KFold(n_splits=5, shuffle=True, random_state=223)

params = {"alpha":np.arange(0.00001, 10, 500)}
ridge_model = Ridge()
ridge_cv = GridSearchCV(ridge_model, param_grid=params, cv=kf)
ridge_cv.fit(X, y)

ridge = Ridge(alpha=ridge_cv.best_params_['alpha'])
ridge.fit(X, y)
print("Ridge: ", ridge.intercept_, ridge.coef_)
rmse_ridge = np.sqrt(np.mean((y - ridge.predict(X)) ** 2))

Ridge:  0.0018617160325871962 [ 1.46302049 -1.96949035  0.52502163  0.0251593   1.10123558]


In [4]:
model_mult = SavvySh(model_class="Multiplicative", include_Sh=True)
model_mult.fit(X, y)

# Assume that each covariate is independent
model_linear = SavvySh(model_class="Linear")
model_linear.fit(X, y)

model_slab = SavvySh(model_class="Slab")
model_slab.fit(X, y)

model_shrink_ridge = SavvySh(model_class="ShrinkageRR")
model_shrink_ridge.fit(X, y)

np.set_printoptions(suppress=True, precision=6)
print("St: ", model_mult.coef_["St"])
print("DSh: ", model_mult.coef_["DSh"])
print("Sh: ", model_mult.coef_["Sh"])
# intercept as 0
print("L Sh: ", np.hstack([0, model_linear.coef_["LSh"]]))
print("Slab: ", model_slab.coef_["SR"])
print("G Slab: ", model_slab.coef_["GSR"])
print("Sh Ridge: ", model_shrink_ridge.coef_["SRR"])

St:  [ 0.001856  1.458795 -1.963802  0.523505  0.025087  1.098055]
DSh:  [ 0.000002  1.459792 -1.968123  0.519263  0.003677  1.097738]
Sh:  [ 0.001815  1.460336 -1.967287  0.525077  0.02577   1.100385]
L Sh:  [ 0.        1.462792 -1.969272  0.526177  0.02613   1.102485]
Slab:  [-0.140148  1.390711 -2.082399  0.415773 -0.052041  0.98366 ]
G Slab:  [-0.081332  1.379827 -2.052684  0.441828 -0.058034  1.018042]
Sh Ridge:  [ 0.001859  1.462975 -1.969501  0.525058  0.025192  1.101261]


In [5]:
# prediction
print("RMSE")
print("Baseline - OLS: ", rmse_ols)
print("Baseline - Ridge: ", rmse_ridge)
y_pred_mult = model_mult.predict(test_X)
for key, value in y_pred_mult.items():
    print(key, end=': ')
    temp_rmse = np.sqrt(np.mean((test_y - value) ** 2))
    print(temp_rmse)

y_pred_linear = model_linear.predict(test_X)
for key, value in y_pred_linear.items():
    print(key, end=': ')
    temp_rmse = np.sqrt(np.mean((test_y - value) ** 2))
    print(temp_rmse)

y_pred_slab = model_slab.predict(test_X)
for key, value in y_pred_slab.items():
    print(key, end=': ')
    temp_rmse = np.sqrt(np.mean((test_y - value) ** 2))
    print(temp_rmse)

y_pred_shrink_ridge = model_shrink_ridge.predict(test_X)
for key, value in y_pred_shrink_ridge.items():
    print(key, end=': ')
    temp_rmse = np.sqrt(np.mean((test_y - value) ** 2))
    print(temp_rmse)

RMSE
Baseline - OLS:  0.5814310951189064
Baseline - Ridge:  0.590639356884776
St: 0.5798729915371537
DSh: 0.5828927946929143
Sh: 0.5809127977742464
LSh: 0.5813029297459028
SR: 0.5538234530098122
GSR: 0.5644118807710132
SRR: 0.5814391993192146
