In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
import pickle

In [2]:
# Read the training features (X) and targets (y) from the two .npy files, in that order.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code from the file. Keep it only if these files are trusted
# and really contain pickled objects — TODO confirm plain numeric arrays
# would not load fine without it.
X, y = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in ("../data/X_train_scaled.npy", "../data/y_train_scaled.npy")
)

In [3]:
# Quick sanity check after loading: report the dimensions of both arrays.
for label, array in (("X shape:", X), ("y shape:", y)):
    print(label, array.shape)

X shape: (3104579, 26)
y shape: (3104579, 1)


In [4]:
# Linear regression estimator created with scikit-learn's default settings;
# it is the base estimator handed to GridSearchCV further down.
regressor = LinearRegression()

In [5]:
# Empty grid: no hyperparameters are tuned, so the grid search evaluates a
# single candidate (the estimator exactly as constructed) across the CV folds.
param_grid = {}

In [7]:
# Cross-validated search around the base regressor.
# Scores are negated MSE (scikit-learn maximises scores, so values closer to
# zero are better), computed over 10 folds, with progress messages switched off.
grid_search = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    cv=10,
    scoring="neg_mean_squared_error",
    verbose=False,
)

In [8]:
# Seed NumPy's global RNG before fitting.
# NOTE(review): nothing visible in this notebook requests shuffling or random
# sampling (integer cv, plain LinearRegression), so this seed may have no
# effect on the results — confirm before relying on it for reproducibility.
np.random.seed(2907)
# Run the cross-validated search on the full training arrays. As the last
# expression of the cell, the fitted GridSearchCV object is displayed.
grid_search.fit(X, y)

GridSearchCV(cv=10, estimator=LinearRegression(), param_grid={},
             scoring='neg_mean_squared_error', verbose=False)

In [9]:
# Header line, followed by the raw cross-validation results.
print("GridSearch results:")
# Bare last expression so the notebook renders the dictionary of fit/score
# timings and per-fold test scores (negated MSE, per the scoring setting).
grid_search.cv_results_

GridSearch results:


{'mean_fit_time': array([2.28739893]),
 'std_fit_time': array([0.04183615]),
 'mean_score_time': array([0.0078469]),
 'std_score_time': array([0.00050785]),
 'params': [{}],
 'split0_test_score': array([-0.45167504]),
 'split1_test_score': array([-0.34451623]),
 'split2_test_score': array([-0.28017544]),
 'split3_test_score': array([-0.30239897]),
 'split4_test_score': array([-0.34334178]),
 'split5_test_score': array([-0.45629733]),
 'split6_test_score': array([-0.37109504]),
 'split7_test_score': array([-0.43452845]),
 'split8_test_score': array([-0.45180146]),
 'split9_test_score': array([-0.45359976]),
 'mean_test_score': array([-0.38894295]),
 'std_test_score': array([0.06509504]),
 'rank_test_score': array([1], dtype=int32)}

In [10]:
# Persist the whole fitted GridSearchCV object (estimator plus CV results)
# so the search can be inspected later without re-running the fit.
with open("../grids/grid_search_linear_regressor_01.pkl", "wb") as grid_file:
    pickle.dump(grid_search, grid_file)

In [11]:
# Pull out the winning estimator. The search above leaves `refit` at its
# default, so this is the model GridSearchCV refitted after cross-validation.
best_regressor = grid_search.best_estimator_

In [12]:
# Summarise the selected model: the estimator itself, then its learned parameters.
# The estimator is passed to the first print so the "Best regressor:" label is
# followed by a value instead of being printed on its own.
# NOTE(review): the previously recorded output shows several coefficients of
# order 1e11-1e12 next to O(1) ones. That pattern usually points to (near-)
# collinear feature columns — worth checking X before interpreting the weights.
print("Best regressor:", best_regressor)
print("Coefs.:", best_regressor.coef_)
print("Intercept:", best_regressor.intercept_)

Best regressor:
Coefs.: [[-3.58912425e+11  8.24859619e-01  5.18072491e-01  1.29200288e+11
   6.57992688e+00  1.65438003e+12 -1.56862265e+12 -1.41916092e-01
   5.34514167e+00 -1.82981851e-01  2.43634953e-01 -1.84599488e-01
   8.54979997e-01  8.98692457e-01 -9.47956903e-01  8.46490784e-01
  -6.18534635e-01 -2.11881714e+01 -1.02964826e+01 -9.74768829e+00
  -4.57653618e+00 -6.30145609e-01 -6.67421341e-01 -2.30918121e+00
  -1.78938103e+00  8.77685547e-02]]
Intercept: [0.00432633]


In [13]:
# Save only the selected estimator, separately from the full grid-search object.
with open("../models/model_linear_regressor_01.pkl", "wb") as model_file:
    pickle.dump(best_regressor, model_file)

In [14]:
# Predict on X, the same features passed to fit above, so y_hat holds
# in-sample (training-set) predictions rather than held-out ones.
y_hat = best_regressor.predict(X)

In [15]:
# Goodness-of-fit figures for y_hat against y. Since y_hat was predicted from
# the training features, both numbers describe in-sample fit, not
# generalisation performance.
r2, mse = r2_score(y, y_hat), mean_squared_error(y, y_hat)

# One print call, one metric per line, each rounded to three decimals.
print(f"R2 Score: {r2:.3f}", f"MSE: {mse:.3f}", sep="\n")

R2 Score: 0.615
MSE: 0.385
