In [None]:
import pickle
from pathlib import Path

import numpy as np
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

In [None]:
# Load the pre-scaled training features (X) and targets (y) from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# run arbitrary code from the file. It is only needed if these files store
# object arrays -- confirm, and drop the flag if they are plain numeric arrays.
X, y = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        "../data/X_train_scaled.npy",
        "../data/y_train_scaled.npy",
    )
)

In [None]:
# Sanity check: report the dimensions of the feature matrix and the target array.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: (3104579, 26)
y shape: (3104579, 1)


In [None]:
# Base estimator for the grid search: a support vector regressor created with
# scikit-learn's default hyperparameters (only C is tuned by the search below).
svm = SVR()

In [None]:
# Hyperparameter candidates for the search: two values of SVR's `C` parameter.
param_grid = dict(C=[1, 100])

In [None]:
# Exhaustive search over `param_grid` for the `svm` estimator.
# Candidates are ranked by negated mean squared error (higher is better) using
# 10-fold cross-validation; verbose=10 logs the start of every individual fit.
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=10,
    scoring="neg_mean_squared_error",
    verbose=10,
)

In [None]:
# The targets are loaded as a single-column 2-D array (see the shape printout);
# flatten them to 1-D, the layout used for the target vector when fitting.
y = np.ravel(y)

In [None]:
# Fit the grid search.
#
# Kernel SVR training cost grows more than quadratically with the number of
# samples, so running 2 candidates x 10 folds on every row of a multi-million-row
# X does not finish in practical time. The search is therefore run on a
# reproducible random subsample of at most MAX_FIT_SAMPLES rows.
MAX_FIT_SAMPLES = 20_000  # set to None to fit on every row (expect a very long run)

# Kept from the original cell so anything relying on the global NumPy RNG state
# still sees the same seed; the subsample uses its own generator and therefore
# does not advance the global state.
np.random.seed(2907)
subsample_rng = np.random.default_rng(2907)

n_rows = X.shape[0]
if MAX_FIT_SAMPLES is not None and n_rows > MAX_FIT_SAMPLES:
    # Sample without replacement and sort the indices so the selected rows keep
    # their original relative order (cv=10 builds folds from contiguous,
    # unshuffled slices of whatever is passed to fit).
    fit_idx = np.sort(
        subsample_rng.choice(n_rows, size=MAX_FIT_SAMPLES, replace=False)
    )
    X_fit, y_fit = X[fit_idx], y[fit_idx]
else:
    # Small enough (or cap disabled): use all rows, exactly as before.
    X_fit, y_fit = X, y

grid_search.fit(X_fit, y_fit)

Fitting 10 folds for each of 2 candidates, totalling 20 fits
[CV 1/10; 1/2] START C=1........................................................


In [None]:
# Show the per-candidate cross-validation results collected by the search.
# The bare last expression is rendered as the cell's output.
print("GridSearch results:")
grid_search.cv_results_

In [None]:
# Persist the whole fitted GridSearchCV object so the CV results can be
# inspected later without refitting.
grid_search_path = Path("../grids/grid_search_svm_01.pkl")
# Create the output folder if needed: a missing directory would otherwise raise
# here and the (expensive) fitted search would only survive in kernel memory.
grid_search_path.parent.mkdir(parents=True, exist_ok=True)
with grid_search_path.open(mode="wb") as f:
    pickle.dump(grid_search, f)

In [None]:
# Estimator selected by the search. The search was built without a `refit`
# argument, so this relies on GridSearchCV's default of refitting the best
# candidate on all data passed to fit().
best_regressor = grid_search.best_estimator_

In [None]:
# Display the selected estimator's repr (shows its non-default hyperparameters).
print("Best regressor", best_regressor)

In [None]:
# Persist only the selected estimator for later use without the search wrapper.
model_path = Path("../models/model_svm_01.pkl")
# Create the output folder if needed so a missing directory cannot make the
# save fail after the model has already been trained.
model_path.parent.mkdir(parents=True, exist_ok=True)
with model_path.open(mode="wb") as f:
    pickle.dump(best_regressor, f)

In [None]:
# Predict on X, which was loaded from the training file, so these are
# in-sample predictions rather than predictions on held-out data.
y_hat = best_regressor.predict(X)

In [None]:
# Score the predictions against the (scaled) targets: coefficient of
# determination and mean squared error. Because y_hat was predicted from the
# training-file features, these are not held-out estimates.
r2, mse = r2_score(y, y_hat), mean_squared_error(y, y_hat)

print(f"R2 Score: {r2:.3f}", f"MSE: {mse:.3f}", sep="\n\n")