In [None]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import GridSearchCV
import lib
import numpy as np

In [2]:
# Build the "YAHOO" dataset object via the project-local `lib` helper.
data = lib.Dataset("YAHOO", random_state=1337, quantile_transform=True, quantile_noise=1e-3)

# Standardisation statistics are taken from the training targets only and
# then reused unchanged for the validation and test targets.
mu = data.y_train.mean()
std = data.y_train.std()


def normalize(x):
    """Shift `x` by `mu`, scale by `std`, and cast the result to float32."""
    return ((x - mu) / std).astype(np.float32)


# Overwrite the targets of every split with their standardised version.
data.y_train = normalize(data.y_train)
data.y_valid = normalize(data.y_valid)
data.y_test = normalize(data.y_test)

In [3]:
def _report_mse(label, model, features, targets):
    """Print `label` and the model's MSE on (features, targets) rounded to 4 places.

    Returns the unrounded MSE so callers can keep it for later use.
    """
    error = mean_squared_error(targets, model.predict(features))
    print(label, round(error, 4))
    return error


# --- Baseline: Lasso with constructor defaults, fitted on the training split only ---
regressor = Lasso(random_state=0)
regressor.fit(data.X_train, data.y_train)

mse_train = _report_mse("Train MSE before optimization: ", regressor, data.X_train, data.y_train)
mse_valid = _report_mse("Validation MSE before optimization: ", regressor, data.X_valid, data.y_valid)
mse_test = _report_mse("Test MSE before optimization: ", regressor, data.X_test, data.y_test)

# --- Hyper-parameter search: 5-fold CV over alpha on train + valid stacked together ---
param_grid = {'alpha': [0.1, 0.5, 1.0, 2.0, 5.0]}
grid_search = GridSearchCV(
    Lasso(random_state=0),
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(
    np.concatenate([data.X_train, data.X_valid]),
    np.concatenate([data.y_train, data.y_valid]),
)
best_regressor = grid_search.best_estimator_

# NOTE(review): with GridSearchCV's default refit=True, best_estimator_ is
# retrained on everything passed to fit() above (train + valid). The "after
# optimization" train and validation MSEs are therefore in-sample numbers;
# only the test MSE is measured on data the selected model never saw.
# NOTE(review): if grid_search.best_params_ lands on the smallest alpha (0.1),
# the grid probably needs extending downwards -- confirm before relying on it.
mse_train_optimized = _report_mse(
    "Train MSE after optimization: ", best_regressor, data.X_train, data.y_train
)
mse_valid_optimized = _report_mse(
    "Validation MSE after optimization: ", best_regressor, data.X_valid, data.y_valid
)
mse_test_optimized = _report_mse(
    "Test MSE after optimization: ", best_regressor, data.X_test, data.y_test
)

Train MSE before optimization:  1.0
Validation MSE before optimization:  1.0186
Test MSE before optimization:  0.9985
Train MSE after optimization:  0.7558
Validation MSE after optimization:  0.7722
Test MSE after optimization:  0.7541
