# Imports

In [18]:
import numpy as np
from sklearn.linear_model import Lasso
from datasets import generate_test_data
from preprocessing import polynomial_combinations
from models import OLS, Ridge
from assessment import mse, r_squared, cross_val_mse_and_r_squared
from resampling import train_test_split

# Data generation

In [3]:
# Experiment configuration: every tunable value of the notebook lives here.
data_points = 100   # first argument to generate_test_data (presumably the sample count)
max_degree = 5      # degree argument handed to polynomial_combinations
noise_scale = 0.1   # second argument to generate_test_data (presumably the noise level)
random_seed = 42    # fixed so that "Restart & Run All" can reproduce the numbers

# Seed NumPy's global RNG before any data is generated or resampled.
# NOTE(review): this only makes the run reproducible if generate_test_data and
# the resampling helpers draw from np.random's global state -- confirm in the
# project modules.
np.random.seed(random_seed)

# Raw inputs and noisy target, then the polynomial design matrix built from
# the raw inputs.
initial_data, target = generate_test_data(data_points, noise_scale)
X = polynomial_combinations(initial_data, max_degree)

# Part a): Ordinary Least Squares 

In [4]:
# Fit the project's OLS model on the full design matrix X and target.
ols_regressor = OLS()
ols_regressor.fit(X, target)
# Make predictions on train data:
# (the model is evaluated on the same X it was fitted on, so any metric
# computed from these predictions is an in-sample figure)
train_predictions = ols_regressor.predict(X)

The `assessment` module provides convenience functions (`mse`, `r_squared`, `cross_val_mse_and_r_squared`) for evaluating a fitted model:

In [8]:
# Confidence intervals
# TODO: not computed yet -- this heading is only a placeholder.

# In-sample MSE and R squared of the OLS predictions
computed_mse, computed_r_squared = (
    mse(target, train_predictions),
    r_squared(target, train_predictions),
)
print('MSE: {}\nR squared: {}'.format(computed_mse, computed_r_squared))

MSE: 0.5640182363946317
R squared: 0.931698844098268


## Resampling

In [10]:
# NOTE(review): the hold-out split below is never used in the visible part of
# this notebook -- the cross-validation call works on the full X / target.
# Either evaluate on X_test / y_test or drop the split once nothing else
# depends on it.
X_train, X_test, y_train, y_test = train_test_split(X, target)

# Cross-validated MSE and R squared for the OLS model
computed_mse, computed_r_squared = cross_val_mse_and_r_squared(
    ols_regressor, X, target
)
print(
    'MSE obtained using cross validation: {}'.format(computed_mse),
    'R squared obtained using cross validation: {}'.format(computed_r_squared),
    sep='\n',
)

MSE obtained using cross validation: 4.260666522003095
R squared obtained using cross validation: -2.6222418671658696


# Part b): Ridge regression

In [13]:
# Fit the project's Ridge model on the full design matrix X and target.
# NOTE(review): 0.1 is passed positionally; presumably it is the
# regularisation strength -- confirm against models.Ridge.
ridge_regressor = Ridge(0.1)
ridge_regressor.fit(X, target)
# Make predictions on train data:
# (this rebinds train_predictions, replacing the OLS predictions, so the
# metric cells must be run after the matching fit cell)
train_predictions = ridge_regressor.predict(X)

In [15]:
# Confidence intervals
# TODO: not computed yet -- this heading is only a placeholder.

# In-sample MSE and R squared of the Ridge predictions
computed_mse, computed_r_squared = (
    mse(target, train_predictions),
    r_squared(target, train_predictions),
)
print('MSE: {}\nR squared: {}'.format(computed_mse, computed_r_squared))

MSE: 1.830279115758963
R squared: 0.778357912630204


## Resampling

In [17]:
# Cross-validated MSE and R squared for the Ridge model
computed_mse, computed_r_squared = cross_val_mse_and_r_squared(
    ridge_regressor, X, target
)
print(
    'MSE obtained using cross validation: {}'.format(computed_mse),
    'R squared obtained using cross validation: {}'.format(computed_r_squared),
    sep='\n',
)

MSE obtained using cross validation: 0.4022373649889556
R squared obtained using cross validation: 0.7407934292145888


# Part c): Lasso regression

In [21]:
# Fit scikit-learn's Lasso on the full design matrix X and target; the
# penalty strength is spelled out by keyword instead of by position.
lasso_regressor = Lasso(alpha=0.1)
lasso_regressor.fit(X, target)

# Make predictions on train data:
train_predictions = lasso_regressor.predict(X)

In [23]:
# Confidence intervals
# TODO: not computed yet -- this heading is only a placeholder.

# In-sample MSE and R squared of the Lasso predictions
# NOTE(review): the output recorded for this cell shows R squared 0.0, which
# may mean the model predicts a constant -- inspect lasso_regressor.coef_
# before drawing conclusions from this number.
computed_mse, computed_r_squared = (
    mse(target, train_predictions),
    r_squared(target, train_predictions),
)
print('MSE: {}\nR squared: {}'.format(computed_mse, computed_r_squared))

MSE: 8.257813926401337
R squared: 0.0


In [24]:
# Cross-validated MSE and R squared for the Lasso model
computed_mse, computed_r_squared = cross_val_mse_and_r_squared(
    lasso_regressor, X, target
)
print(
    'MSE obtained using cross validation: {}'.format(computed_mse),
    'R squared obtained using cross validation: {}'.format(computed_r_squared),
    sep='\n',
)

MSE obtained using cross validation: 1.6818011773027457
R squared obtained using cross validation: -0.10236963994661083
