# Linear Regression - Ridge

### Import Libraries

In [3]:
import pandas as pd
import numpy  as np

from sklearn import metrics      as mt
from sklearn import linear_model as lm

### Load Dataset

In [4]:
# Read the feature (X) and target (y) CSV files of each split; every split
# lives in its own directory and is loaded as a (features, target) pair.
X_train, y_train = (
    pd.read_csv('Training/X_training.csv'),
    pd.read_csv('Training/y_training.csv'),
)
X_val, y_val = (
    pd.read_csv('Validation/X_val.csv'),
    pd.read_csv('Validation/y_val.csv'),
)
X_test, y_test = (
    pd.read_csv('Test/X_test.csv'),
    pd.read_csv('Test/y_test.csv'),
)

### Model Training

In [5]:
# Ridge (L2-regularised linear regression): fit one model per candidate alpha
# and record its metrics on the training split.
a = np.arange(1, 20, 1)  # candidate alphas: 1, 2, ..., 19
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in a:
    # define model
    ridge = lm.Ridge(alpha=i, max_iter=1000)

    # Fit
    ridge.fit(X_train, y_train)

    # Predict on the same data the model was fitted on (training metrics)
    yhat_train = ridge.predict(X_train)

    r2 = mt.r2_score(y_train, yhat_train)
    r2_list.append(r2)

    mse = mt.mean_squared_error(y_train, yhat_train)
    mse_list.append(mse)

    rmse = np.sqrt(mse)
    rmse_list.append(rmse)

    mae = mt.mean_absolute_error(y_train, yhat_train)
    mae_list.append(mae)

    # scikit-learn returns MAPE as a fraction (1.0 == 100%), not a percentage
    mape = mt.mean_absolute_percentage_error(y_train, yhat_train)
    mape_list.append(mape)

# `best_a` is the *position* of the best alpha in `a` (first alpha with the
# highest training R2), not the alpha value itself.
# NOTE(review): these scores come from the training split, where stronger
# regularisation cannot improve the squared-error fit, so this selection tends
# to favour the smallest alpha -- consider scoring on X_val / y_val instead.
best_a = r2_list.index(max(r2_list))

# Report every metric for the *selected* alpha. Taking min() of each list
# independently could mix numbers that belong to different models.
print(f'The best parameters ->\n'
      f'Alpha: {a[best_a]}\n'
      f'R2: {r2_list[best_a]:.3f}\n'
      f'MSE: {mse_list[best_a]:.2f}\n'
      f'RMSE: {rmse_list[best_a]:.2f}\n'
      f'MAE: {mae_list[best_a]:.2f}\n'
      f'MAPE: {mape_list[best_a]:.2f}')

The best parameters ->
Alpha: 1
R2: 0.046
MSE: 456.00
RMSE: 21.35
MAE: 17.00
MAPE: 8.65


### Validation

In [6]:
# Refit Ridge on the training split with the selected alpha, then score the
# model on the validation split (the validation data is never used for fitting).
best_a = r2_list.index(max(r2_list))  # position of the best alpha in `a`

# Define and train the model
ridge = lm.Ridge(alpha=a[best_a], max_iter=1000)
ridge.fit(X_train, y_train)

# Predict on the validation features
yhat_val = ridge.predict(X_val)

# Compute all validation metrics first ...
r2_val = mt.r2_score(y_val, yhat_val)
mse_val = mt.mean_squared_error(y_val, yhat_val)
rmse_val = np.sqrt(mse_val)
mae_val = mt.mean_absolute_error(y_val, yhat_val)
mape_val = mt.mean_absolute_percentage_error(y_val, yhat_val)

# ... then report them together
print(f'Alpha: {a[best_a]}')
print(f'R2: {r2_val:.3f}')
print(f'MSE: {mse_val:.3f}')
print(f'RMSE: {rmse_val:.3f}')
print(f'MAE: {mae_val:.3f}')
print(f'MAPE: {mape_val:.3f}')

Alpha: 1
R2: 0.040
MSE: 458.445
RMSE: 21.411
MAE: 17.039
MAPE: 8.682


### Test

In [8]:
# Final evaluation: refit Ridge on training + validation data with the selected
# alpha, then score it on the test split (the test data is never used for
# fitting).
best_a = r2_list.index(max(r2_list))  # position of the best alpha in `a`

# define model
ridge = lm.Ridge( alpha=a[best_a], max_iter=1000 )

# model training
# pd.concat (instead of np.concatenate) keeps the inputs as DataFrames, so the
# model is fitted with column names and predicting on the DataFrame X_test does
# not trigger scikit-learn's "fitted without feature names" warning.
# Assumes the training and validation files share identical columns -- verify.
ridge.fit(pd.concat([X_train, X_val], ignore_index=True),
          pd.concat([y_train, y_val], ignore_index=True))

# predict
y_pred = ridge.predict( X_test )

print(f'Alpha: {a[best_a]}')

r2_test = mt.r2_score(y_test, y_pred)
print(f'R2: {r2_test:.3f}')

mse_test = mt.mean_squared_error(y_test, y_pred)
print(f'MSE: {mse_test:.3f}')

rmse_test = np.sqrt(mse_test)
print(f'RMSE: {rmse_test:.3f}')

mae_test = mt.mean_absolute_error(y_test, y_pred)
print(f'MAE: {mae_test:.3f}')

# scikit-learn returns MAPE as a fraction (1.0 == 100%), not a percentage
mape_test = mt.mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape_test:.3f}')

Alpha: 1
R2: 0.051
MSE: 461.988
RMSE: 21.494
MAE: 17.144
MAPE: 8.532


