# REGULARIZATION

In [1]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
import pandas as pd



### Creating the dataframe


In [2]:
df = pd.DataFrame({'x': np.random.normal(size=100), 'y': np.random.normal(size=100)})
df

Unnamed: 0,x,y
0,-1.737316,0.379297
1,-0.418457,-0.536133
2,-0.628665,0.348569
3,-0.312796,0.127247
4,-1.066724,0.469227
...,...,...
95,-0.051047,1.439409
96,0.371654,0.109466
97,-0.853134,0.645480
98,-0.891279,2.152187


### Adding polynomial features to dataframe

In [3]:
for i in range(2, 50):
    colname = 'x_%d' % i
    df[colname] = df['x'] ** i


In [4]:
# Separate dependent and independent variables
independent_variables = list(df.columns)
independent_variables.remove('y')
X = df[independent_variables]
y = df.y

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.80, random_state=1)


### Ridge Regression

In [5]:
lr = linear_model.Ridge(alpha=0.001)
lr.fit(X_train, y_train)
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

print("------ Ridge Regression ------")
print("Train MAE: ", metrics.mean_absolute_error(y_train, y_train_pred))
print("Train RMSE: ", np.sqrt(metrics.mean_squared_error(y_train, y_train_pred)))
print("Test MAE: ", metrics.mean_absolute_error(y_test, y_test_pred))
print("Test RMSE: ", np.sqrt(metrics.mean_squared_error(y_test, y_test_pred)))
print("Ridge Coef: ", lr.coef_)


------ Ridge Regression ------
Train MAE:  0.75984725666159
Train RMSE:  0.9545954972449561
Test MAE:  0.6569376480790978
Test RMSE:  0.8954746597248873
Ridge Coef:  [-5.04758601e-12  9.81769215e-13  3.54362390e-12 -1.44500939e-12
 -2.57920958e-12  1.51091281e-13  1.18566763e-12 -9.82583584e-13
 -8.57454964e-13 -2.85920084e-12 -9.79057423e-13  6.67941872e-13
  1.92704203e-12  5.10764248e-12 -3.57434439e-13  2.40522907e-13
 -1.30133086e-12  1.04455286e-12 -8.80808989e-13  3.33065406e-12
 -3.49902359e-12  9.22374616e-13  6.61708139e-13  8.05729209e-13
 -2.99949816e-12 -1.76234528e-12  1.37683290e-12  2.83212485e-12
 -1.94522442e-12 -2.24364070e-12 -2.77604377e-13  7.55900420e-13
  3.87773692e-12  1.60343286e-12 -2.97581619e-13  3.78589802e-12
  4.11869418e-12  3.06073048e-13  1.61822741e-12 -1.81886756e-12
 -1.59313882e-12 -4.13632417e-13 -1.31600117e-12  1.29684735e-12
 -2.14786531e-13  2.03031104e-12 -1.20140931e-12 -1.03627763e-13
  1.05278629e-13]


### LASSO Regression 

In [15]:
lr = linear_model.Lasso(alpha=0.001) 
lr.fit(X_train, y_train) 
y_train_pred = lr.predict(X_train) 
y_test_pred = lr.predict(X_test)
print("----- LASSO Regression -----") 
print("Train MAE: ", metrics.mean_absolute_error(y_train, y_train_pred) )
print("Train RMSE: ", np.sqrt(metrics.mean_squared_error(y_train, y_train_pred)))
print("Test MAE: ", metrics.mean_absolute_error(y_test, y_test_pred))
print("Test RMSE: ", np.sqrt(metrics.mean_squared_error(y_test, y_test_pred)))
print("LASSO Coef: ", lr.coef_)

----- LASSO Regression -----
Train MAE:  0.6715472938285949
Train RMSE:  0.8436385363167183
Test MAE:  5.3816917528852635
Test RMSE:  20.340182489461387
LASSO Coef:  [-4.74493020e-01 -1.21720596e+00  7.56816260e-01  6.77167199e-01
 -2.64399145e-01 -5.69484076e-02  7.14210932e-03 -1.81169430e-02
  4.94786871e-03 -5.77700851e-04  8.25226104e-04  1.96475786e-04
  9.09531669e-05  4.93517222e-05  4.91924743e-06  7.30448456e-06
 -8.90095430e-07  7.60780492e-07 -3.99279618e-07  3.47513663e-08
 -1.00467135e-07 -9.01412891e-09 -2.10384260e-08 -3.63574944e-09
 -4.01554378e-09 -8.92472202e-10 -7.24920748e-10 -1.85158412e-10
 -1.26166808e-10 -3.52481815e-11 -2.14089359e-11 -6.37116799e-12
 -3.56754680e-12 -1.11297666e-12 -5.86659701e-13 -1.89871032e-13
 -9.55284899e-14 -3.18429480e-14 -1.54412984e-14 -5.27329335e-15
 -2.48215708e-15 -8.64996532e-16 -3.97336693e-16 -1.40857867e-16
 -6.34038939e-17 -2.28084507e-17 -1.00934019e-17 -3.67698977e-18
 -1.60390942e-18]


  model = cd_fast.enet_coordinate_descent(
