<a href="https://colab.research.google.com/github/ab-sa/Statistical-Machine-Learning/blob/main/Lecture7-modified.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RepeatedKFold, cross_val_score
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

  import pandas.util.testing as tm


Import Credit data

In [2]:
Credit = pd.read_csv('Credit.csv')
#Insurance = pd.read_csv('Insurance.csv')
print('Dimension of the data: ' + str(Credit.shape))
Credit.head()

Dimension of the data: (400, 12)


Unnamed: 0,ID,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance
0,1,14.891,3606,283,2,34,11,Male,No,Yes,Caucasian,333
1,2,106.025,6645,483,3,82,15,Female,Yes,Yes,Asian,903
2,3,104.593,7075,514,4,71,11,Male,No,No,Asian,580
3,4,148.924,9504,681,3,36,11,Female,No,No,Asian,964
4,5,55.882,4897,357,2,68,16,Male,No,Yes,Caucasian,331


Data preprocessing: splitting, standardizing, etc.

In [3]:
Credit_dumms = pd.get_dummies(Credit)
y = Credit_dumms['Balance']
X = Credit_dumms.drop(['ID', 'Balance'], axis=1)

X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

std_scale = StandardScaler().fit(X_train)
X_train_std = std_scale.transform(X_train)
X_test_std  = std_scale.transform(X_test)

**Ridge**: A linear model with a Ridge penalty term (norm 2)

First: with a fixed lambda (alpha) on a samll set of features:

In [4]:
X_temp = X_train[['Limit', 'Rating', 'Cards', 'Age', 'Education']]
std_scale_temp = StandardScaler().fit(X_temp)
X_temp_std = std_scale_temp.transform(X_temp)

LMRidge = Ridge(alpha=1.0)
print(LMRidge.fit(X_temp, y_train).coef_)
print(LMRidge.fit(X_temp_std, y_train).coef_)

[ 0.08097534  1.4132531  19.78523476 -1.91165513  2.72674614]
[191.49696239 203.88383002  28.46754314 -33.91630484   8.50066239]


Second: Find the optimal value for alpha (lambda) on all features:

In [5]:
# model evaluation: coss validation
CV10 = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
alphas = np.arange(-2, 5, 0.05)
scores = [-cross_val_score(Ridge(np.exp(alpha)), X_train_std, y_train, cv=10, scoring='neg_mean_squared_error') for alpha in alphas]

In [6]:
MSPE_avg = [np.mean([el for el in sublist]) for sublist in scores]
MSPE_sd = [np.std([el for el in sublist]) for sublist in scores]
alpha_min = np.exp(alphas[np.argmin(MSPE_avg)])
MSPE_1se = [x for x in MSPE_avg[(np.argmin(MSPE_avg)+1):]
            if x <= min(MSPE_avg) + MSPE_sd[np.argmin(MSPE_avg)]][0]
alpha_1se = np.exp(alphas[MSPE_avg == MSPE_1se])
print('alpha with min MSPE: ', alpha_min)
print('alpha with min MSPE + 1 SE: ', alpha_1se)

alpha with min MSPE:  0.9048374180359611
alpha with min MSPE + 1 SE:  [0.95122942]


Fit a Ridge model with the optimal alpha (lambda):

In [7]:
#LMRidgeCV = RidgeCV(alphas=np.arange(0, 2, 0.05), cv=CV10, scoring='neg_mean_squared_error')
#LMRidgeCV.fit(X_train_std, y_train)
# summarize chosen configuration
#print('alpha: %f' % LMRidgeCV.alpha_)
LMRidge_min = Ridge(alpha = alpha_min)
LMRidge_1se = Ridge(alpha = alpha_1se)
LMRidge_min.fit(X_train_std, y_train)
LMRidge_1se.fit(X_train_std, y_train)
print('MSPE of Ridge with min alpha rule: ', mean_squared_error(y_test, LMRidge_min.predict(X_test_std)))
print('MSPE of Ridge with 1-SE alpha rule: ', mean_squared_error(y_test, LMRidge_1se.predict(X_test_std)))
print('Coef estimates of Ridge with min alpha rule: ', LMRidge_min.coef_)
print('Coef estimates of Ridge with 1-SE alpha rule: ', LMRidge_1se.coef_)

MSPE of Ridge with min alpha rule:  11503.05411435731
MSPE of Ridge with 1-SE alpha rule:  11505.45260950522
Coef estimates of Ridge with min alpha rule:  [-2.64004506e+02  3.01097423e+02  2.94347126e+02  1.72235140e+01
 -1.51139305e+01 -1.48254410e+00 -1.45384213e-01  1.45384213e-01
 -6.31495274e+01  6.31495274e+01  4.26584383e-01 -4.26584383e-01
 -5.77954739e+00  6.31092833e+00 -6.07877456e-01]
Coef estimates of Ridge with 1-SE alpha rule:  [-2.63785468e+02  3.00893317e+02  2.94330395e+02  1.72269046e+01
 -1.51208060e+01 -1.47925645e+00 -1.50217669e-01  1.50217669e-01
 -6.31395203e+01  6.31395203e+01  4.27583995e-01 -4.27583995e-01
 -5.78627329e+00  6.31032974e+00 -6.01445112e-01]


LASSO: Fit a linear model with a LASSO penalty term (norm 1):

In [8]:
LMLassoCV = LassoCV(alphas = np.arange(0, 1, 0.01), cv = CV10, max_iter = 1000)
LMLassoCV.fit(X_train_std, y_train)
print(LMLassoCV.alpha_)

LMLasso = Lasso(max_iter = 10000)
LMLasso.set_params(alpha=LMLassoCV.alpha_)

LMLasso.fit(X_train_std, y_train)
mean_squared_error(y_test, LMLasso.predict(X_test_std))
print(LMLasso.coef_)

  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,


0.99
[-2.63863031e+02  3.03949746e+02  2.91323960e+02  1.64529674e+01
 -1.42079560e+01 -5.81108303e-01 -0.00000000e+00  0.00000000e+00
 -1.25577343e+02  1.82430832e-11  0.00000000e+00 -0.00000000e+00
 -4.49275741e+00  6.20221002e+00 -0.00000000e+00]


  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
  positive,
