In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RepeatedKFold
#import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the credit data set from the working directory.
Credit = pd.read_csv('Credit.csv')
# A second data set is left disabled here:
#Insurance = pd.read_csv('Insurance.csv')

# Report (rows, columns), then preview the first rows as the cell output.
print(f'Dimension of the data: {Credit.shape}')
Credit.head()
     

In [None]:
# Expand every non-numeric column into indicator (dummy) columns.
Credit_dumms = pd.get_dummies(Credit)

# Response is Balance; predictors are every remaining column except ID.
y = Credit_dumms['Balance']
X = Credit_dumms.drop(columns=['ID', 'Balance'])

# Hold out 30% of the rows for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Fit the scaler on the training rows only, then apply that same
# transformation to both the training and the test predictors.
std_scale = StandardScaler()
std_scale.fit(X_train)
X_train_std, X_test_std = std_scale.transform(X_train), std_scale.transform(X_test)
     

In [None]:
# Take five predictors from the training set and build a standardized copy,
# so the same ridge penalty can be fitted on both versions.
X_temp = X_train.loc[:, ['Limit', 'Rating', 'Cards', 'Age', 'Education']]
std_scale_temp = StandardScaler()
X_temp_std = std_scale_temp.fit(X_temp).transform(X_temp)

LMRidge = Ridge(alpha=1.0)

# Coefficients when fitting on the unscaled columns ...
LMRidge.fit(X_temp, y_train)
print(LMRidge.coef_)

# ... and when fitting on the standardized columns (LMRidge keeps this fit).
LMRidge.fit(X_temp_std, y_train)
print(LMRidge.coef_)
     

In [None]:
# Model selection: choose the ridge penalty by repeated k-fold cross-validation.
# 10 folds repeated 3 times; the seed makes the fold assignment reproducible.
CV10 = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Candidate penalties 0.01, 0.02, ..., 0.99.
# The grid starts above zero because RidgeCV documents `alphas` as strictly
# positive; alpha = 0 is outside that range. Dividing an integer range keeps
# the grid values and its length exact, which a float step in np.arange
# does not guarantee.
LMRidgeCV = RidgeCV(
    alphas=np.arange(1, 100) / 100,
    cv=CV10,
    scoring='neg_mean_squared_error',
)
LMRidgeCV.fit(X_train_std, y_train)

# Summarize the chosen configuration.
print('alpha: %f' % LMRidgeCV.alpha_)

In [None]:
# Refit ridge on the standardized training set with the penalty chosen by
# cross-validation.
LMRidge = Ridge(alpha=LMRidgeCV.alpha_)
LMRidge.fit(X_train_std, y_train)

# Test-set mean squared error. It has to be printed explicitly: a notebook
# only echoes a bare expression when it is the last statement of the cell,
# so the value would otherwise be computed and discarded.
ridge_test_mse = mean_squared_error(y_test, LMRidge.predict(X_test_std))
print('test MSE: %f' % ridge_test_mse)

# Fitted coefficients, one per column of X_train_std.
print(LMRidge.coef_)

In [None]:
# Choose the lasso penalty by cross-validation over 0.01, 0.02, ..., 0.99,
# reusing the CV10 splitter.
# - The estimator is bound to LMLassoCV, the name the following lines use
#   (it was previously assigned to a different name, which raised NameError).
# - The grid starts above zero: the Lasso documentation advises against
#   alpha = 0 with the coordinate-descent solver.
# - max_iter matches the final refit below so both fits get the same
#   iteration budget.
LMLassoCV = LassoCV(alphas=np.arange(1, 100) / 100, cv=CV10, max_iter=10000)
LMLassoCV.fit(X_train_std, y_train)
print(LMLassoCV.alpha_)

# Refit on the standardized training set with the selected penalty.
LMLasso = Lasso(alpha=LMLassoCV.alpha_, max_iter=10000)
LMLasso.fit(X_train_std, y_train)

# Test-set mean squared error; printed explicitly because a bare expression
# in the middle of a cell produces no output.
lasso_test_mse = mean_squared_error(y_test, LMLasso.predict(X_test_std))
print('test MSE: %f' % lasso_test_mse)

# Fitted coefficients, one per column of X_train_std.
print(LMLasso.coef_)