In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, Normalizer, RobustScaler

# Load the credit data set; the relative path is resolved against the
# kernel's working directory.
# NOTE(review): if the CSV carries an unnamed index column, it is read as an
# ordinary column and the later cells will use it as a feature — confirm, and
# pass index_col=0 if that is the case.
df = pd.read_csv('Credit.csv')

In [None]:
# Print column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Preview the first five rows.
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# Split the raw frame into the regression target (`labels`) and a one-hot
# encoded feature frame (`df`).
#
# The guard keeps this cell re-runnable: after the first run `df` no longer
# contains 'Balance', so an unguarded second run would raise a KeyError.
if 'Balance' in df.columns:
    # get_dummies expands object/categorical columns into indicator columns
    # and passes numeric columns through unchanged.
    encoded = pd.get_dummies(df)
    labels = np.array(encoded['Balance'])
    df = encoded.drop(columns='Balance')


In [None]:
# Regularization grid: 50 log-spaced lambda values from 1e-3 to 1e3.
lambdas = np.logspace(start=-3, stop=3, num=50)

In [None]:
# Trace the Ridge coefficient path: refit on the standardized features for
# every lambda and record one {feature: coefficient} mapping per fit.

# Standardize the features (zero mean, unit variance per column).
scaled_values = StandardScaler().fit_transform(df)
X_ = pd.DataFrame(scaled_values, columns=df.columns)

ridge_reg = Ridge()
ridge_coefs = []
for lam in lambdas:
    # set_params and fit both return the estimator, so the calls chain.
    ridge_reg.set_params(alpha=lam).fit(X_, labels)
    ridge_coefs.append(dict(zip(X_.columns, ridge_reg.coef_)))

In [None]:
# Tabulate the Ridge coefficients (one row per fit) and append the lambda
# that produced each row as a trailing 'Lambda' column.
ridge_coef_df = pd.DataFrame(ridge_coefs).assign(Lambda=lambdas)
ridge_coef_df.head(10)

In [None]:
# Plot selected Ridge coefficients to see how they vary with lambda.
fig, ax = plt.subplots(figsize=(10, 10))

# (coefficient column, legend label) pairs to draw.
for column, label in [('Income', 'Income'), ('Rating', 'Rating'),
                      ('Limit', 'Limit'), ('Student_Yes', 'Student')]:
    ax.plot(ridge_coef_df['Lambda'], ridge_coef_df[column], label=label)

# The lambda grid is log-spaced over several decades; on a linear axis almost
# every point collapses against the left edge and the path is unreadable.
ax.set_xscale('log')
ax.legend(loc='best')
ax.set_xlabel('Lambda')
ax.set_ylabel('Coefficients')
ax.set_title('Ridge coefficients as a function of the regularization');

In [None]:
# Trace the Lasso coefficient path over the same lambda grid, reusing the
# standardized feature frame X_.
lasso_reg = Lasso()
lasso_coefs = []
for lam in lambdas:
    # set_params and fit both return the estimator, so the calls chain.
    lasso_reg.set_params(alpha=lam).fit(X_, labels)
    lasso_coefs.append(dict(zip(X_.columns, lasso_reg.coef_)))

In [None]:
# Tabulate the Lasso coefficients (one row per fit) with the matching lambda
# in a trailing 'Lambda' column, then show rows 20-29 of the path.
lasso_coef_df = pd.DataFrame(lasso_coefs).assign(Lambda=lambdas)
lasso_coef_df.iloc[20:30]

In [None]:
# Plot selected Lasso coefficients to see how they vary with lambda.
fig, ax = plt.subplots(figsize=(10, 10))

# (coefficient column, legend label) pairs to draw.
for column, label in [('Income', 'Income'), ('Rating', 'Rating'),
                      ('Limit', 'Limit'), ('Student_Yes', 'Student')]:
    ax.plot(lasso_coef_df['Lambda'], lasso_coef_df[column], label=label)

# The lambda grid is log-spaced over several decades; on a linear axis almost
# every point collapses against the left edge and the path is unreadable.
ax.set_xscale('log')
ax.legend(loc='best')
ax.set_xlabel('Lambda')
ax.set_ylabel('Coefficients')
ax.set_title('Lasso coefficients as a function of the regularization');

In [None]:
# Compare the in-sample (training) mean squared error of Lasso and Ridge over
# the lambda grid. Each model is scored on the same data it was fit on, so
# the curves show fit quality only, not generalization.
fig, ax = plt.subplots(figsize=(10, 10))

for Model in [Lasso, Ridge]:
    mse = []
    for lam in lambdas:
        model = Model(alpha=lam)
        model.fit(X_, labels)
        y_pred = model.predict(X_)
        mse.append(mean_squared_error(labels, y_pred))
    ax.plot(lambdas, mse, label=Model.__name__)

# Log-spaced lambdas need a log axis to be readable.
ax.set_xscale('log')
ax.legend(loc='best')
ax.set_xlabel('Lambda')
ax.set_ylabel('Mean Squared Error')
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title('Training MSE as a function of the regularization');

In [None]:
# Compare Lasso and Ridge by mean 3-fold cross-validated score over the
# lambda grid. No `scoring` is passed, so each estimator's own `score`
# method is used (R^2 for these regressors).
fig, ax = plt.subplots(figsize=(10, 10))

for Model in [Lasso, Ridge]:
    # Standardize inside the pipeline, on the unscaled features in `df`, so the
    # scaler is refit on each training fold. Scaling the whole data set up
    # front (as X_ does) lets validation-fold statistics leak into training.
    scores = [
        cross_val_score(
            make_pipeline(StandardScaler(), Model(alpha=lam)),
            df, labels, cv=3,
        ).mean()
        for lam in lambdas
    ]
    ax.plot(lambdas, scores, label=Model.__name__)

# Log-spaced lambdas need a log axis to be readable.
ax.set_xscale('log')
ax.legend(loc='best')
ax.set_xlabel('Lambda')
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_ylabel('Cross Validation Score');

  