# Lasso Regression on Loan Default Data
## With Implementations in Scikit-Learn and Pathwise LASSO in Numpy

## Import libraries and Data

In [1]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import linear_model

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import time

In [2]:
df = pd.read_csv('../Data/cleaned.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,loan_amount,term,income,Credit_Score,Status,loan_limit_cf,loan_limit_ncf,submission_of_application_not_inst,submission_of_application_to_inst,...,Region_North-East,Region_central,Region_south,age_25-34,age_35-44,age_45-54,age_55-64,age_65-74,age_<25,age_>74
0,0,116500,360.0,1740.0,758,1,1,0,0,1,...,0,0,1,1,0,0,0,0,0,0
1,1,206500,360.0,4980.0,552,1,1,0,0,1,...,0,0,0,0,0,0,1,0,0,0
2,2,406500,360.0,9480.0,834,0,1,0,0,1,...,0,0,1,0,1,0,0,0,0,0
3,3,456500,360.0,11880.0,587,0,1,0,1,0,...,0,0,0,0,0,1,0,0,0,0
4,4,696500,360.0,10440.0,602,0,1,0,1,0,...,0,0,0,1,0,0,0,0,0,0


In [3]:
# Split target and features
X = df.drop('Credit_Score', axis=1)
y = df['Credit_Score']
display(y)

0         758
1         552
2         834
3         587
4         602
         ... 
135103    659
135104    569
135105    702
135106    737
135107    830
Name: Credit_Score, Length: 135108, dtype: int64

In [4]:
# Training and testing data
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.3
)
X_train.head()

Unnamed: 0.1,Unnamed: 0,loan_amount,term,income,Status,loan_limit_cf,loan_limit_ncf,submission_of_application_not_inst,submission_of_application_to_inst,co-applicant_credit_type_CIB,...,Region_North-East,Region_central,Region_south,age_25-34,age_35-44,age_45-54,age_55-64,age_65-74,age_<25,age_>74
40155,40155,936500,360.0,15300.0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,1
68364,68364,396500,360.0,13320.0,1,1,0,1,0,1,...,0,0,0,1,0,0,0,0,0,0
78662,78662,506500,360.0,8520.0,1,1,0,0,1,1,...,0,0,0,0,0,0,1,0,0,0
6872,6872,286500,360.0,14700.0,0,1,0,1,0,0,...,0,0,1,1,0,0,0,0,0,0
21401,21401,446500,360.0,11400.0,0,1,0,0,1,0,...,0,0,1,0,0,0,0,1,0,0


## Fitting Scikit-Learn LASSO Regression

In [5]:
start_time = time.time()
mean_squared_errors = []
alphas =  np.arange(0.01, 2, 0.1)

# Lasso Regression
for alpha in alphas:
  lasso = linear_model.Lasso(alpha=alpha)
  lasso.fit(X_train, y_train)

  # Predict and calculate MSE
  y_pred = lasso.predict(X_test)
  mse = mean_squared_error(y_test, y_pred)
  mean_squared_errors.append(mse)

# Plot MSE vs Alpha
fig = go.Figure(
  data=px.scatter(
    x=alphas,
    y=mean_squared_errors,
    title="Scikit-Learn LASSO Regression MSE vs Alpha Penalty",
    labels = {
      'x': 'Alpha',
      'y': 'MSE'
    }
  )
)

fig.show()

In [6]:
print(
  f"Time taken: {time.time() - start_time} seconds"
)

Time taken: 5.275741100311279 seconds


### Best Model is about Alpha = 0.40 without overfitting 

In [7]:
alpha = 0.4
lasso = linear_model.Lasso(alpha=alpha)
lasso.fit(X_train, y_train)

# Print predictions from this model
y_pred = lasso.predict(X_test)
y_pred

array([700.57523712, 698.85598386, 699.69829701, ..., 699.58361234,
       699.70382843, 700.08994163])

In [8]:
# Sort features by coefficient
coef_df = pd.DataFrame({'feature': X_test.columns, 'coef': lasso.coef_})
coef_df.sort_values(by='coef', ascending=False)

Unnamed: 0,feature,coef
1,loan_amount,0.000006
32,occupancy_type_pr,0.000000
34,loan_type_type1,0.000000
35,loan_type_type2,-0.000000
36,loan_type_type3,-0.000000
...,...,...
28,Credit_Worthiness_l2,0.000000
29,approv_in_adv_nopre,0.000000
0,Unnamed: 0,-0.000007
3,income,-0.000062


## Pathwise Coordinate Descent LASSO

The optimal $\beta$ by LASSO regression is:
$$
\begin{equation*}
\beta^* = argmin_{\beta} ||y - X\beta||^2 + \lambda||\beta||_1
\end{equation*}
$$
where $\lambda$ is the regularization parameter and greater than zero.

Each coefficient can be found individually with least-squares and residuals such as:
$$
\begin{equation*}
  r_{i,j} = y_i = \sum_{k=1}^n x_{i,k} \beta_k 
\end{equation*}
$$
$$
\begin{equation*}
\beta^*_j = \frac{1}{n} \sum_{i=1}^n x_{i,j} r_{i,j}
\end{equation*}
$$

For pathwise coordinate descent LASSO, we make updates by:
$$
\begin{equation*}
\beta^*_j = \frac{1}{n} \sum_{i=1}^n x_{i,j} r_i + \beta_j
\end{equation*}
$$

In [9]:
def pathwise_lasso(X, y, l_start):
  X = np.hstack((np.ones((len(X), 1)), X))
  nRows, nCols = np.shape(X)
  B_star = np.zeros((nCols))
  tolerance = 1e-6 

  # At or above l_max, all coefficients (except intercept) will be brought to 0
  l_max = max(list(abs( X[:, 1:].T @ y))) / nRows
  if l_start >= l_max:
    return np.append(np.mean(y), np.zeros((nCols - 1)))
  
  path_length = 100
  lambdas = np.geomspace(l_max, l_start, path_length)

  # Pathwise coordinate descent
  for i in range(len(lambdas)):
    while True:
      B_curr = B_star

      for j in range(nCols):
        k = np.where(B_curr != 0)[0]
        xy = X[:, j] @ y
        xx = X[:, j] @ X[:, k]
        residuals = (xy -  (xx @ B_curr[k]))
        update = (residuals/nRows)  + B_curr[j]
        B_star[j] = (np.sign(update) * max(abs(update) - lambdas[i], 0))

      if np.all(abs(B_curr - B_star) < tolerance):
        break

  return B_star

In [10]:
start_time = time.time()
mean_squared_errors = []
alphas = np.arange(0.01, 3, 0.25)

# Lasso Regression
for alpha in alphas:
  beta_lasso = pathwise_lasso(X_train/(np.linalg.norm(X_train, axis=0)), y_train, alpha)

  y_pred = beta_lasso[0] + (X_test @ beta_lasso[1:])

  mse = mean_squared_error(y_test, y_pred)

  mean_squared_errors.append(mse)

fig = go.Figure(
  data=px.scatter(
    x=alphas,
    y=mean_squared_errors,
    labels = {
      'x': 'Alpha',
      'y': 'MSE'
    }
  )
)
fig.show()

In [11]:
print(
  f"Time taken: {time.time() - start_time} seconds"
)

Time taken: 24.756644010543823 seconds


In [12]:
# Sort features by coefficient
coef_df = pd.DataFrame({'feature':X_test.columns, 'coef': beta_lasso[1:]})
coef_df.sort_values(by='coef', ascending=False)


Unnamed: 0,feature,coef
0,Unnamed: 0,0.0
1,loan_amount,0.0
34,loan_type_type1,0.0
35,loan_type_type2,0.0
36,loan_type_type3,0.0
...,...,...
26,open_credit_opc,0.0
27,Credit_Worthiness_l1,0.0
28,Credit_Worthiness_l2,0.0
29,approv_in_adv_nopre,0.0


### Smallest MSE is about Alpha = .10 without need to overfit

In [13]:
# Fit Lasso with best alpha
alpha = 0.1

beta_lasso = pathwise_lasso(X_train/(np.linalg.norm(X_train, axis=0)), y_train, alpha)

# Print test predictions from this model
y_pred = beta_lasso[0] + np.dot(beta_lasso[1:], X_test.T)
y_pred

array([699.51243458, 699.51243458, 699.51243458, ..., 699.51243458,
       699.51243458, 699.51243458])

In [14]:
# Sort features by coefficient
coef_df = pd.DataFrame({'feature':X_test.columns, 'coef': beta_lasso[1:]})
coef_df.sort_values(by='coef', ascending=False)

Unnamed: 0,feature,coef
0,Unnamed: 0,-0.0
1,loan_amount,0.0
34,loan_type_type1,0.0
35,loan_type_type2,-0.0
36,loan_type_type3,-0.0
...,...,...
26,open_credit_opc,0.0
27,Credit_Worthiness_l1,0.0
28,Credit_Worthiness_l2,0.0
29,approv_in_adv_nopre,0.0
