In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import time
import math
import statsmodels.api as sm
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split

In [4]:
# Import from the public `sklearn.datasets` namespace. The old submodule path
# `sklearn.datasets.california_housing` was deprecated in scikit-learn 0.22 and
# removed in 0.24, so it fails on a fresh kernel with any current release.
from sklearn.datasets import fetch_california_housing

# Load the California housing regression data: `data.data` holds the feature
# matrix and `data.target` the value to predict.
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
print(X_train.shape)
print(X_test.shape)

(16512, 8)
(4128, 8)


In [11]:
# Min-max scale the features, then fit one Lasso model and report its fit.
from sklearn.linear_model import Lasso
from sklearn.preprocessing import MinMaxScaler

# The scaler learns its per-feature min/max from the training split only and
# is then re-used unchanged on the test split, so no test data leaks into it.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lasso = Lasso(alpha=0.001, max_iter=10000)
lasso.fit(X_train_scaled, y_train)

# Gather the reported quantities first so the print block below stays flat.
n_nonzero_coefs = np.sum(lasso.coef_ != 0)
train_r2 = lasso.score(X_train_scaled, y_train)
test_r2 = lasso.score(X_test_scaled, y_test)

print('Lasso regression linear model intercept: {}'.format(lasso.intercept_))
print('Lasso regression linear model coeff:\n{}'.format(lasso.coef_))
print('Non-zero features: {}'.format(n_nonzero_coefs))
print('R-squared score (training): {:.3f}'.format(train_r2))
print('R-squared score (test): {:.3f}\n'.format(test_r2))


Lasso regression linear model intercept: 3.784967487849346
Lasso regression linear model coeff:
[ 5.50716403  0.49631754  0.          0.13739865 -0.         -0.
 -3.86821726 -4.19183267]
Non-zero features: 5
R-squared score (training): 0.598
R-squared score (test): 0.576



In [13]:
# Sweep the Lasso regularization strength and, for each value, report how many
# coefficients stay non-zero together with the train/test R-squared.
print('Lasso regression: effect of alpha regularization\n\
parameter on number of features kept in final model\n')

# NOTE(review): the values are not in increasing order (0.05 precedes 0.01).
# The order is kept as-is so the printed report matches the recorded output;
# confirm whether a sorted sweep was intended.
alpha_grid = [0.001, 0.05, 0.01, 1, 2, 5, 10]

for alpha in alpha_grid:
    linlasso = Lasso(alpha=alpha, max_iter=10000)
    linlasso.fit(X_train_scaled, y_train)

    r2_train = linlasso.score(X_train_scaled, y_train)
    r2_test = linlasso.score(X_test_scaled, y_test)
    # A feature counts as "kept" when its coefficient is not exactly zero.
    n_features_kept = np.sum(linlasso.coef_ != 0)

    report = 'Alpha = {:.3f}\nFeatures kept: {}, r-squared training: {:.2f}, \
r-squared test: {:.2f}\n'
    print(report.format(alpha, n_features_kept, r2_train, r2_test))

Lasso regression: effect of alpha regularization
parameter on number of features kept in final model

Alpha = 0.001
Features kept: 5, r-squared training: 0.60, r-squared test: 0.58

Alpha = 0.050
Features kept: 1, r-squared training: 0.37, r-squared test: 0.35

Alpha = 0.010
Features kept: 4, r-squared training: 0.55, r-squared test: 0.53

Alpha = 1.000
Features kept: 0, r-squared training: 0.00, r-squared test: -0.00

Alpha = 2.000
Features kept: 0, r-squared training: 0.00, r-squared test: -0.00

Alpha = 5.000
Features kept: 0, r-squared training: 0.00, r-squared test: -0.00

Alpha = 10.000
Features kept: 0, r-squared training: 0.00, r-squared test: -0.00

