In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.model_selection import cross_val_score, KFold

from sklearn.pipeline import make_pipeline

from sklearn import linear_model

In [3]:
#Lets load the dataset and sample some
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = pd.read_csv("archive.zip", header=None, delimiter="\s+", names=column_names)
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [4]:
x = data.loc[:, data.columns[:-1]]
x.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')

In [5]:
y = data.loc[:,'MEDV']

In [6]:
y = np.log1p(y)
for col in x.columns:
    if np.abs(x[col].skew()) > 0.3:
        x[col] = np.log1p(x[col])

In [7]:
kf = KFold(n_splits=10)

In [8]:
models = [linear_model.LinearRegression(),
          linear_model.Ridge(),
          linear_model.Lasso(),
          linear_model.Lars()]

scalers = [None,
           preprocessing.StandardScaler(),
           preprocessing.MinMaxScaler()]

In [9]:
# for sklearn API - that higher return values are better than lower return values. - that's why negative MSE

In [10]:
for m in models:
    print('Model: ', m)
    for s in scalers:
        print('Scaler:', s)
        model = make_pipeline(s, m)
        scores = cross_val_score(model, x, y, cv=kf, scoring='neg_mean_squared_error')
        print(f'nMSE: {scores.mean()}, Variance:{scores.std()}')

Model:  LinearRegression()
Scaler: None
nMSE: -0.04574725289167475, Variance:0.054707631294927415
Scaler: StandardScaler()
nMSE: -0.04574725289167468, Variance:0.054707631294927304
Scaler: MinMaxScaler()
nMSE: -0.045747252891674646, Variance:0.05470763129492705
Model:  Ridge()
Scaler: None
nMSE: -0.04260597300544776, Variance:0.04412459671029232
Scaler: StandardScaler()
nMSE: -0.04563796890788909, Variance:0.054892903694955184
Scaler: MinMaxScaler()
nMSE: -0.043801793922794506, Variance:0.05383518248151599
Model:  Lasso()
Scaler: None
nMSE: -0.14432008922198264, Variance:0.12283479071285128
Scaler: StandardScaler()
nMSE: -0.1655486283445406, Variance:0.13221366882890387
Scaler: MinMaxScaler()
nMSE: -0.1655486283445406, Variance:0.13221366882890387
Model:  Lars()
Scaler: None
nMSE: -0.045747252891674785, Variance:0.054707631294927304
Scaler: StandardScaler()
nMSE: -0.04574725289167482, Variance:0.05470763129492734
Scaler: MinMaxScaler()
nMSE: -0.04574725289167472, Variance:0.05470763129

In [11]:
l_lasso = linear_model.Lasso()
l_lasso.fit(x,y).coef_

array([-0.        ,  0.        , -0.00924071,  0.        , -0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.        , -0.        ])

In [12]:
min_max_scaler = preprocessing.MinMaxScaler()
x_mm = pd.DataFrame(data=min_max_scaler.fit_transform(x), columns=x.columns)
l_lasso = linear_model.Lasso(alpha=0)
l_lasso.fit(x_mm,y).coef_

  l_lasso.fit(x_mm,y).coef_
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


array([-0.620259  , -0.01183429,  0.03960867,  0.08517625, -0.30323108,
        0.3022376 ,  0.0969144 , -0.43193508,  0.43963723, -0.19543905,
       -0.32485805,  0.23460293, -1.14403504])

In [13]:
standard_scaler = preprocessing.StandardScaler()
x_st = pd.DataFrame(data=standard_scaler.fit_transform(x), columns=x.columns)
l_lasso = linear_model.Lasso()
l_lasso.fit(x_st,y).coef_

array([-0.,  0., -0.,  0., -0.,  0., -0.,  0., -0., -0., -0.,  0., -0.])

In [14]:
l_lasso.fit(x_st,y).intercept_ # that's all it predicts -_-

3.085436621436588

In [15]:
l_lasso.predict(x_st)

array([3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543662,
       3.08543662, 3.08543662, 3.08543662, 3.08543662, 3.08543

In [16]:
y

0      3.218876
1      3.117950
2      3.575151
3      3.538057
4      3.616309
         ...   
501    3.152736
502    3.072693
503    3.214868
504    3.135494
505    2.557227
Name: MEDV, Length: 506, dtype: float64