# Linear Regression Class Demo

In [2]:
import pandas as pd
from sklearn.datasets import fetch_california_housing

cal_housing = fetch_california_housing()
X = pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names)
y = cal_housing.target


In [3]:
X

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32


In [4]:
y

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

## Classic Linear Regression

In [6]:
#Classic Linear Regression
from sklearn.linear_model import LinearRegression
my_model = LinearRegression()

In [7]:
my_model.fit(X,y)

LinearRegression()

In [8]:
my_model.coef_

array([ 4.36693293e-01,  9.43577803e-03, -1.07322041e-01,  6.45065694e-01,
       -3.97638942e-06, -3.78654265e-03, -4.21314378e-01, -4.34513755e-01])

### Scale the data

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [10]:
X_scaled = scaler.fit_transform(X)

In [13]:
X_scaled = pd.DataFrame(data = X_scaled, columns=cal_housing.feature_names)

In [14]:
X_scaled

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,2.344766,0.982143,0.628559,-0.153758,-0.974429,-0.049597,1.052548,-1.327835
1,2.332238,-0.607019,0.327041,-0.263336,0.861439,-0.092512,1.043185,-1.322844
2,1.782699,1.856182,1.155620,-0.049016,-0.820777,-0.025843,1.038503,-1.332827
3,0.932968,1.856182,0.156966,-0.049833,-0.766028,-0.050329,1.038503,-1.337818
4,-0.012881,1.856182,0.344711,-0.032906,-0.759847,-0.085616,1.038503,-1.337818
...,...,...,...,...,...,...,...,...
20635,-1.216128,-0.289187,-0.155023,0.077354,-0.512592,-0.049110,1.801647,-0.758826
20636,-0.691593,-0.845393,0.276881,0.462365,-0.944405,0.005021,1.806329,-0.818722
20637,-1.142593,-0.924851,-0.090318,0.049414,-0.369537,-0.071735,1.778237,-0.823713
20638,-1.054583,-0.845393,-0.040211,0.158778,-0.604429,-0.091225,1.778237,-0.873626


In [15]:
scaled_model = LinearRegression()
scaled_model.fit(X_scaled,y)

LinearRegression()

In [16]:
scaled_model.coef_

array([ 0.8296193 ,  0.11875165, -0.26552688,  0.30569623, -0.004503  ,
       -0.03932627, -0.89988565, -0.870541  ])

### Loop to add coefficients to column names

In [17]:
for i in range (len(X_scaled.columns)):
    print(X_scaled.columns[i], scaled_model.coef_[i])

MedInc 0.8296193042804504
HouseAge 0.11875165121214162
AveRooms -0.26552687950662046
AveBedrms 0.30569622980430894
Population -0.004503001312614049
AveOccup -0.03932626697814864
Latitude -0.8998856544145073
Longitude -0.8705410023357312


### Regularization

#### LASSO (L1)

In [19]:
from sklearn.linear_model import Lasso
l1_model = Lasso(alpha=1)

In [20]:
l1_model.fit(X_scaled,y)

Lasso(alpha=1)

In [21]:
l1_model.coef_

array([ 0.,  0.,  0., -0., -0., -0., -0., -0.])

##### Loop through alpha to see how the coefficients change

In [24]:
alpha = 1
for i in range(5):
    l1_model.alpha = alpha
    l1_model.fit(X_scaled, y)
    print("alpha = ", alpha, l1_model.coef_)
    print("----------")
    alpha = alpha / 10

alpha =  1 [ 0.  0.  0. -0. -0. -0. -0. -0.]
----------
alpha =  0.1 [ 0.70571337  0.10601099 -0.         -0.         -0.         -0.
 -0.01121267 -0.        ]
----------
alpha =  0.01 [ 0.77722333  0.12486709 -0.12940585  0.16912537 -0.         -0.02944551
 -0.79543737 -0.75899738]
----------
alpha =  0.001 [ 0.8244684   0.11967446 -0.25193711  0.29211349 -0.00315059 -0.03840844
 -0.88903624 -0.85906895]
----------
alpha =  0.0001 [ 0.82910679  0.11884435 -0.26417261  0.30434164 -0.00436762 -0.03923458
 -0.89879503 -0.86938865]
----------


##### Cross Validation

In [27]:
from sklearn.model_selection import cross_val_score
cross_val_score(scaled_model, X_scaled, y)

array([0.54866323, 0.46820691, 0.55078434, 0.53698703, 0.66051406])

###### Find the best Loss

In [28]:
alpha = 1
for i in range(5):
    l1_model.alpha = alpha
    print(alpha, cross_val_score(l1_model, X_scaled, y))
    print("----------")
    alpha = alpha / 10

1 [-0.21613668 -0.02423671 -0.09329273 -0.07285357 -0.0393445 ]
----------
0.1 [0.45536374 0.4023948  0.47036257 0.30458657 0.52233135]
----------
0.01 [0.56682644 0.45494291 0.55254378 0.50602901 0.66847995]
----------
0.001 [0.55132766 0.46710065 0.55132522 0.53439773 0.66213492]
----------
0.0001 [0.54893833 0.46809859 0.55084209 0.53673338 0.66068491]
----------


In [30]:
alpha = 1
for i in range(5):
    l1_model.alpha = alpha
    print(alpha, cross_val_score(l1_model, X_scaled, y))
    print("----------")
    alpha = alpha / 2

1 [-0.21613668 -0.02423671 -0.09329273 -0.07285357 -0.0393445 ]
----------
0.5 [0.15453553 0.26623911 0.21211167 0.17783615 0.25450913]
----------
0.25 [0.3629225  0.36698211 0.39731386 0.27190784 0.44295217]
----------
0.125 [0.44388744 0.39950571 0.46086639 0.28951521 0.51268836]
----------
0.0625 [0.47992086 0.41877029 0.49833942 0.32287246 0.53056434]
----------


#### Ridge (L2)

In [31]:
from sklearn.linear_model import Ridge
l2_model = Ridge(alpha = 1)
l2_model.fit(X_scaled, y)

Ridge(alpha=1)

In [32]:
l1_model.score(X_scaled, y)

0.6062322647677378

In [33]:
l2_model.coef_

array([ 0.82959256,  0.11881684, -0.26539682,  0.30552458, -0.00448006,
       -0.03932976, -0.89926646, -0.86991606])

In [34]:
alpha = 1E-5
for i in range(10):
    l2_model.alpha = alpha
    l2_model.fit(X_scaled, y)
    print("alpha = ", alpha, l2_model.coef_)
    print("----------")
    alpha = alpha * 10

alpha =  1e-05 [ 0.8296193   0.11875165 -0.26552688  0.30569623 -0.004503   -0.03932627
 -0.89988565 -0.870541  ]
----------
alpha =  0.0001 [ 0.8296193   0.11875166 -0.26552687  0.30569621 -0.004503   -0.03932627
 -0.89988559 -0.87054094]
----------
alpha =  0.001 [ 0.82961928  0.11875172 -0.26552675  0.30569606 -0.00450298 -0.03932627
 -0.89988503 -0.87054038]
----------
alpha =  0.01 [ 0.82961904  0.1187523  -0.26552558  0.30569451 -0.00450277 -0.0393263
 -0.89987946 -0.87053475]
----------
alpha =  0.1 [ 0.82961664  0.11875818 -0.26551388  0.30567906 -0.00450071 -0.03932662
 -0.89982369 -0.87047846]
----------
alpha =  1.0 [ 0.82959256  0.11881684 -0.26539682  0.30552458 -0.00448006 -0.03932976
 -0.89926646 -0.86991606]
----------
alpha =  10.0 [ 0.8293461   0.11939823 -0.26422311  0.30398067 -0.00427544 -0.03936068
 -0.8937389  -0.86433656]
----------
alpha =  100.0 [ 0.82639107  0.12472471 -0.25228369  0.28870558 -0.00239758 -0.03961935
 -0.84257736 -0.8126335 ]
----------
alpha 

In [37]:
alpha = 1E-5
for i in range(20):
    l2_model.alpha = alpha
    print(alpha, cross_val_score(l2_model, X_scaled, y))
    print("----------")
    alpha = alpha * 10

1e-05 [0.54866323 0.46820691 0.55078434 0.53698703 0.66051406]
----------
0.0001 [0.54866324 0.46820691 0.55078434 0.53698702 0.66051406]
----------
0.001 [0.54866334 0.46820689 0.55078436 0.53698696 0.66051407]
----------
0.01 [0.54866426 0.46820668 0.55078452 0.53698632 0.66051415]
----------
0.1 [0.54867348 0.46820467 0.55078609 0.53697995 0.66051499]
----------
1.0 [0.54876544 0.4681845  0.55080175 0.53691622 0.66052317]
----------
10.0 [0.54966271 0.46798279 0.55095161 0.53627682 0.66058868]
----------
100.0 [0.55669091 0.46597036 0.55186109 0.52974249 0.65993628]
----------
1000.0 [0.55440687 0.44998446 0.53991768 0.47189205 0.62504649]
----------
10000.0 [0.37180782 0.38729253 0.40830662 0.2927889  0.45139779]
----------
100000.0 [-0.04415765  0.11468789  0.05507241  0.03386815  0.10121941]
----------
1000000.0 [-0.19505191 -0.00674442 -0.0751857  -0.05956807 -0.0222241 ]
----------
10000000.0 [-0.2139798  -0.02244211 -0.09144179 -0.07149147 -0.0375945 ]
----------
100000000.0 [