# **Table of Contents**
* [Deliberate Overfit](#section_20)
    * [Add squared features](#section_21)
    * [Scaling](#section_22)
    * [Linear Reg , Score](#section_23)
    <br><br>
* [Regularisation](#section_24)
    * [Lasso](#section_25)
    * [Ridge](#section_26)
    * [Elastic Net](#section_27)

In [1]:
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import seaborn           as sns

import warnings
warnings.simplefilter ('ignore')

In [2]:
from  sklearn.datasets  import  load_boston

bos  =  load_boston()

df   =  pd.DataFrame ( bos.data, columns =bos.feature_names)

df['Price'] = bos.target

In [3]:
df  =  df.sample (frac =1,  random_state =4)

df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5
289,0.04297,52.5,5.32,0.0,0.405,6.565,22.9,7.3172,6.0,293.0,16.6,371.72,9.51,24.8
68,0.13554,12.5,6.07,0.0,0.409,5.594,36.8,6.498,4.0,345.0,18.9,396.9,13.09,17.4
211,0.37578,0.0,10.59,1.0,0.489,5.404,88.6,3.665,4.0,277.0,18.6,395.24,23.98,19.3
226,0.38214,0.0,6.2,0.0,0.504,8.04,86.5,3.2157,8.0,307.0,17.4,387.38,3.13,37.6


In [4]:
x  =  df.drop('Price', axis=1)
y  =  df['Price']

<a id='section_20'></a>
# Part 1 - **Deliberate overfit**

<a id='section_21'></a>
## 1. **Add squared features**

In [5]:
cols  =  list(x.columns)

cols.remove('CHAS')

for i in cols:
    x[i + " 2"] = x[i]**2
    
cols = list(x.columns)

cols

['CRIM',
 'ZN',
 'INDUS',
 'CHAS',
 'NOX',
 'RM',
 'AGE',
 'DIS',
 'RAD',
 'TAX',
 'PTRATIO',
 'B',
 'LSTAT',
 'CRIM 2',
 'ZN 2',
 'INDUS 2',
 'NOX 2',
 'RM 2',
 'AGE 2',
 'DIS 2',
 'RAD 2',
 'TAX 2',
 'PTRATIO 2',
 'B 2',
 'LSTAT 2']

<a id='section_22'></a>
## 2. **Scaling**

In [6]:
from sklearn.preprocessing import StandardScaler

ss =  StandardScaler()

x  =  ss.fit_transform (x)                               # x gets stored in form of array

x  =  pd.DataFrame (x,  columns =cols)                   # x converted to dataframe

x.head(2)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,...,INDUS 2,NOX 2,RM 2,AGE 2,DIS 2,RAD 2,TAX 2,PTRATIO 2,B 2,LSTAT 2
0,-0.395935,0.048772,-0.476654,-0.272599,-0.265154,-0.931206,1.117494,1.087196,-0.523001,-0.577519,...,-0.638096,-0.334436,-0.912972,1.307731,0.860217,-0.582762,-0.624199,-1.498432,0.333771,2.903851
1,-0.415517,1.765555,-0.848722,-0.272599,-1.293115,0.399425,-1.624226,1.674325,-0.408041,-0.684426,...,-0.834883,-1.129339,0.342857,-1.441853,1.643958,-0.537573,-0.693246,-0.914507,0.0632,-0.511193


<a id='section_23'></a>
## 3. **Linear Reg , Score**

In [7]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test    =     train_test_split ( x, y,   test_size =0.3,    random_state =42)

In [8]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()

lr.fit (x_train, y_train)                                       

LinearRegression()

In [9]:
y_train_pred  =  lr.predict (x_train)            
y_test_pred   =  lr.predict (x_test)

In [10]:
from   sklearn.metrics   import  r2_score

print  (  r2_score (y_train,  y_train_pred)  )
print  (  r2_score (y_test,   y_test_pred)   )

0.8631575913052096
0.6989384844461194


> OBS :
        
        * Significant difference in scores of train vs test

> Comments :
        
        * We have made the model complex by adding squared features
        * The scores are significantly varying
        * So, this is a case of overfitting.

<a id='section_24'></a>
# Part 2 - **Regularisation**

<a id='section_25'></a>
# 1. **Lasso**

In [11]:
from  sklearn.linear_model    import   Lasso  , LassoCV

## i - **Find best alpha**

In [12]:
i  =  []
i  =  np.logspace (-3, -1, 100)                                           # arg        # -3 to -1

In [13]:
lcv     =  LassoCV  ( alphas = i,    cv =3,    max_iter =2000 )              

lcv.fit (x,y)                                                                  

lcv.alpha_                                                                     

0.017073526474706915

## ii - **Use best alpha**

In [14]:
lasso  =  Lasso ( alpha = 0.017 )                                                

lasso.fit (x,y)                                                                
 
res   =   pd.DataFrame ( lasso.coef_  ,  index = x.columns )                           
#res                                                         

OBS
* Best result for Lasso
* -ve coeffs denote inverse relation
* In further analysis, drop those features whose coeff = 0

## iii - **Score**

In [15]:
y_pred  =  lasso.predict(x)

r2_score ( y , y_pred )

0.82543915955071

<a id='section_26'></a>
# 2. **Ridge**

In [16]:
from  sklearn.linear_model    import   Ridge , RidgeCV

## i - **Find best alpha**

In [17]:
i  =  []
i  =  np.logspace (-3, 1, 100)                                          

In [18]:
rcv     =  RidgeCV  ( alphas = i ,    cv = 3 )                              

rcv.fit (x,y)                                                                

rcv.alpha_                                                                  

0.4229242874389499

## ii - **Use best alpha**

In [19]:
ridge  =  Ridge ( alpha = 0.422 )                                                

ridge.fit (x,y)                                                                 
 
res = pd.DataFrame ( ridge.coef_ ,  index = x.columns )                          
#res                                                        

## iii - **Score**

In [20]:
y_pred  =  ridge.predict(x)

r2_score ( y , y_pred )

0.8301329814370588

<a id='section_27'></a>
# 3. **Elastic Net**

In [21]:
from  sklearn.linear_model  import  ElasticNet  , ElasticNetCV

## i - **Find best alpha**

In [22]:
i  =  []
i  =  np.logspace (-3, -1, 100)                                            

In [23]:
ecv     =  ElasticNetCV  ( alphas = i  ,   cv = 3  ,   max_iter = 5000 )           

ecv.fit (x,y)                                                                  

ecv.alpha_                                                                     

0.0025353644939701114

>Note

    * l1_ratio is also a param.
    * j  =  [ 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9 ]   .... input     
    * ecv.l1_ratio_    .........  output

## ii - **Use best alpha**

In [24]:
enet  =  ElasticNet ( alpha = 0.002  ,   max_iter = 5000 )                       

enet.fit(x,y)
 
res  =  pd.DataFrame ( enet.coef_  ,  index = x.columns )                         # Display the coeffs
#res                                                        

## iii - **Score**

In [25]:
y_pred  =  enet.predict(x)

r2_score   ( y , y_pred )

0.8295074306016069