## Gradient_Descent_and_ Regularization

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

In [6]:
def gradient_descent(x,y):
    
    
    n = len(x)
    learning_rate = 0.08
    intercept = slope = 0
    iterations = 50
    
    for i in range(iterations):
        
        y_pred = x*slope + intercept
        
        mse = mean_squared_error(y_pred,y)
        
        md = -(2/n)*sum(x*(y-y_pred))
        bd = -(2/n)*sum(y-y_pred)

        slope = slope - (learning_rate * md)
        intercept = intercept - (learning_rate * bd)
        
        print("slope: ",slope,"\nintercept: ",intercept,"\nmse: ",mse,"\niteration: ",i,"\n-------------------")

In [7]:
X = np.array([1,2,3,4])
y = np.array([10,12,20,25])

In [8]:
gradient_descent(X,y)

slope:  7.76 
intercept:  2.68 
mse:  317.25 
iteration:  0 
-------------------
slope:  5.136 
intercept:  1.8272000000000002 
mse:  37.5484 
iteration:  1 
-------------------
slope:  6.001919999999999 
intercept:  2.160448 
mse:  5.946675839999994 
iteration:  2 
-------------------
slope:  5.6954368 
intercept:  2.0940083200000004 
mse:  2.363295509503997 
iteration:  3 
-------------------
slope:  5.783309312 
intercept:  2.1607922688000003 
mse:  1.944685321356901 
iteration:  4 
-------------------
slope:  5.73902123008 
intercept:  2.181741780992 
mse:  1.8841286943434368 
iteration:  5 
-------------------
slope:  5.739499041587201 
intercept:  2.21705460400128 
mse:  1.8646353110795062 
iteration:  6 
-------------------
slope:  5.725278350082048 
intercept:  2.246526250726195 
mse:  1.8503780860340768 
iteration:  7 
-------------------
slope:  5.716333829693113 
intercept:  2.276970710577185 
mse:  1.8372827631707351 
iteration:  8 
-------------------
slope:  5.70594494983

In [9]:
#y = 5.4X + 3.04

In [10]:
# Inbuild library for GD. This work internally and process output.(Not Recommended)
#from sklearn.linear_model import SGDRegressor
from sklearn import linear_model

In [11]:
x_matrix = X.reshape(-1,1)
gd = linear_model.SGDRegressor(max_iter=100, tol=1e-3)
gd.fit(x_matrix,y)

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
             eta0=0.01, fit_intercept=True, l1_ratio=0.15,
             learning_rate='invscaling', loss='squared_loss', max_iter=100,
             n_iter_no_change=5, penalty='l2', power_t=0.25, random_state=None,
             shuffle=True, tol=0.001, validation_fraction=0.1, verbose=0,
             warm_start=False)

In [12]:
gd.intercept_

array([2.15456466])

In [13]:
gd.coef_

array([5.74167907])

In [15]:
y_pred = gd.predict(x_matrix)
y_pred

array([ 7.89624373, 13.6379228 , 19.37960186, 25.12128093])

In [110]:
mean_squared_error(y_pred,y)

1.8970131611490646

## Regularization

In [2]:
df = pd.read_csv("cars.csv")

In [3]:
df.head()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,body-style,drive-wheels,engine-location,width,height,engine-type,engine-size,horsepower,city-mpg,highway-mpg,price
0,3,?,alfa-romero,gas,convertible,rwd,front,64.1,48.8,dohc,130,111,21,27,13495
1,3,?,alfa-romero,gas,convertible,rwd,front,64.1,48.8,dohc,130,111,21,27,16500
2,1,?,alfa-romero,gas,hatchback,rwd,front,65.5,52.4,ohcv,152,154,19,26,16500
3,2,164,audi,gas,sedan,fwd,front,66.2,54.3,ohc,109,102,24,30,13950
4,2,164,audi,gas,sedan,4wd,front,66.4,54.3,ohc,136,115,18,22,17450


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 15 columns):
symboling            205 non-null int64
normalized-losses    205 non-null object
make                 205 non-null object
fuel-type            205 non-null object
body-style           205 non-null object
drive-wheels         205 non-null object
engine-location      205 non-null object
width                205 non-null float64
height               205 non-null float64
engine-type          205 non-null object
engine-size          205 non-null int64
horsepower           205 non-null object
city-mpg             205 non-null int64
highway-mpg          205 non-null int64
price                205 non-null int64
dtypes: float64(2), int64(5), object(8)
memory usage: 24.1+ KB


In [4]:
df["normalized-losses"].replace("?",np.nan,inplace=True)
df["normalized-losses"] = df["normalized-losses"].astype(float)
losses_mean = df["normalized-losses"].mean()
df["normalized-losses"].fillna(losses_mean,inplace=True)

In [5]:
df["horsepower"].replace("?",np.nan,inplace=True)
df["horsepower"] = df["horsepower"].astype(float)
horsepower_mean = df["horsepower"].mean()
df["horsepower"].fillna(horsepower_mean,inplace=True)

In [5]:
df.dtypes

symboling              int64
normalized-losses    float64
make                  object
fuel-type             object
body-style            object
drive-wheels          object
engine-location       object
width                float64
height               float64
engine-type           object
engine-size            int64
horsepower           float64
city-mpg               int64
highway-mpg            int64
price                  int64
dtype: object

In [6]:
df_num = df.select_dtypes(["int64","float64"])
df_cat = df.select_dtypes("object")

In [7]:
df_num.head()

Unnamed: 0,symboling,normalized-losses,width,height,engine-size,horsepower,city-mpg,highway-mpg,price
0,3,122.0,64.1,48.8,130,111.0,21,27,13495
1,3,122.0,64.1,48.8,130,111.0,21,27,16500
2,1,122.0,65.5,52.4,152,154.0,19,26,16500
3,2,164.0,66.2,54.3,109,102.0,24,30,13950
4,2,164.0,66.4,54.3,136,115.0,18,22,17450


In [8]:
df_cat.head()

Unnamed: 0,make,fuel-type,body-style,drive-wheels,engine-location,engine-type
0,alfa-romero,gas,convertible,rwd,front,dohc
1,alfa-romero,gas,convertible,rwd,front,dohc
2,alfa-romero,gas,hatchback,rwd,front,ohcv
3,audi,gas,sedan,fwd,front,ohc
4,audi,gas,sedan,4wd,front,ohc


In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
for col in df_cat:
    le = LabelEncoder()
    df_cat[col] = le.fit_transform(df_cat[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [11]:
df_cat.head()

Unnamed: 0,make,fuel-type,body-style,drive-wheels,engine-location,engine-type
0,0,1,0,2,0,0
1,0,1,0,2,0,0
2,0,1,2,2,0,5
3,1,1,3,1,0,3
4,1,1,3,0,0,3


In [9]:
df_new = pd.concat([df_num,df_cat],axis=1)

In [10]:
df_new.head()

Unnamed: 0,symboling,normalized-losses,width,height,engine-size,horsepower,city-mpg,highway-mpg,price,make,fuel-type,body-style,drive-wheels,engine-location,engine-type
0,3,122.0,64.1,48.8,130,111.0,21,27,13495,0,1,0,2,0,0
1,3,122.0,64.1,48.8,130,111.0,21,27,16500,0,1,0,2,0,0
2,1,122.0,65.5,52.4,152,154.0,19,26,16500,0,1,2,2,0,5
3,2,164.0,66.2,54.3,109,102.0,24,30,13950,1,1,3,1,0,3
4,2,164.0,66.4,54.3,136,115.0,18,22,17450,1,1,3,0,0,3


### Baseline model

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [12]:
X = df_new.drop("price",axis=1)
y = df["price"]

In [13]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1)


In [14]:
lin = LinearRegression()

lin.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [15]:
# Train score
lin.score(X_train,y_train)

0.8504573774895473

In [16]:
# Test score
lin.score(X_test,y_test)

0.7965566780397374

In [17]:
# this train - high than test score gives hint of overfitting

In [18]:
lin.coef_

array([ 4.51384957e+01,  1.53127607e+00,  7.89452171e+02,  3.62663990e+02,
        9.83682875e+01, -1.08169245e+01,  3.08017854e+02, -4.17024371e+02,
       -2.00099087e+02, -6.22650015e+02, -1.70235175e+02,  1.86860719e+03,
        1.64133620e+04,  2.83174279e+02])

In [19]:
from sklearn.linear_model import Lasso # lambda*sum(abs(coef))
from sklearn.linear_model import Ridge # lambda*sum(square(coef))

In [20]:
# Ridge: Reduces values of less important features coef
l2 = Ridge(10)  # random lambda value = 10
l2.fit(X_train,y_train)
l2.coef_

array([ 2.08658930e+02, -5.60173023e-01,  3.64420144e+02,  5.72916414e+02,
        1.04441215e+02,  2.21332730e+01,  2.11271281e+02, -2.72864381e+02,
       -1.86340249e+02, -9.06610516e+02, -6.30655861e+02,  1.56860422e+03,
        2.57047785e+03,  5.15948757e+02])

In [21]:
l2.coef_.round(3)

array([ 2.086590e+02, -5.600000e-01,  3.644200e+02,  5.729160e+02,
        1.044410e+02,  2.213300e+01,  2.112710e+02, -2.728640e+02,
       -1.863400e+02, -9.066110e+02, -6.306560e+02,  1.568604e+03,
        2.570478e+03,  5.159490e+02])

In [22]:
# Lasso helps in feature selection : it completely reduce coef values to zero
l1 = Lasso(1000) 
l1.fit(X_train,y_train)
l1.coef_

array([   0.        ,    3.96310671,    0.        ,  276.93279232,
        135.34169218,   16.00129073,   -0.        , -113.03311963,
       -126.40695244,   -0.        ,   -0.        ,    0.        ,
          0.        ,    0.        ])

In [23]:
lc = l1.coef_.round()
lc

array([   0.,    4.,    0.,  277.,  135.,   16.,   -0., -113., -126.,
         -0.,   -0.,    0.,    0.,    0.])

In [24]:
i = 0
for col in X:
    print(col, ":",lc[i])
    i += 1

symboling : 0.0
normalized-losses : 4.0
width : 0.0
height : 277.0
engine-size : 135.0
horsepower : 16.0
city-mpg : -0.0
highway-mpg : -113.0
make : -126.0
fuel-type : -0.0
body-style : -0.0
drive-wheels : 0.0
engine-location : 0.0
engine-type : 0.0


### Finding right lambda value

<p>Ridge(l2)</p>

In [46]:
for i in range(1,100,10):
    l2 = Ridge(i)
    l2.fit(X_train,y_train)
    print(i,"->",l2.score(X_test,y_test))

1 -> 0.8074518758147271
11 -> 0.8150277245431792
21 -> 0.8142819591129825
31 -> 0.8131633096537952
41 -> 0.8120815180669059
51 -> 0.8111047186143225
61 -> 0.8102356739754502
71 -> 0.809462627892344
81 -> 0.8087718415627225
91 -> 0.8081508480501449


<p>Lasso(l1)</p>

In [47]:
for i in range(200,500,50):
    l1 = Lasso(i)
    l1.fit(X_train,y_train)
    print(i,"->",l1.score(X_test,y_test))

200 -> 0.8139201358023782
250 -> 0.8124219090780234
300 -> 0.8085057299003378
350 -> 0.8036053753129062
400 -> 0.7977229768452246
450 -> 0.7950465607641614


### Final model
<p>Ridge lambda - 11, score - 0.81</p>
<p>Lasso lambda - 200, score - 0.81</p>

In [48]:
l2 = Ridge(alpha=11)
l2.fit(X_train,y_train)
print(l2.score(X_test,y_test))

0.8150277245431792


In [49]:
l2.coef_

array([ 2.08075572e+02, -4.19381533e-01,  3.60059061e+02,  5.73951520e+02,
        1.04837625e+02,  2.25081577e+01,  2.09621781e+02, -2.70364868e+02,
       -1.85682528e+02, -8.73561956e+02, -6.31723522e+02,  1.53165287e+03,
        2.37667318e+03,  5.13039213e+02])

In [50]:
l1 = Lasso(alpha=200)
l1.fit(X_train,y_train)
print(l1.score(X_test,y_test))

0.8139201358023782


In [51]:
l1.coef_

array([ 0.00000000e+00,  2.07002782e+00,  3.23427588e+02,  4.69420186e+02,
        1.15553544e+02,  1.42784033e+01,  3.62975615e+01, -1.24776284e+02,
       -1.67461354e+02, -0.00000000e+00, -4.05115132e+02,  1.11242794e+03,
        2.09462838e+03,  3.05815537e+02])

### Cross Validation
<p>cross validation let us know which model is working perfectly on overall data</p>
<p>Ridge and Lasso are working on training data not on entire data</p>

In [52]:
from sklearn.model_selection import cross_val_score

In [53]:
l2_cross = cross_val_score(l2,X,y,cv=4)

In [54]:
l2_cross

array([0.74102278, 0.859919  , 0.40754955, 0.45287323])

In [55]:
np.mean(l2_cross)

0.6153411408406555

In [56]:
l1_cross = cross_val_score(l1,X,y,cv=4)

In [57]:
l1_cross

array([0.76560829, 0.81872367, 0.43344753, 0.448364  ])

In [58]:
l1_cross.mean()

0.6165358705430692

In [38]:
# Lasso is higher than Ridge hence Lasso wins