# Ridge
Auto-generated notebook.

Due to the few points in each dimension and the straight line that linear regression uses to follow these points as well as it can, noise on the observations will cause great variance as shown in the first plot. Every line’s slope can vary quite a bit for each prediction due to the noise induced in the observations.

Ridge regression is basically minimizing a penalised version of the least-squared function. The penalising shrinks the value of the regression coefficients. Despite the few data points in each dimension, the slope of the prediction is much more stable and the variance in the line itself is greatly reduced, in comparison to that of the standard linear regression.

Here, α (alpha) is the parameter which balances the amount of emphasis given to minimizing RSS vs minimizing sum of square of coefficients. α can take various values:

    α = 0:
        The objective becomes same as simple linear regression.
        We’ll get the same coefficients as simple linear regression.
    α = ∞:
        The coefficients will be zero. Why? Because of infinite weightage on square of coefficients, anything less than zero will make the objective infinite.
    0 < α < ∞:
        The magnitude of α will decide the weightage given to different parts of objective.
        The coefficients will be somewhere between 0 and ones for simple linear regression.


https://www.analyticsvidhya.com/blog/2016/01/complete-tutorial-ridge-lasso-regression-python/

In [4]:
from sklearn import linear_model

import numpy as np
import pandas as pd
import os
import sys
import plotly.graph_objects as go
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
   sys.path.append(module_path) 

from erudition.learning.helpers.plots.plotly_render import render, scatter

np.random.seed(42) 

In [5]:
x = np.array([i*np.pi/180 for i in range(60,300,4)])
y = np.sin(x) + np.random.normal(0,0.15,len(x))

df = pd.DataFrame(np.column_stack([x,y]),columns=['x','y'])

for i in range(2,20):
    colname = 'x_%d'%i
    df[colname] = df['x']**i

fig = go.Figure(
    data=[
        go.Scatter(
            x=df.x, 
            y=df.y, 
            mode='markers',
            marker=dict(
                size=3,
                color='yellow'
            )),
        ]
)
render(fig)
fig.show()

This resembles a sine curve but not exactly because of the noise. We’ll use this as an example to test different scenarios in this article. Let’s try to estimate the sine function using polynomial regression with powers of x from 1 to 15. Let’s add a column for each power upto 15 in our dataframe. This can be accomplished using the following code:

Now that we have the 100 powers let's make 100 different linear models

In [6]:
def linear_regression(df, power):

    linreg = linear_model.LinearRegression()

    predictors = ['x']

    if power >=2:
        predictors.extend(['x_%d'%i for i in range(2,power+1)])

    linreg.fit(df[predictors], df['y'])

    plot = go.Scatter(
        x=df.x, 
        y=linreg.predict(df[predictors]), 
        mode ='lines',
        opacity = 0.5,
        name = 'x_%d'%power,
        marker=dict(
            size=3,
            )
        )


    #Return the result in pre-defined format
    rss = sum((linreg.predict(df[predictors])-df.y)**2)
    ret = [rss]
    ret.extend([linreg.intercept_])
    ret.extend(linreg.coef_)

    return plot, ret

data = []

data.append(
    go.Scatter(
        x=df.x, 
        y=df.y, 
        mode ='markers',
        name = 'Data',
        marker=dict(
            size=3,
            )
        )
)

#Initialize a dataframe to store the results:
col = ['rss','intercept'] + ['coef_x_%d'%i for i in range(1,20)]
ind = ['model_pow_%d'%i for i in range(1,20)]
coef_matrix_simple = pd.DataFrame(index=ind, columns=col)

for i in range(2,20):
    plot, res = linear_regression(df, i)
    data.append(plot)
    coef_matrix_simple.iloc[i-1,0:i+2] = res



fig = go.Figure(data=data)

render(fig, 1000,500)
fig.show()

    

In [7]:
#Set the display format to be scientific for ease of analysis
pd.options.display.float_format = '{:,.2g}'.format
coef_matrix_simple

Unnamed: 0,rss,intercept,coef_x_1,coef_x_2,coef_x_3,coef_x_4,coef_x_5,coef_x_6,coef_x_7,coef_x_8,...,coef_x_10,coef_x_11,coef_x_12,coef_x_13,coef_x_14,coef_x_15,coef_x_16,coef_x_17,coef_x_18,coef_x_19
model_pow_1,,,,,,,,,,,...,,,,,,,,,,
model_pow_2,2.7,2.1,-0.74,0.019,,,,,,,...,,,,,,,,,,
model_pow_3,0.99,-0.59,2.5,-1.1,0.12,,,,,,...,,,,,,,,,,
model_pow_4,0.96,0.2,1.2,-0.4,-0.044,0.013,,,,,...,,,,,,,,,,
model_pow_5,0.96,0.23,1.1,-0.34,-0.066,0.017,-0.00024,,,,...,,,,,,,,,,
model_pow_6,0.93,-6.2,17.0,-16.0,7.9,-2.1,0.28,-0.015,,,...,,,,,,,,,,
model_pow_7,0.91,-18.0,52.0,-58.0,34.0,-12.0,2.3,-0.24,0.01,,...,,,,,,,,,,
model_pow_8,0.91,-5.9,11.0,-0.26,-10.0,9.0,-3.6,0.77,-0.086,0.0039,...,,,,,,,,,,
model_pow_9,0.91,7.7,-41.0,85.0,-88.0,53.0,-20.0,4.5,-0.63,0.049,...,,,,,,,,,,
model_pow_10,0.89,250.0,-1100.0,2000.0,-2100.0,1400.0,-610.0,180.0,-36.0,4.5,...,0.01,,,,,,,,,


In [17]:
def linear_regression(df, power):

    linreg = linear_model.Lasso(alpha=0.4)

    predictors = ['x']

    if power >=2:
        predictors.extend(['x_%d'%i for i in range(2,power+1)])

    linreg.fit(df[predictors], df['y'])

    plot = go.Scatter(
        x=df.x, 
        y=linreg.predict(df[predictors]), 
        mode ='lines',
        opacity = 0.5,
        name = 'x_%d'%power,
        marker=dict(
            size=3,
            )
        )


    #Return the result in pre-defined format
    rss = sum((linreg.predict(df[predictors])-df.y)**2)
    ret = [rss]
    ret.extend([linreg.intercept_])
    ret.extend(linreg.coef_)

    return plot, ret

data = []

data.append(
    go.Scatter(
        x=df.x, 
        y=df.y, 
        mode ='markers',
        name = 'Data',
        marker=dict(
            size=3,
            )
        )
)

#Initialize a dataframe to store the results:
col = ['rss','intercept'] + ['coef_x_%d'%i for i in range(1,20)]
ind = ['model_pow_%d'%i for i in range(1,20)]
coef_matrix_ridge = pd.DataFrame(index=ind, columns=col)

for i in range(2,20):
    plot, res = linear_regression(df, i)
    data.append(plot)
    coef_matrix_ridge.iloc[i-1,0:i+2] = res



fig = go.Figure(data=data)

render(fig, 1000,1000)
fig.show()

    


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.3692962566302873, tolerance: 0.003714520096867641


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.10239625697845889, tolerance: 0.003714520096867641


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.6018124533047208, tolerance: 0.003714520096867641


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.926562485602077, tolerance: 0.003714520096867641


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.9394214470024638, tolerance: 0.003714520096867641


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.9158628164496512, tolerance: 0.003714520096867641


Objective did not converge. You might want to increase the number of iterations. Duality gap: 0.890342482610024

In [14]:
#Set the display format to be scientific for ease of analysis
pd.options.display.float_format = '{:,.2g}'.format
coef_matrix_ridge

Unnamed: 0,rss,intercept,coef_x_1,coef_x_2,coef_x_3,coef_x_4,coef_x_5,coef_x_6,coef_x_7,coef_x_8,...,coef_x_10,coef_x_11,coef_x_12,coef_x_13,coef_x_14,coef_x_15,coef_x_16,coef_x_17,coef_x_18,coef_x_19
model_pow_1,,,,,,,,,,,...,,,,,,,,,,
model_pow_2,2.8,2.0,-0.64,0.0032,,,,,,,...,,,,,,,,,,
model_pow_3,1.6,1.0,0.58,-0.47,0.054,,,,,,...,,,,,,,,,,
model_pow_4,0.98,0.89,0.13,0.15,-0.16,0.022,,,,,...,,,,,,,,,,
model_pow_5,0.99,0.94,0.11,0.1,-0.12,0.012,0.00085,,,,...,,,,,,,,,,
model_pow_6,0.97,0.94,0.044,0.061,0.023,-0.072,0.019,-0.0014,,,...,,,,,,,,,,
model_pow_7,0.97,0.98,0.036,0.036,0.00072,-0.03,0.0005,0.0021,-0.00023,,...,,,,,,,,,,
model_pow_8,0.97,0.98,0.033,0.033,0.002,-0.024,-0.0055,0.0042,-0.00057,2.1e-05,...,,,,,,,,,,
model_pow_9,0.97,0.98,0.034,0.036,0.0061,-0.024,-0.013,0.0093,-0.0021,0.00024,...,,,,,,,,,,
model_pow_10,0.96,0.97,0.043,0.048,0.0099,-0.04,-0.029,0.04,-0.019,0.0046,...,2.7e-05,,,,,,,,,


In [15]:
coef_matrix_ridge.apply(lambda x: sum(x.values==0),axis=1)

model_pow_1     0
model_pow_2     0
model_pow_3     0
model_pow_4     0
model_pow_5     0
model_pow_6     0
model_pow_7     0
model_pow_8     0
model_pow_9     0
model_pow_10    0
model_pow_11    0
model_pow_12    0
model_pow_13    0
model_pow_14    0
model_pow_15    0
model_pow_16    0
model_pow_17    0
model_pow_18    0
model_pow_19    0
dtype: int64

In [30]:
def regression(df, power, model):

    predictors = ['x']

    if power >=2:
        predictors.extend(['x_%d'%i for i in range(2,power+1)])

    model.fit(df[predictors], df['y'])

    plot = go.Scatter(
        x=df.x, 
        y=model.predict(df[predictors]), 
        mode ='lines',
        opacity = 0.5,
        name = 'x_%d'%power,
        marker=dict(
            size=3,
            )
        )


    #Return the result in pre-defined format
    rss = sum((model.predict(df[predictors])-df.y)**2)
    ret = [rss]
    ret.extend([model.intercept_])
    ret.extend(model.coef_)

    return plot, ret

data = []

data.append(
    go.Scatter(
        x=df.x, 
        y=df.y, 
        mode ='markers',
        name = 'Data',
        marker=dict(
            size=3,
            )
        )
)

#Initialize a dataframe to store the results:
col = ['rss','intercept'] + ['coef_x_%d'%i for i in range(1,20)]
ind = ['model_pow_%d'%i for i in range(1,20)]
coef_matrix= pd.DataFrame(index=ind, columns=col)

for i in range(2,20):
    plot, res = regression(df, i, linear_model.BayesianRidge())
    data.append(plot)
    coef_matrix.iloc[i-1,0:i+2] = res



fig = go.Figure(data=data)

render(fig, 1000,1000)
fig.show()

    

In [31]:
coef_matrix

Unnamed: 0,rss,intercept,coef_x_1,coef_x_2,coef_x_3,coef_x_4,coef_x_5,coef_x_6,coef_x_7,coef_x_8,...,coef_x_10,coef_x_11,coef_x_12,coef_x_13,coef_x_14,coef_x_15,coef_x_16,coef_x_17,coef_x_18,coef_x_19
model_pow_1,,,,,,,,,,,...,,,,,,,,,,
model_pow_2,2.7,2.0,-0.69,0.011,,,,,,,...,,,,,,,,,,
model_pow_3,0.99,-0.48,2.4,-1.1,0.12,,,,,,...,,,,,,,,,,
model_pow_4,0.99,0.94,0.11,0.14,-0.15,0.021,,,,,...,,,,,,,,,,
model_pow_5,1.0,1.1,-0.00082,-0.0049,-0.014,-0.017,0.0034,,,,...,,,,,,,,,,
model_pow_6,1.0,1.1,-4.2e-05,-0.0028,-0.011,-0.022,0.0049,-0.00015,,,...,,,,,,,,,,
model_pow_7,0.97,1.1,-9.5e-05,-0.00072,-0.0026,-0.0065,-0.0094,0.0036,-0.00031,,...,,,,,,,,,,
model_pow_8,1.0,1.0,-0.00014,-0.00049,-0.0012,-0.0023,-0.0033,-0.0022,0.0011,-0.00012,...,,,,,,,,,,
model_pow_9,0.99,1.0,-0.00012,-0.00042,-0.0011,-0.0023,-0.0035,-0.0029,0.0017,-0.00025,...,,,,,,,,,,
model_pow_10,0.99,1.0,-4e-05,-0.00014,-0.00036,-0.00078,-0.0014,-0.0019,-0.0013,0.0011,...,1.6e-05,,,,,,,,,
