Ridge Regression is typically used when a dataset has many columns (features) relative to its number of rows (samples): its L2 penalty shrinks the coefficients, which reduces overfitting.

In [1]:
from sklearn.linear_model import LinearRegression, Ridge
import numpy as np

# Toy design matrix: four samples, two features each
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# Targets are generated exactly as y = 1*x0 + 2*x1 + 3 (no noise)
y = X @ np.array([1, 2]) + 3

# Ordinary least squares; fit() returns the fitted estimator itself
lr = LinearRegression().fit(X, y)

# Show the learned slope per feature, then the learned offset
print("Coefficients:", lr.coef_)
print("Intercept:", lr.intercept_)

Coefficients: [1. 2.]
Intercept: 3.0000000000000018


In [2]:
from sklearn.linear_model import Ridge
import numpy as np

# Toy design matrix: four samples, two features each
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# Targets are generated exactly as y = 1*x0 + 2*x1 + 3 (no noise)
y = X @ np.array([1, 2]) + 3

# Ridge model; alpha plays the role of lambda (the penalty weight) in the formula.
# fit() returns the fitted estimator itself.
ridge_reg = Ridge(alpha=0.5).fit(X, y)

# Show the learned slope per feature, then the learned offset
print("Coefficients:", ridge_reg.coef_)
print("Intercept:", ridge_reg.intercept_)

Coefficients: [0.90909091 1.63636364]
Intercept: 3.8636363636363633


Comparing Linear Regression and Ridge Regression

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler

# Read seaborn's 'diamonds' example dataset into `df`
df = sns.load_dataset(name='diamonds')

In [5]:
# Preprocess the data
# Split the frame into the target/labels y and the remaining feature columns X
y = df['price']
X = df.drop(columns='price')

# Column groups, one per kind of transformation
categorical_features = ['cut', 'color', 'clarity']
numeric_features = ['carat', 'depth', 'table', 'x', 'y', 'z']

# Standardise the numeric columns and one-hot encode the categorical ones
numeric_step = ('num', StandardScaler(), numeric_features)
categorical_step = ('cat', OneHotEncoder(), categorical_features)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Train/test split: 80% of the rows for training, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)



In [6]:
# Create one Pipeline per model.
# A Pipeline keeps the step objects it is handed rather than copying them, so
# passing the same `preprocessor` instance to both pipelines would make them
# share (and refit) a single transformer. Each pipeline therefore gets its own
# unfitted clone with identical parameters.

# Linear Regression Pipeline
lr_pipeline = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                              ('regressor', LinearRegression())])

# Ridge Regression Pipeline (alpha is the L2 penalty weight)
ridge_pipeline = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                 ('regressor', Ridge(alpha=0.5))])

In [7]:
def evaluate_regressor(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and score its predictions on an evaluation set.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
        The estimator or pipeline to train; it is fitted in place.
    X_fit, y_fit
        Features and targets passed to ``model.fit``.
    X_eval, y_eval
        Features passed to ``model.predict`` and the true targets the
        predictions are scored against.

    Returns
    -------
    tuple
        ``(predictions, metrics)`` where ``metrics`` is a dict keyed by
        "MSE", "R2", "MAE", "MAPE" and "RMSE", in that order.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    mse = mean_squared_error(y_eval, predictions)
    metrics = {
        "MSE": mse,
        "R2": r2_score(y_eval, predictions),
        "MAE": mean_absolute_error(y_eval, predictions),
        "MAPE": mean_absolute_percentage_error(y_eval, predictions),
        "RMSE": np.sqrt(mse),  # RMSE is derived from the MSE computed above
    }
    return predictions, metrics


# Train and evaluate Linear Regression
lr_pred, lr_metrics = evaluate_regressor(lr_pipeline, X_train, y_train, X_test, y_test)
lr_mse = lr_metrics["MSE"]
lr_r2 = lr_metrics["R2"]
lr_mae = lr_metrics["MAE"]
lr_mape = lr_metrics["MAPE"]
lr_rmse = lr_metrics["RMSE"]

# Train and evaluate Ridge Regression
ridge_pred, ridge_metrics = evaluate_regressor(ridge_pipeline, X_train, y_train, X_test, y_test)
ridge_mse = ridge_metrics["MSE"]
ridge_r2 = ridge_metrics["R2"]
ridge_mae = ridge_metrics["MAE"]
ridge_mape = ridge_metrics["MAPE"]
ridge_rmse = ridge_metrics["RMSE"]
# Backward-compatible alias: this value was originally stored under a misspelled name
ridhe_r2 = ridge_r2

# Print the two models side by side, one metric at a time, with a separator
# line between metrics (none before the first or after the last).
for position, metric_name in enumerate(lr_metrics):
    if position:
        print("------------------------")
    print(f"Linear Regression {metric_name}:", lr_metrics[metric_name])
    print(f"Ridge Regression {metric_name}:", ridge_metrics[metric_name])

Linear Regression MSE: 1288705.4778516763
Ridge Regression MSE: 1288691.2489788434
------------------------
Linear Regression R2: 0.9189331350419386
Ridge Regression R2: 0.9189340301185347
------------------------
Linear Regression MAE: 737.1513665933285
Ridge Regression MAE: 737.145505679206
------------------------
Linear Regression MAPE: 0.3952933516494362
Ridge Regression MAPE: 0.39525103728552
------------------------
Linear Regression RMSE: 1135.2116445190634
Ridge Regression RMSE: 1135.2053774444707
