In [9]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures


In [10]:
# Load dataset (replace with your own if necessary)
# Example: Using synthetic data for illustration
np.random.seed(0)
X = np.random.rand(100, 1) * 10
y = 2.5 * X.squeeze() + np.random.randn(100) * 2

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Linear Regression with gradient descent

In [16]:
# Define parameters
learning_rate = 0.0001
iterations = 1000
m = X_train.shape[0]

# Initialize weights and bias
theta = np.random.randn(1)
bias = np.random.randn(1)

# Gradient Descent Loop
for i in range(iterations):
    # Predictions based on current theta and bias
    y_pred = theta * X_train + bias
    # Compute the gradients
    d_theta = -(2/m) * np.sum((y_train - y_pred) * X_train)
    d_bias = -(2/m) * np.sum(y_train - y_pred)
    # Update parameters
    theta -= learning_rate * d_theta
    bias -= learning_rate * d_bias

# Display final parameters
print(f"Final parameters: theta = {theta[0]}, bias = {bias[0]}")


Final parameters: theta = 0.019017258935391607, bias = 11.729794587452252


Model Evaluation

In [17]:
# Make predictions on the test set
y_pred_test = theta * X_test + bias

# Calculate metrics
mse = mean_squared_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

print(f"Gradient Descent - MSE: {mse}, R^2 Score: {r2}")


Gradient Descent - MSE: 38.1368485111762, R^2 Score: -0.07542744863841122


LR with least squares

In [18]:
# Import the linear regression model from Scikit-Learn
from sklearn.linear_model import LinearRegression

# Create and fit the model
ols_model = LinearRegression()
ols_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_ols = ols_model.predict(X_test)

# Calculate metrics
mse_ols = mean_squared_error(y_test, y_pred_ols)
r2_ols = r2_score(y_test, y_pred_ols)

print(f"OLS - MSE: {mse_ols}, R^2 Score: {r2_ols}")


OLS - MSE: 3.671012987885716, R^2 Score: 0.896480483165161


Polynomial Regression

In [19]:
# Import PolynomialFeatures
from sklearn.preprocessing import PolynomialFeatures

# Define the degree of the polynomial (e.g., 2 for quadratic)
degree = 2
poly = PolynomialFeatures(degree)
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

# Fit a linear regression model on the transformed features
poly_model = LinearRegression()
poly_model.fit(X_poly_train, y_train)

# Make predictions on the test set
y_pred_poly = poly_model.predict(X_poly_test)

# Calculate metrics
mse_poly = mean_squared_error(y_test, y_pred_poly)
r2_poly = r2_score(y_test, y_pred_poly)

print(f"Polynomial Regression (Degree {degree}) - MSE: {mse_poly}, R^2 Score: {r2_poly}")


Polynomial Regression (Degree 2) - MSE: 3.646463944509997, R^2 Score: 0.8971727457960474


LASSO Regression

In [23]:
# Import Lasso regression model from Scikit-Learn
from sklearn.linear_model import Lasso

# Create and fit the Lasso model
alpha = 0.1  # Regularization strength; you can adjust this value
lasso_model = Lasso(alpha=alpha)
lasso_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_lasso = lasso_model.predict(X_test)

# Calculate metrics
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

print(f"LASSO Regression - MSE: {mse_lasso}, R^2 Score: {r2_lasso}")


LASSO Regression - MSE: 3.6634928753091396, R^2 Score: 0.8966925440930411


Ridge

In [22]:
# Import Ridge regression model from Scikit-Learn
from sklearn.linear_model import Ridge

# Create and fit the Ridge model
alpha = 1.0  # Regularization strength; you can adjust this value
ridge_model = Ridge(alpha=alpha)
ridge_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_ridge = ridge_model.predict(X_test)

# Calculate metrics
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print(f"RIDGE Regression - MSE: {mse_ridge}, R^2 Score: {r2_ridge}")


RIDGE Regression - MSE: 3.6685117766157402, R^2 Score: 0.8965510152452775


comparison


In [24]:
# Compile metrics into a DataFrame for comparison
comparison_table = pd.DataFrame({
    'Model': ['Gradient Descent', 'OLS', 'Polynomial', 'LASSO', 'RIDGE'],
    'MSE': [mse, mse_ols, mse_poly, mse_lasso, mse_ridge],
    'R^2 Score': [r2, r2_ols, r2_poly, r2_lasso, r2_ridge]
})

print(comparison_table)


              Model        MSE  R^2 Score
0  Gradient Descent  38.136849  -0.075427
1               OLS   3.671013   0.896480
2        Polynomial   3.646464   0.897173
3             LASSO   3.663493   0.896693
4             RIDGE   3.668512   0.896551
