# Regression Models - Machine Learning


In [None]:
# Importing necessary libraries
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [None]:
# Read the pre-cleaned dataset from disk into a DataFrame.
# The path is relative, so the CSV must already exist in the directory
# the notebook is run from.
data_path = "cleaned_final_data.csv"
data = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Separate the label column from the predictor columns.
# `target` names the column to predict; change it to match the dataset.
target = "price"
y = data[target]
# Every remaining column is used as a feature.
X = data.drop(labels=target, axis="columns")

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Standardize the features.
# The scaler is fitted on the training split only; the same fitted statistics
# are then applied to both the training and the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Step 1: Training Linear Regression Model


In [None]:
# Fit a plain linear regression on the standardized training features.
lin_reg = LinearRegression().fit(X_train_scaled, y_train)

# Predict on both splits so train and test scores can be compared side by side.
y_pred_train = lin_reg.predict(X_train_scaled)
y_pred_test = lin_reg.predict(X_test_scaled)

# Compute R² and MAE for each split up front, then report them together.
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
train_mae = mean_absolute_error(y_train, y_pred_train)
test_mae = mean_absolute_error(y_test, y_pred_test)

print("\nLinear Regression Evaluation:")
print("Train R²:", train_r2)
print("Test R²:", test_r2)
print("Train MAE:", train_mae)
print("Test MAE:", test_mae)

In [None]:
# Linear Regression establishes a linear relationship between input features and the target variable.
# We evaluate it using R² (coefficient of determination) and Mean Absolute Error (MAE) to measure performance.

## Step 2: Ridge & Lasso Regression - Hyperparameter Tuning

In [None]:
# Tune Ridge's regularization strength with 5-fold cross-validation.
# Candidates are ranked by negated mean absolute error (higher is better).
ridge_alphas = {"alpha": [0.1, 1, 10, 100]}
ridge_grid = GridSearchCV(
    estimator=Ridge(),
    param_grid=ridge_alphas,
    scoring="neg_mean_absolute_error",
    cv=5,
)
ridge_grid.fit(X_train_scaled, y_train)

# Keep the estimator that the search selected as best.
best_ridge = ridge_grid.best_estimator_

In [None]:
# Ridge Regression is a regularized version of Linear Regression.
# It adds an L2 penalty on the size of the coefficients to reduce overfitting.
# We tune the alpha parameter (the regularization strength) using GridSearchCV with 5-fold cross-validation, scored by mean absolute error.

In [None]:
# Tune Lasso's regularization strength with 5-fold cross-validation.
# Candidates are ranked by negated mean absolute error (higher is better).
lasso_alphas = {"alpha": [0.1, 1, 10, 100]}
lasso_grid = GridSearchCV(
    estimator=Lasso(),
    param_grid=lasso_alphas,
    scoring="neg_mean_absolute_error",
    cv=5,
)
lasso_grid.fit(X_train_scaled, y_train)

# Keep the estimator that the search selected as best.
best_lasso = lasso_grid.best_estimator_

In [None]:
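# Lasso Regression is similar to Ridge but uses L1 regularization (a penalty on the absolute values of the coefficients) instead of L2.
# It can shrink some coefficients to zero, effectively performing feature selection.
# We use GridSearchCV (5-fold cross-validation, scored by mean absolute error) to determine the alpha value that best balances bias and variance.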

In [None]:
# Save Models
# Persist every fitted artifact needed to make predictions later.
# NOTE: joblib files are pickle-based; only load them from trusted sources.

# All three models were trained on standardized features, so the fitted scaler
# has to be saved as well: new raw inputs must go through the same
# transformation before being passed to a reloaded model.
joblib.dump(scaler, "scaler.pkl")

# The tuned Ridge/Lasso estimators and the baseline linear model.
joblib.dump(best_ridge, "ridge_model.pkl")
joblib.dump(best_lasso, "lasso_model.pkl")
joblib.dump(lin_reg, "linear_regression.pkl")

In [None]:
# Trained Linear Regression, Ridge, and Lasso models.
# Applied Hyperparameter Tuning for Ridge & Lasso.
# Saved the best models for later use.