<a href="https://colab.research.google.com/github/asupraja3/py-ml-toolkit-collab/blob/main/LinearRegression_ScikitLearn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Optional Lab: Linear Regression using Scikit-Learn

This lab demonstrates:
- Using **scikit-learn** to implement Linear Regression and SGD-based regression
- Scaling/normalizing features before training
- Comparing model coefficients


In [None]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.preprocessing import StandardScaler

# Display formatting only: print NumPy floating-point values with 2 digits of precision
np.set_printoptions(precision=2)


## 1. Generate synthetic 'house' dataset

In [None]:

# Synthetic housing data: m examples described by four features
# (size in sqft, bedrooms, floors, age).
rng = np.random.default_rng(1)  # fixed seed so every run draws the same data
m = 200

# NOTE: the draws below must stay in this order; reordering them changes
# which random values each feature receives.
size = rng.uniform(low=500, high=3500, size=m)
bedrooms = rng.integers(low=1, high=6, size=m)  # upper bound is exclusive -> 1..5
floors = rng.integers(low=1, high=4, size=m)    # upper bound is exclusive -> 1..3
age = rng.uniform(low=0, high=40, size=m)

# One row per example, one column per feature, stored as floats.
X_train = np.c_[size, bedrooms, floors, age].astype(float)

# Ground-truth linear model used to synthesize the targets.
true_b = 50_000
true_w = np.array([250, 10_000, 5_000, -300])  # weights, in the same order as the columns
noise = rng.normal(loc=0, scale=20_000, size=m)  # Gaussian noise added to every target

# Target = linear signal + noise.
y_train = (X_train @ true_w + true_b) + noise

# Column labels, in the same order as the columns of X_train.
X_features = ["size(sqft)", "bedrooms", "floors", "age"]


## 2. Scale/normalize the training data

In [None]:

# Learn the per-column scaling statistics from the training data, then apply
# them to that same data to obtain the normalized feature matrix.
scaler = StandardScaler()
scaler.fit(X_train)
X_norm = scaler.transform(X_train)

# Peak-to-peak (max - min) of every column, before and after scaling,
# to show how much normalization shrinks the spread between features.
raw_range = np.ptp(X_train, axis=0)
norm_range = np.ptp(X_norm, axis=0)
print(f"Peak to Peak range by column in Raw X: {raw_range}")
print(f"Peak to Peak range by column in Normalized X: {norm_range}")


## 3. Linear Regression (Closed-Form Least Squares)

In [None]:

# Fit ordinary least squares on the normalized features (fit() returns the estimator).
lr = LinearRegression().fit(X_norm, y_train)

# Learned parameters; they refer to the normalized features, not the raw units.
print("Intercept:", lr.intercept_)
print("Coefficients:", lr.coef_)

# Predictions on the training set itself (no held-out data in this lab).
y_pred_lr = lr.predict(X_norm)

# Actual vs. predicted prices: points close to the diagonal are well predicted.
fig, ax = plt.subplots()
ax.scatter(y_train, y_pred_lr, alpha=0.7)
ax.set(
    xlabel="Actual Price",
    ylabel="Predicted Price",
    title="Linear Regression Predictions (scikit-learn)",
)
ax.grid(True, alpha=0.3)
plt.show()


## 4. Linear Regression using SGDRegressor (Stochastic Gradient Descent)

In [None]:

# Linear model trained with stochastic gradient descent on the normalized features.
sgdr = SGDRegressor(
    max_iter=1000,             # upper bound on passes over the training data
    tol=1e-3,                  # stopping tolerance
    penalty=None,              # no regularization term
    eta0=0.01,                 # initial learning rate
    learning_rate='constant',  # keep the learning rate fixed at eta0
    random_state=1,            # fixed seed for repeatable results
)
sgdr.fit(X_norm, y_train)

# Learned parameters; they refer to the normalized features, not the raw units.
print("Intercept:", sgdr.intercept_)
print("Coefficients:", sgdr.coef_)

# Predictions on the training set itself (no held-out data in this lab).
y_pred_sgd = sgdr.predict(X_norm)

# Actual vs. predicted prices: points close to the diagonal are well predicted.
fig, ax = plt.subplots()
ax.scatter(y_train, y_pred_sgd, alpha=0.7, color='orange')
ax.set(
    xlabel="Actual Price",
    ylabel="Predicted Price",
    title="SGD Regressor Predictions (scikit-learn)",
)
ax.grid(True, alpha=0.3)
plt.show()
