<a href="https://colab.research.google.com/github/jahanshukhy/Intro-to-Machine-Learning/blob/main/HW5_prob3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [233]:
# ------problem 3a [one hidden layer]------
import time, math, numpy as np, pandas as pd
import torch, torch.nn as nn, torch.optim as optim
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [234]:
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float32

In [235]:
CSV = "/content/drive/My Drive/ML/HW2/Housing.csv"
df = pd.read_csv(CSV)

feat_cols = ["area", "bedrooms", "bathrooms", "stories", "parking"]
target_col = "price"

X_np = df[feat_cols].astype(float).values
y_np = df[target_col].astype(float).values.reshape(-1, 1)


In [236]:
# 80/20 split using PyTorch

N = X_np.shape[0]
n_train = int(0.8 * N)

g = torch.Generator().manual_seed(SEED)
perm = torch.randperm(N, generator=g).numpy()
tr_idx, va_idx = perm[:n_train], perm[n_train:]

X_tr_np, y_tr_np = X_np[tr_idx], y_np[tr_idx]
X_va_np, y_va_np = X_np[va_idx], y_np[va_idx]

# Standardize X

mu_X = X_tr_np.mean(axis=0, keepdims=True)
sd_X = X_tr_np.std(axis=0, keepdims=True) + 1e-8

Xtr_std_np = (X_tr_np - mu_X) / sd_X
Xva_std_np = (X_va_np - mu_X) / sd_X

# Tensors
Xtr = torch.tensor(Xtr_std_np, dtype=dtype, device=device)
ytr = torch.tensor(y_tr_np,     dtype=dtype, device=device)
Xva = torch.tensor(Xva_std_np,  dtype=dtype, device=device)
yva = torch.tensor(y_va_np,     dtype=dtype, device=device)

In [272]:
BATCH_SIZE = 64
train_ds = torch.utils.data.TensorDataset(Xtr, ytr)
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, generator=g)

In [278]:
class MLP1H(nn.Module):
    def __init__(self, d_in, d_hidden=8):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_in, d_hidden),
            nn.ReLU(),
            nn.Linear(d_hidden, 1)
        )
    def forward(self, x): return self.net(x)

model = MLP1H(d_in=Xtr.shape[1]).to(device)
print("3a model params:", count_params(model))


3a model params: 57


In [274]:
# Loss & Optimizer (SGD)

criterion = nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

EPOCHS = 200
t0 = time.time()

for ep in range(1, EPOCHS + 1):
    model.train()
    for xb, yb in train_loader:
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
    scheduler.step()

train_time_s = time.time() - t0

In [275]:
# Final metrics at epoch 200

model.eval()
with torch.no_grad():
    ytr_hat = model(Xtr).cpu().numpy().flatten()
    yva_hat = model(Xva).cpu().numpy().flatten()

ytr_true = y_tr_np.flatten()
yva_true = y_va_np.flatten()

train_mse  = float(np.mean((ytr_hat - ytr_true)**2))
val_mse    = float(np.mean((yva_hat - yva_true)**2))
train_rmse = math.sqrt(train_mse)
val_rmse   = math.sqrt(val_mse)

ss_res_tr = np.sum((ytr_true - ytr_hat)**2)
ss_tot_tr = np.sum((ytr_true - np.mean(ytr_true))**2)
train_r2  = 1.0 - ss_res_tr / ss_tot_tr

ss_res = np.sum((yva_true - yva_hat)**2)
ss_tot = np.sum((yva_true - np.mean(yva_true))**2)
val_r2  = 1.0 - ss_res/ss_tot

print("Metrices for Problem 3a using Neural Network [one hidden layer with 8 nodes]")
print(f"Training time: {train_time_s:.2f} sec")
print(f"Train loss  -> MSE: {train_mse:,.0f}   RMSE: {train_rmse:,.0f} R^2(accuracy): {train_r2:.3f}")
print(f"Validation  -> MSE: {val_mse:,.0f}   RMSE: {val_rmse:,.0f}   R^2 (accuracy): {val_r2:.3f}")

Metrices for Problem 3a using Neural Network [one hidden layer with 8 nodes]
Training time: 1.95 sec
Train loss  -> MSE: 26,305,949,382,184   RMSE: 5,128,933 R^2(accuracy): -6.188
Validation  -> MSE: 25,845,319,801,263   RMSE: 5,083,829   R^2 (accuracy): -8.161


In [262]:
#--------- problem 3b [3 hidden layers] ---------

class MLP3H(nn.Module):
    def __init__(self, d_in, h1=32, h2=16, h3=8):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_in, h1),
            nn.ReLU(),
            nn.Linear(h1, h2),
            nn.ReLU(),
            nn.Linear(h2, h3),
            nn.ReLU(),
            nn.Linear(h3, 1)
        )
    def forward(self, x):
        return self.net(x)

# parameters  to compare "model size" vs 3a
def count_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


In [263]:
# Make the deeper model
model_b = MLP3H(d_in=Xtr.shape[1], h1=32, h2=16, h3=8).to(device)


optimizer_b = torch.optim.SGD(model_b.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)


print("3b model params:", count_params(model_b))


3b model params: 865


In [264]:
EPOCHS_3B = 200
t0 = time.time()

model_b.train()
for ep in range(1, EPOCHS_3B + 1):
    for xb, yb in train_loader:
        optimizer_b.zero_grad()
        pred = model_b(xb)
        loss = criterion(pred, yb)  # MSE on original price units
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_b.parameters(), 1.0)
        optimizer_b.step()
        scheduler.step()

train_time_b = time.time() - t0


In [276]:
model_b.eval()
with torch.no_grad():
    ytr_hat_b = model_b(Xtr).cpu().numpy().flatten()
    yva_hat_b = model_b(Xva).cpu().numpy().flatten()

ytr_true = y_tr_np.flatten()
yva_true = y_va_np.flatten()

# Train/Val MSE, RMSE
train_mse_b  = float(np.mean((ytr_hat_b - ytr_true)**2))
val_mse_b    = float(np.mean((yva_hat_b - yva_true)**2))
train_rmse_b = math.sqrt(train_mse_b)
val_rmse_b   = math.sqrt(val_mse_b)

# R^2 (accuracy) on training & validation
ss_res_tr = np.sum((ytr_true - ytr_hat_b)**2)
ss_tot_tr = np.sum((ytr_true - np.mean(ytr_true))**2)
train_r2_b  = 1.0 - ss_res_tr/ss_tot_tr

ss_res = np.sum((yva_true - yva_hat_b)**2)
ss_tot = np.sum((yva_true - np.mean(yva_true))**2)
val_r2_b  = 1.0 - ss_res/ss_tot

print("\nMetrices for Problem 3b (3 hidden layers)")
print(f"Training time: {train_time_b:.2f} sec")
print(f"Params (trainable): {count_params(model_b)}")
print(f"Train loss  -> MSE: {train_mse_b:,.0f}   RMSE: {train_rmse_b:,.0f}   R^2: {train_r2_b:.3f}")
print(f"Validation  -> MSE: {val_mse_b:,.0f}   RMSE: {val_rmse_b:,.0f}   R^2: {val_r2_b:.3f}")



Metrices for Problem 3b (3 hidden layers)
Training time: 3.73 sec
Params (trainable): 865
Train loss  -> MSE: 26,305,941,371,947   RMSE: 5,128,932   R^2: -6.188
Validation  -> MSE: 25,845,311,913,968   RMSE: 5,083,828   R^2: -8.161
