# California Housing Market Dataset

In [None]:
import torch
import numpy as np

from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Show which PyTorch version this notebook is running against.
print('torch:', torch.__version__)


In [None]:
# Load the California housing data with scikit-learn's fetcher, which is
# already imported at the top of the notebook. With return_X_y=True it hands
# back the feature matrix and the target vector as two separate arrays, so
# they can be unpacked directly. (The earlier call went through `tf`, which
# this notebook never imports.)
X, y = fetch_california_housing(return_X_y=True)

# Use float32 throughout so the arrays match the torch tensors built later.
X = X.astype("float32")
y = y.astype("float32")

# Train/val/test split: 70/15/15.
# First carve off 30% of the rows, then cut that remainder in half.
# A fixed random_state keeps the split reproducible across runs.
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X, y, test_size=0.30, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_tmp, y_tmp, test_size=0.50, random_state=42
)

# Standardize features using statistics from the training set only, so no
# information from the validation or test rows leaks into the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)
X_test  = scaler.transform(X_test)

# Print dimensions
print("Train X:", X_train.shape, "Train y:", y_train.shape)
print("Val   X:", X_val.shape,   "Val   y:", y_val.shape)
print("Test  X:", X_test.shape,  "Test  y:", y_test.shape)



## Train Without Regularization

In [None]:
# Move the training split into float32 tensors. The targets are reshaped into
# a single column so they line up with the (n_samples, 1) model output.
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)

# Hyperparameters for plain full-batch gradient descent.
epochs = 200
lr = 1e-3

# Linear model parameters, zero-initialized: one weight per feature plus a bias.
n_features = X_train.shape[1]
w = torch.zeros(n_features, 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

for _ in range(epochs):
    # Forward pass and mean squared error over the whole training set.
    y_pred = torch.matmul(X_train_t, w) + b
    loss = torch.mean(torch.square(y_pred - y_train_t))

    # Backward pass populates w.grad and b.grad.
    loss.backward()

    # Apply the update outside autograd tracking, then clear each gradient so
    # the next backward pass does not accumulate on top of it.
    with torch.no_grad():
        for param in (w, b):
            param.sub_(lr * param.grad)
            param.grad.zero_()

# Loss from the last forward pass (computed before the final update step).
print("Final loss:", loss.item())


## Test Loss

In [None]:
# Held-out split as float32 tensors, with the targets as a column vector to
# match the shape of the model output.
y_test_t = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)
X_test_t = torch.tensor(X_test, dtype=torch.float32)

# Evaluation only: no gradients are needed for the learned w and b.
with torch.no_grad():
    y_pred_test = torch.matmul(X_test_t, w) + b
    test_loss = torch.mean(torch.square(y_pred_test - y_test_t))

print("Test loss:", test_loss.item())


## Now Train With Regularization