In [None]:
import kagglehub
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
# Download the Kaggle dataset; the returned path is used as the directory
# that contains housing.csv.
camnugent_california_housing_prices_path = kagglehub.dataset_download('camnugent/california-housing-prices')

# Load the CSV, discard rows with missing values, and replace the
# ocean_proximity labels with their integer category codes.
data = (
    pd.read_csv(camnugent_california_housing_prices_path + "/housing.csv")
    .dropna()
    .assign(ocean_proximity=lambda frame: frame['ocean_proximity'].astype('category').cat.codes)
)

In [None]:
# Separate the predictors from the regression target. The target is reshaped
# to a single column because StandardScaler works on 2-D input.
features = data.drop("median_house_value", axis=1).values
target = data["median_house_value"].values.reshape(-1, 1)

# Fit the scalers on the training rows only. The split cell below assigns the
# leading 70% of the samples to training (int(0.7 * m), no shuffling), so the
# same row count is used here; fitting on every row would leak test-set
# statistics into the preprocessing. Keep this fraction in sync with the split.
n_fit_rows = int(0.7 * features.shape[0])

scaler_X = StandardScaler().fit(features[:n_fit_rows])
scaler_Y = StandardScaler().fit(target[:n_fit_rows])

# Apply the training-set statistics to all rows (train and test alike).
features = scaler_X.transform(features)
target = scaler_Y.transform(target)

# The network code works column-wise: one sample per column.
X = features.T
Y = target.T

In [None]:
# Positional 70/30 split along the column axis: the leading columns become
# the training set and the remaining ones the test set (no shuffling).
m = X.shape[1]
split_index = int(0.7 * m)

X_train, X_test = X[:, :split_index], X[:, split_index:]
Y_train, Y_test = Y[:, :split_index], Y[:, split_index:]

In [None]:
def _ask_number(prompt, cast, default, minimum=None):
    """Prompt for an optional numeric setting.

    Args:
        prompt: Text shown to the user.
        cast: Converter applied to a non-blank reply (``int`` or ``float``).
        default: Value returned when the reply is blank.
        minimum: Optional inclusive lower bound for a non-blank reply.

    Returns:
        ``default`` for a blank reply, otherwise the converted value.

    Raises:
        ValueError: If the reply cannot be converted or is below ``minimum``.
    """
    reply = input(prompt).strip()
    if reply == "":
        return default
    try:
        value = cast(reply)
    except ValueError:
        raise ValueError(f"Expected {cast.__name__} input, got {reply!r}.") from None
    if minimum is not None and value < minimum:
        raise ValueError(f"Value must be at least {minimum}, got {value}.")
    return value


# The choice is normalised here; the cell that defines the activation
# functions decides what to do with an unrecognised value.
activation_choice = input("Choose activation function for hidden layers (sigmoid or relu): ").strip().lower()

# A blank reply still caps training at 1000 epochs; an error threshold, when
# given, can only end training earlier. Zero/negative counts would silently
# skip training, so they are rejected.
num_epochs = _ask_number(
    "Enter number of epochs (or leave blank to use error threshold stopping): ",
    int,
    1000,
    minimum=1,
)

# None means "no threshold-based early stopping".
error_threshold = _ask_number("Enter error threshold (or leave blank to ignore): ", float, None)

# A batch size of 0 would make the mini-batch range() fail and a negative one
# would yield no batches at all, so require at least 1.
batch_size = _ask_number(
    "Enter mini-batch size (or leave blank for default 32): ",
    int,
    32,
    minimum=1,
)

learning_rate = 0.01

# Layer widths. input_dim has to equal the number of feature rows of X
# (presumably 9 for this dataset once the target column is dropped - verify
# if the feature set changes).
input_dim = 9
n_hidden1 = 4
n_hidden2 = 4
output_dim = 1


Choose activation function for hidden layers (sigmoid or relu): relu
Enter number of epochs (or leave blank to use error threshold stopping): 10000
Enter error threshold (or leave blank to ignore): 
Enter mini-batch size (or leave blank for default 32): 


In [None]:
def _sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    ``np.logaddexp(0, -x)`` equals ``log(1 + exp(-x))`` but stays finite for
    large negative ``x``, where a direct ``np.exp(-x)`` overflows and emits a
    RuntimeWarning.
    """
    return np.exp(-np.logaddexp(0, -x))


def _sigmoid_derivative(x):
    """Derivative of the logistic function, s * (1 - s) with s = sigmoid(x)."""
    s = _sigmoid(x)
    return s * (1 - s)


def _relu(x):
    """Element-wise max(0, x)."""
    return np.maximum(0, x)


def _relu_derivative(x):
    """1.0 where x > 0 and 0.0 elsewhere (the value at x == 0 is taken as 0)."""
    return (x > 0).astype(np.float64)


# Supported hidden-layer activations: name -> (function, derivative).
_ACTIVATIONS = {
    "sigmoid": (_sigmoid, _sigmoid_derivative),
    "relu": (_relu, _relu_derivative),
}

# Any unrecognised choice falls back to ReLU, with a notice to the user.
if activation_choice not in _ACTIVATIONS:
    print("Invalid activation function choice. Defaulting to ReLU.")
activation, activation_derivative = _ACTIVATIONS.get(activation_choice, _ACTIVATIONS["relu"])

In [None]:
def init_params(seed=None, scale=0.01):
    """Create the initial weights and biases of the three-layer network.

    Layer sizes are read from the module-level ``input_dim``, ``n_hidden1``,
    ``n_hidden2`` and ``output_dim``.

    Args:
        seed: Optional seed. When given, the weights are drawn from a
            dedicated ``np.random.default_rng(seed)`` generator so the
            initialisation is reproducible. When ``None`` (default) the
            global ``np.random`` stream is used, as before.
        scale: Factor applied to the standard-normal draws (default 0.01).

    Returns:
        Tuple ``(W1, b1, W2, b2, W3, b3)``. Each ``W`` has shape
        ``(layer_out, layer_in)`` and each ``b`` is a zero column vector of
        shape ``(layer_out, 1)``.
    """
    if seed is None:
        def _normal(rows, cols):
            # Global NumPy stream: identical to the un-parameterised behaviour.
            return np.random.randn(rows, cols)
    else:
        rng = np.random.default_rng(seed)

        def _normal(rows, cols):
            return rng.standard_normal((rows, cols))

    # Weights are drawn in layer order so a given seed always maps to the
    # same parameters.
    W1 = _normal(n_hidden1, input_dim) * scale
    b1 = np.zeros((n_hidden1, 1))
    W2 = _normal(n_hidden2, n_hidden1) * scale
    b2 = np.zeros((n_hidden2, 1))
    W3 = _normal(output_dim, n_hidden2) * scale
    b3 = np.zeros((output_dim, 1))
    return W1, b1, W2, b2, W3, b3

In [None]:
# Create the starting weights and biases for all three layers.
W1, b1, W2, b2, W3, b3 = init_params()

In [None]:
def forward(X, W1, b1, W2, b2, W3, b3):
    """Run the three-layer network on ``X`` and return every intermediate.

    The two hidden layers apply an affine map followed by the module-level
    ``activation``; the output layer is affine only.

    Returns:
        ``(Z1, A1, Z2, A2, Z3)``: the pre-activation and activation of each
        hidden layer in order, followed by the output-layer value ``Z3``.
    """
    hidden1_pre = W1.dot(X) + b1
    hidden1_act = activation(hidden1_pre)

    hidden2_pre = W2.dot(hidden1_act) + b2
    hidden2_act = activation(hidden2_pre)

    # No activation on the output layer.
    output = W3.dot(hidden2_act) + b3
    return hidden1_pre, hidden1_act, hidden2_pre, hidden2_act, output

def compute_loss(Y_pred, Y_true):
    """Return the summed squared error divided by the column count of ``Y_true``."""
    residual = Y_pred - Y_true
    n_columns = Y_true.shape[1]
    return np.sum(residual ** 2) / n_columns

def backward(X, Y, Z1, A1, Z2, A2, Z3, W2, W3):
    """Back-propagate the output error and return the parameter gradients.

    The output-layer error is taken as ``Z3 - Y`` and every gradient is
    averaged over the columns of ``X`` (this is the gradient of half the mean
    squared error). Hidden-layer errors are scaled by the module-level
    ``activation_derivative`` evaluated at the stored pre-activations.

    Returns:
        ``(dW1, db1, dW2, db2, dW3, db3)``, one gradient per parameter.
    """
    inv_m = 1 / X.shape[1]

    # Output layer: linear, so the error is just the residual.
    delta3 = Z3 - Y
    grad_W3 = inv_m * delta3.dot(A2.T)
    grad_b3 = inv_m * delta3.sum(axis=1, keepdims=True)

    # Second hidden layer.
    delta2 = W3.T.dot(delta3) * activation_derivative(Z2)
    grad_W2 = inv_m * delta2.dot(A1.T)
    grad_b2 = inv_m * delta2.sum(axis=1, keepdims=True)

    # First hidden layer.
    delta1 = W2.T.dot(delta2) * activation_derivative(Z1)
    grad_W1 = inv_m * delta1.dot(X.T)
    grad_b1 = inv_m * delta1.sum(axis=1, keepdims=True)

    return grad_W1, grad_b1, grad_W2, grad_b2, grad_W3, grad_b3


In [None]:
num_train = X_train.shape[1]

for epoch in range(1, num_epochs + 1):
    # Visit the training columns in a fresh random order every epoch.
    order = np.random.permutation(num_train)
    X_train_shuffled = X_train[:, order]
    Y_train_shuffled = Y_train[:, order]

    for start in range(0, num_train, batch_size):
        stop = start + batch_size  # slicing clips the last, possibly shorter, batch
        X_batch = X_train_shuffled[:, start:stop]
        Y_batch = Y_train_shuffled[:, start:stop]

        Z1, A1, Z2, A2, Z3 = forward(X_batch, W1, b1, W2, b2, W3, b3)
        gradients = backward(X_batch, Y_batch, Z1, A1, Z2, A2, Z3, W2, W3)

        # Gradient-descent step. The parameters are NumPy arrays, so the
        # augmented assignment updates W1..b3 in place.
        for param, grad in zip((W1, b1, W2, b2, W3, b3), gradients):
            param -= learning_rate * grad

    # Loss over the whole training set with the parameters reached this epoch.
    Z3_train = forward(X_train, W1, b1, W2, b2, W3, b3)[-1]
    train_loss = compute_loss(Z3_train, Y_train)

    if epoch % 5 == 0:
        print(f"Epoch {epoch}/{num_epochs}, Training MSE: {train_loss:.6f}")

    # Optional early stop once the training loss drops below the threshold.
    reached_threshold = error_threshold is not None and train_loss < error_threshold
    if reached_threshold:
        print(f"Stopping early at epoch {epoch} as training MSE {train_loss:.6f} is below threshold {error_threshold}.")
        break




Epoch 5/10000, Training MSE: 0.946378
Epoch 10/10000, Training MSE: 0.946362
Epoch 15/10000, Training MSE: 0.946375
Epoch 20/10000, Training MSE: 0.946676
Epoch 25/10000, Training MSE: 0.946390
Epoch 30/10000, Training MSE: 0.946350
Epoch 35/10000, Training MSE: 0.946407
Epoch 40/10000, Training MSE: 0.946201
Epoch 45/10000, Training MSE: 0.943700
Epoch 50/10000, Training MSE: 0.365236
Epoch 55/10000, Training MSE: 0.314236
Epoch 60/10000, Training MSE: 0.304699
Epoch 65/10000, Training MSE: 0.292443
Epoch 70/10000, Training MSE: 0.280339
Epoch 75/10000, Training MSE: 0.273434
Epoch 80/10000, Training MSE: 0.268651
Epoch 85/10000, Training MSE: 0.267134
Epoch 90/10000, Training MSE: 0.262132
Epoch 95/10000, Training MSE: 0.259080
Epoch 100/10000, Training MSE: 0.256343
Epoch 105/10000, Training MSE: 0.258372
Epoch 110/10000, Training MSE: 0.253031
Epoch 115/10000, Training MSE: 0.251199
Epoch 120/10000, Training MSE: 0.250471
Epoch 125/10000, Training MSE: 0.251078
Epoch 130/10000, Tra

In [None]:
# Evaluate the trained network on the held-out columns; only the final
# element returned by forward() (the prediction) is needed.
Z3_test = forward(X_test, W1, b1, W2, b2, W3, b3)[-1]
test_loss = compute_loss(Z3_test, Y_test)
print("Test Prediction Error (MSE):", test_loss)

# Map targets and predictions back through the target scaler. Note that,
# despite its name, Y_pred_scaled holds the inverse-transformed predictions.
Y_test_original = scaler_Y.inverse_transform(Y_test.T)
Y_pred_scaled = scaler_Y.inverse_transform(Z3_test.T)

Test Prediction Error (MSE): 0.40867119299904664
