In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

In [2]:
def load_data():
    boston = fetch_openml(name='boston', version=1, as_frame=False)
    X, y = boston.data, boston.target.reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    return X_train, X_test, y_train, y_test

In [3]:
def initialize_weights(input_size, hidden1_size, hidden2_size, output_size):
    np.random.seed(42)
    weights = {
        "W1": np.random.randn(input_size, hidden1_size),
        "b1": np.zeros((1, hidden1_size)),
        "W2": np.random.randn(hidden1_size, hidden2_size),
        "b2": np.zeros((1, hidden2_size)),
        "W3": np.random.randn(hidden2_size, output_size),
        "b3": np.zeros((1, output_size))
    }
    return weights


In [4]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [5]:
def forward_pass(X, weights):
    Z1 = np.dot(X, weights["W1"]) + weights["b1"]
    A1 = sigmoid(Z1)
    
    Z2 = np.dot(A1, weights["W2"]) + weights["b2"]
    A2 = sigmoid(Z2)
    
    Z3 = np.dot(A2, weights["W3"]) + weights["b3"]
    return Z1, A1, Z2, A2, Z3

In [6]:
def backpropagation(X, y, weights, Z1, A1, Z2, A2, Z3, learning_rate, batch_size):
    error = y - Z3  # Compute error
    dZ3 = -2 * error / batch_size  # Compute derivative of loss w.r.t output
    
    # Compute gradients for output layer:
    # dL/dW3 = A2^T * dZ3
    dW3 = np.dot(A2.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)
    
    # Compute gradients for hidden layer 2:
    # dL/dA2 = dZ3 * W3^T
    # dL/dZ2 = dL/dA2 * sigmoid_derivative(A2)
    dA2 = np.dot(dZ3, weights["W3"].T) * sigmoid_derivative(A2)
    dW2 = np.dot(A1.T, dA2)
    db2 = np.sum(dA2, axis=0, keepdims=True)
    
    # Compute gradients for hidden layer 1:
    # dL/dA1 = dA2 * W2^T
    # dL/dZ1 = dL/dA1 * sigmoid_derivative(A1)
    dA1 = np.dot(dA2, weights["W2"].T) * sigmoid_derivative(A1)
    dW1 = np.dot(X.T, dA1)
    db1 = np.sum(dA1, axis=0, keepdims=True)
    
    # Update weights using gradient descent
    weights["W3"] -= learning_rate * dW3
    weights["b3"] -= learning_rate * db3
    weights["W2"] -= learning_rate * dW2
    weights["b2"] -= learning_rate * db2
    weights["W1"] -= learning_rate * dW1
    weights["b1"] -= learning_rate * db1
    
    return weights, np.mean(error ** 2)

In [18]:
def train(X_train, y_train, weights, learning_rate=0.01, batch_size=32, epochs=600):
    for epoch in tqdm(range(epochs), desc="Training Progress"):
        indices = np.arange(X_train.shape[0])
        np.random.shuffle(indices)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        for start in range(0, X_train.shape[0], batch_size):
            end = start + batch_size
            X_batch, y_batch = X_train_shuffled[start:end], y_train_shuffled[start:end]
            
            Z1, A1, Z2, A2, Z3 = forward_pass(X_batch, weights)
            weights, mse = backpropagation(X_batch, y_batch, weights, Z1, A1, Z2, A2, Z3, learning_rate, batch_size)
        
        # print(f"Epoch {epoch + 1}, Mean Squared Error: {mse}")
    
    return weights

In [19]:
def predict(X, weights):
    _, _, _, _, Z3 = forward_pass(X, weights)
    return Z3

In [20]:
X_train, X_test, y_train, y_test = load_data()
weights = initialize_weights(input_size=X_train.shape[1], hidden1_size=8, hidden2_size=6, output_size=1)
weights = train(X_train, y_train, weights)

y_pred = predict(X_test, weights)
test_mse = np.mean((y_test - y_pred) ** 2)
print(f"Test Mean Squared Error: {test_mse}")

Training Progress: 100%|█| 600/600 [00:

Test Mean Squared Error: 12.859007665455895





In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error


mlp = MLPRegressor(hidden_layer_sizes=(8, 6), activation='logistic', solver='sgd', learning_rate_init=0.01, max_iter=600, random_state=42)
mlp.fit(X_train, y_train.ravel())
y_pred_sklearn = mlp.predict(X_test)
test_mse_sklearn = mean_squared_error(y_test, y_pred_sklearn)
print(f"Sklearn MLP Test Mean Squared Error: {test_mse_sklearn}")