In [56]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X = housing['data']
y = housing['target']

std = StandardScaler()
X_scaled = std.fit_transform(X)

In [51]:
class FNN: # Feedforward Neural Network
    def __init__(self, input_size: int, hidden_sizes: list[int], output_size: int):
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

    def fit(self, X, y):
        layer_sizes = [self.input_size] + self.hidden_sizes + [self.output_size]
        self.Ws = []
        self.bs = []

        for size1, size2 in zip(layer_sizes, layer_sizes[1:]):
            W = np.random.randn(size1, size2)
            b = np.random.randn(size2)
            self.Ws.append(W)
            self.bs.append(b)

        # and we have to fit them ...


    def predict(self, X):
        h_in = X
        for W, b in zip(self.Ws, self.bs):
            z = h_in @ W + b
            h_out = heaviside(z)
            h_in = h_out

        return h_out


nn = FNN(input_size=8, hidden_sizes=[6, 4, 2, 7], output_size=10)
nn.fit(X, y)
nn.predict(X)


array([[0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0]], shape=(20640, 10))

In [57]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

class FNN:
    def __init__(self, input_size: int, hidden_sizes: list[int], output_size: int):
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size
        self.Ws = []
        self.bs = []

    def fit(self, X, y, epochs=1000, learning_rate=0.1):
        layer_sizes = [self.input_size] + self.hidden_sizes + [self.output_size]

        if not self.Ws:
            for size1, size2 in zip(layer_sizes, layer_sizes[1:]):
                W = np.random.randn(size1, size2)
                b = np.random.randn(size2)
                self.Ws.append(W)
                self.bs.append(b)

        # Training Loop
        for epoch in range(epochs):
            # Forward Pass
            activations = [X]
            zs = []
            current_activation = X
            for W, b in zip(self.Ws, self.bs):
                z = current_activation @ W + b
                zs.append(z)
                current_activation = sigmoid(z)
                activations.append(current_activation)
            
            output = activations[-1]

            # Backpropagation
            # Calculate output layer error
            error = y - output
            
            # This is a simplified derivative for a squared error loss.
            # It should be multiplied by the derivative of the activation function.
            d_out = error * sigmoid_derivative(output) 
            
            # Backpropagate the error
            deltas = [d_out]
            # Loop backward through the layers from the second-to-last layer
            for i in range(len(self.Ws) - 1, 0, -1):
                delta = deltas[-1] @ self.Ws[i].T * sigmoid_derivative(activations[i])
                deltas.append(delta)
            deltas.reverse()

            # Update weights and biases
            for i in range(len(self.Ws)):
                # Reshape for matrix multiplication
                # Check for batch vs single example and reshape accordingly
                if len(X.shape) > 1: # Batch input
                    # Update W using outer product of activations and deltas
                    self.Ws[i] += learning_rate * activations[i].T @ deltas[i]
                    self.bs[i] += learning_rate * np.sum(deltas[i], axis=0) # Sum deltas for batch
                else: # Single input
                    # Update W using outer product
                    self.Ws[i] += learning_rate * np.outer(activations[i], deltas[i])
                    self.bs[i] += learning_rate * deltas[i]

    def predict(self, X):
        h_in = X
        for W, b in zip(self.Ws, self.bs):
            z = h_in @ W + b
            h_out = sigmoid(z) 
            h_in = h_out
        return h_out

In [58]:
nn = FNN(input_size=8, hidden_sizes=[6, 4, 2, 7], output_size=10)
nn.fit(X_scaled, y)
nn.predict(X_scaled)

ValueError: operands could not be broadcast together with shapes (20640,) (20640,10) 