In [None]:
class CNNLayer:
    """Convolutional layer using 'valid' 2-D cross-correlation, no padding or stride.

    Each of the ``depth`` output feature maps is the sum, over all input
    channels, of the channel cross-correlated with its own kernel, plus a
    bias map that has one entry per output position.
    """

    def __init__(self, input_shape, kernel_size, depth):
        """Set up shapes and randomly initialise kernels and biases.

        Args:
            input_shape: ``(channels, height, width)`` of a single sample.
            kernel_size: Side length of the square kernels.
            depth: Number of output feature maps.
        """
        channels, rows, cols = input_shape
        out_rows = rows - kernel_size + 1
        out_cols = cols - kernel_size + 1

        self.input_shape = input_shape
        self.input_depth = channels
        self.kernel_size = kernel_size
        self.depth = depth
        self.output_shape = (depth, out_rows, out_cols)
        self.kernels_shape = (depth, channels, kernel_size, kernel_size)

        # Kernels are drawn before biases; both are scaled-down standard normals.
        self.kernels = np.random.randn(*self.kernels_shape) * 0.1
        # Biases are untied: one value per output position, not one per map.
        self.biases = np.random.randn(*self.output_shape) * 0.1

    def forward(self, input):
        """Compute the layer output for a batch and cache the input.

        Args:
            input: Array indexed as ``[sample, channel, row, col]``.

        Returns:
            Array of shape ``(input.shape[0], *self.output_shape)``; the same
            array is also stored on ``self.output``.
        """
        self.input = input
        n_samples = input.shape[0]
        self.output = np.zeros((n_samples, *self.output_shape))

        for n in range(n_samples):
            sample = input[n]
            for f in range(self.depth):
                # View into self.output, so in-place adds land in the result.
                feature_map = self.output[n, f]
                for ch in range(self.input_depth):
                    feature_map += signal.correlate2d(
                        sample[ch], self.kernels[f, ch], mode='valid'
                    )
                feature_map += self.biases[f]
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Update kernels and biases in place and return the input gradient.

        Args:
            output_gradient: Gradient of the loss w.r.t. this layer's output,
                indexed as ``[sample, feature_map, row, col]``.
            learning_rate: Step size applied to the batch-averaged gradients.

        Returns:
            Gradient of the loss w.r.t. the cached input, with shape
            ``(self.input.shape[0], *self.input_shape)``.
        """
        n_samples = output_gradient.shape[0]
        kernel_grad_sum = np.zeros(self.kernels_shape)
        input_gradient = np.zeros((self.input.shape[0], *self.input_shape))

        for n in range(n_samples):
            sample = self.input[n]
            sample_grad = input_gradient[n]  # view; accumulates in place
            for f in range(self.depth):
                upstream = output_gradient[n, f]
                for ch in range(self.input_depth):
                    kernel_grad_sum[f, ch] += signal.correlate2d(
                        sample[ch], upstream, mode='valid'
                    )
                    # Full convolution is the adjoint of the forward
                    # valid cross-correlation.
                    sample_grad[ch] += signal.convolve2d(
                        upstream, self.kernels[f, ch], mode='full'
                    )

        # Parameters change only after every input gradient has been
        # computed from the pre-update kernels.
        self.kernels -= learning_rate * kernel_grad_sum / n_samples
        self.biases -= learning_rate * np.mean(output_gradient, axis=0)
        return input_gradient

In [None]:
class DenseLayer:
    """Fully connected layer computing ``input @ weights + biases``."""

    def __init__(self, input_size, output_size):
        """Randomly initialise the parameters with scaled-down standard normals.

        Args:
            input_size: Number of input features per sample.
            output_size: Number of output features per sample.
        """
        self.weights = np.random.randn(input_size, output_size) * 0.1
        self.biases = np.random.randn(output_size) * 0.1

    def forward(self, input):
        """Apply the affine map to a batch and cache the input for backward.

        Args:
            input: 2-D array whose rows are samples and whose column count
                matches the first dimension of ``self.weights``.

        Returns:
            ``np.dot(input, self.weights) + self.biases``.
        """
        self.input = input
        return np.dot(input, self.weights) + self.biases

    def backward(self, output_gradient, learning_rate):
        """Update the parameters in place and return the input gradient.

        Both the weight and the bias gradients are averaged over the batch
        (the first axis of ``output_gradient``), so the effective step size
        does not grow with the batch size.

        Args:
            output_gradient: Gradient of the loss w.r.t. this layer's output;
                its first axis is treated as the batch axis.
            learning_rate: Step size applied to the batch-averaged gradients.

        Returns:
            Gradient of the loss w.r.t. the cached input, computed from the
            weights as they were before this update.
        """
        # Must be computed before the weights are modified below.
        input_gradient = np.dot(output_gradient, self.weights.T)

        batch_size = output_gradient.shape[0]
        if batch_size == 0:
            # Nothing to average; a mean over zero samples would write NaN
            # into the parameters.
            return input_gradient

        # np.dot sums the per-sample outer products, so divide by the batch
        # size to match the mean used for the biases.
        weights_gradient = np.dot(self.input.T, output_gradient) / batch_size
        biases_gradient = np.mean(output_gradient, axis=0)

        self.weights -= learning_rate * weights_gradient
        self.biases -= learning_rate * biases_gradient
        return input_gradient

In [None]:
class ReLU:
    """Element-wise rectified linear activation with no trainable parameters."""

    def forward(self, input):
        """Return ``max(0, input)`` element-wise, caching ``input`` for backward."""
        self.input = input
        rectified = np.maximum(0, input)
        return rectified

    def backward(self, output_gradient, learning_rate):
        """Pass the gradient through only where the cached input was positive.

        ``learning_rate`` is accepted so the signature matches the other
        layers; it is not used here.
        """
        active = self.input > 0
        return output_gradient * active

In [None]:
# Network setup: conv -> ReLU -> flatten -> dense -> softmax.
cnn_layer = CNNLayer(input_shape=(1, 28, 28), kernel_size=3, depth=16)
relu = ReLU()


def flatten_layer(x):
    """Collapse every axis after the first into one, keeping the batch axis."""
    return x.reshape(x.shape[0], -1)


# Size the dense layer from the conv layer's own output shape
# (16 filters x 26 x 26 for the settings above) so the two cannot drift
# apart if the conv configuration changes.
dense_layer = DenseLayer(int(np.prod(cnn_layer.output_shape)), num_classes)
softmax = Softmax()

# Training loop: one parameter update per mini-batch.
for epoch in range(epochs):
    for i in range(0, len(x_train), batch_size):
        x_batch = x_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        # Forward pass
        conv_output = cnn_layer.forward(x_batch)
        relu_output = relu.forward(conv_output)
        flattened_output = flatten_layer(relu_output)
        dense_output = dense_layer.forward(flattened_output)
        softmax_output = softmax.forward(dense_output)

        # Compute loss
        loss = categorical_cross_entropy(y_batch, softmax_output)
        print(f"Epoch {epoch+1}, Batch {i//batch_size+1}, Loss: {loss}")

        # Backward pass. The loss gradient is fed straight into the dense
        # layer; softmax has no separate backward call here.
        loss_grad = categorical_cross_entropy_prime(y_batch, softmax_output)
        dense_grad = dense_layer.backward(loss_grad, learning_rate)
        # Undo the flatten so the gradient matches the ReLU output's shape.
        flattened_grad = dense_grad.reshape(relu_output.shape)
        relu_grad = relu.backward(flattened_grad, learning_rate)
        conv_grad = cnn_layer.backward(relu_grad, learning_rate)

# Evaluation loop (batched for efficiency)
correct = 0
total = 0
for i in range(0, len(x_test), batch_size):
    x_batch = x_test[i:i+batch_size]
    y_batch = y_test[i:i+batch_size]

    conv_output = cnn_layer.forward(x_batch)
    relu_output = relu.forward(conv_output)
    flattened_output = flatten_layer(relu_output)
    dense_output = dense_layer.forward(flattened_output)
    softmax_output = softmax.forward(dense_output)

    # Labels are compared via argmax along axis 1 on both sides.
    predictions = np.argmax(softmax_output, axis=1)
    correct += np.sum(predictions == np.argmax(y_batch, axis=1))
    total += len(y_batch)

if total:
    print(f"Accuracy: {correct / total * 100:.2f}%")
else:
    # An empty test set would otherwise raise ZeroDivisionError.
    print("Accuracy: n/a (empty test set)")