In [5]:
import numpy as np

class ReLU:
    def forward(self, x):
        self.input = x
        self.output = np.maximum(0, x)
        return self.output

    def backward(self, grad_output, learning_rate):
        return grad_output * (self.input > 0)


In [6]:

class ConvolutionalLayer:
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.filters = np.random.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.01
        self.biases = np.zeros((out_channels, 1, 1))

    def forward(self, input_data):
        self.input = input_data
        n_samples, n_channels, height, width = input_data.shape
        filter_height, filter_width = self.filters.shape[2], self.filters.shape[3]
        out_height = (height - filter_height + 2 * self.padding) // self.stride + 1
        out_width = (width - filter_width + 2 * self.padding) // self.stride + 1
        output = np.zeros((n_samples, self.out_channels, out_height, out_width))
        padded_input = np.pad(input_data, ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)), mode='constant')
        for i in range(n_samples):
            for f in range(self.out_channels):
                for y in range(0, out_height, self.stride):
                    for x in range(0, out_width, self.stride):
                        input_slice = padded_input[i, :, y:y + filter_height, x:x + filter_width]
                        output[i, f, y // self.stride, x // self.stride] = np.sum(input_slice * self.filters[f]) + self.biases[f]
        return output

    def backward(self, grad_output, learning_rate):
        grad_input = np.zeros_like(self.input)
        grad_filters = np.zeros_like(self.filters)
        grad_biases = np.zeros_like(self.biases)
        n_samples, n_channels, height, width = self.input.shape
        filter_height, filter_width = self.filters.shape[2], self.filters.shape[3]
        out_height, out_width = grad_output.shape[2], grad_output.shape[3]
        padded_input = np.pad(self.input, ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)), mode='constant')
        padded_grad_input = np.pad(grad_input, ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)), mode='constant')
        for i in range(n_samples):
            for f in range(self.out_channels):
                for y in range(out_height):
                    for x in range(out_width):
                        dy = y * self.stride
                        dx = x * self.stride
                        input_slice = padded_input[i, :, dy:dy + filter_height, dx:dx + filter_width]
                        grad = grad_output[i, f, y, x]
                        padded_grad_input[i, :, dy:dy + filter_height, dx:dx + filter_width] += grad * self.filters[f]
                        grad_filters[f] += grad * input_slice
                        grad_biases[f] += grad
        self.filters -= learning_rate * grad_filters
        self.biases -= learning_rate * grad_biases
        return padded_grad_input[:, :, self.padding:-self.padding, self.padding:-self.padding]


In [7]:
# --- Part 3: MaxPooling and Flatten Layers ---
class MaxPoolingLayer:
    def __init__(self, pool_size, stride=None):
        self.pool_size = pool_size
        self.stride = stride if stride is not None else pool_size

    def forward(self, input_data):
        self.input = input_data
        n_samples, n_channels, height, width = input_data.shape
        out_height = (height - self.pool_size) // self.stride + 1
        out_width = (width - self.pool_size) // self.stride + 1
        output = np.zeros((n_samples, n_channels, out_height, out_width))
        self.max_indices = np.zeros_like(input_data, dtype=bool)
        for i in range(n_samples):
            for c in range(n_channels):
                for y in range(0, height - self.pool_size + 1, self.stride):
                    for x in range(0, width - self.pool_size + 1, self.stride):
                        pool_region = input_data[i, c, y:y + self.pool_size, x:x + self.pool_size]
                        max_val = np.max(pool_region)
                        output[i, c, y // self.stride, x // self.stride] = max_val
                        (max_y, max_x) = np.unravel_index(pool_region.argmax(), pool_region.shape)
                        self.max_indices[i, c, y + max_y, x + max_x] = 1
        return output

    def backward(self, grad_output, learning_rate):
        grad_input = np.zeros_like(self.input)
        n_samples, n_channels, out_height, out_width = grad_output.shape
        for i in range(n_samples):
            for c in range(n_channels):
                for y in range(out_height):
                    for x in range(out_width):
                        dy = y * self.stride
                        dx = x * self.stride
                        grad_input[i, c, dy:dy + self.pool_size, dx:dx + self.pool_size] += (
                            grad_output[i, c, y, x] * self.max_indices[i, c, dy:dy + self.pool_size, dx:dx + self.pool_size]
                        )
        return grad_input

class FlattenLayer:
    def forward(self, input_data):
        self.input_shape = input_data.shape
        return input_data.reshape(input_data.shape[0], -1)

    def backward(self, grad_output, learning_rate):
        return grad_output.reshape(self.input_shape)


In [10]:
class DenseLayer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros((1, output_size))

    def forward(self, input_data):
        self.input = input_data
        return np.dot(input_data, self.weights) + self.biases

    def backward(self, grad_output, learning_rate):
        grad_input = np.dot(grad_output, self.weights.T)
        grad_weights = np.dot(self.input.T, grad_output)
        grad_biases = np.sum(grad_output, axis=0, keepdims=True)
        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases
        return grad_input

class Softmax:
    def forward(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.output = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        return self.output

    def backward(self, grad_output, learning_rate):
        return grad_output

def cross_entropy_loss(y_pred, y_true):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[np.arange(m), y_true])
    loss = np.sum(log_likelihood) / m
    return loss

def cross_entropy_backward(y_pred, y_true):
    m = y_true.shape[0]
    grad = y_pred.copy()
    grad[np.arange(m), y_true] -= 1
    grad = grad / m
    return grad



In [11]:
class CNN:
    def __init__(self):
        self.layers = []

    def add(self, layer):
        self.layers.append(layer)

    def forward(self, input_data):
        output = input_data
        for layer in self.layers:
            output = layer.forward(output)
        return output

    def backward(self, grad_output, learning_rate):
        grad = grad_output
        for layer in reversed(self.layers):
            grad = layer.backward(grad, learning_rate)
        return grad

    def train(self, X_train, y_train, epochs, learning_rate):
        for epoch in range(epochs):
            for i in range(X_train.shape[0]):
                input_sample = X_train[i][np.newaxis, ...]
                target = y_train[i]
                output = self.forward(input_sample)
                probs = self.layers[-1].output
                loss = cross_entropy_loss(probs, np.array([target]))
                print(f"Epoch {epoch+1}/{epochs}, Sample {i+1}/{X_train.shape[0]}, Loss: {loss:.4f}")
                grad = cross_entropy_backward(probs, np.array([target]))
                self.backward(grad, learning_rate)

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

if __name__ == '__main__':
    X_train_ex = np.random.rand(10, 1, 8, 8)
    y_train_ex = np.random.randint(0, 2, 10)

    cnn = CNN()
    cnn.add(ConvolutionalLayer(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=1))
    cnn.add(ReLU())
    cnn.add(MaxPoolingLayer(pool_size=2, stride=2))
    cnn.add(FlattenLayer())
    cnn.add(DenseLayer(input_size=4 * 4 * 4, output_size=10))
    cnn.add(ReLU())
    cnn.add(DenseLayer(input_size=10, output_size=2))
    cnn.add(Softmax())

    cnn.train(X_train_ex, y_train_ex, epochs=5, learning_rate=0.01)

    X_test_ex = np.random.rand(2, 1, 8, 8)
    predictions = cnn.predict(X_test_ex)
    print("Predictions:", predictions)

  output[i, f, y // self.stride, x // self.stride] = np.sum(input_slice * self.filters[f]) + self.biases[f]


Epoch 1/5, Sample 1/10, Loss: 0.6931
Epoch 1/5, Sample 2/10, Loss: 0.6982
Epoch 1/5, Sample 3/10, Loss: 0.6932
Epoch 1/5, Sample 4/10, Loss: 0.6981
Epoch 1/5, Sample 5/10, Loss: 0.6932
Epoch 1/5, Sample 6/10, Loss: 0.6981
Epoch 1/5, Sample 7/10, Loss: 0.6932
Epoch 1/5, Sample 8/10, Loss: 0.6981
Epoch 1/5, Sample 9/10, Loss: 0.6931
Epoch 1/5, Sample 10/10, Loss: 0.6881
Epoch 2/5, Sample 1/10, Loss: 0.6831
Epoch 2/5, Sample 2/10, Loss: 0.7083
Epoch 2/5, Sample 3/10, Loss: 0.7031
Epoch 2/5, Sample 4/10, Loss: 0.6883
Epoch 2/5, Sample 5/10, Loss: 0.7031
Epoch 2/5, Sample 6/10, Loss: 0.6884
Epoch 2/5, Sample 7/10, Loss: 0.7030
Epoch 2/5, Sample 8/10, Loss: 0.6884
Epoch 2/5, Sample 9/10, Loss: 0.6835
Epoch 2/5, Sample 10/10, Loss: 0.6786
Epoch 3/5, Sample 1/10, Loss: 0.6737
Epoch 3/5, Sample 2/10, Loss: 0.7179
Epoch 3/5, Sample 3/10, Loss: 0.7127
Epoch 3/5, Sample 4/10, Loss: 0.6790
Epoch 3/5, Sample 5/10, Loss: 0.7125
Epoch 3/5, Sample 6/10, Loss: 0.6791
Epoch 3/5, Sample 7/10, Loss: 0.7124