In [1]:
import numpy as np
import gzip
import os

# Load MNIST dataset
def load_mnist_images(filename):
    """Read a gzip-compressed image file into a matrix of flattened images.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        2-D float array with 28*28 columns. The first 16 bytes of the
        decompressed stream are skipped; every remaining byte is read as an
        unsigned 8-bit value and divided by 255.0.
    """
    header_bytes = 16
    pixels_per_image = 28 * 28
    with gzip.open(filename, 'rb') as stream:
        raw = stream.read()
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=header_bytes)
    return pixels.reshape(-1, pixels_per_image) / 255.0

def load_mnist_labels(filename):
    """Read a gzip-compressed label file into a 1-D uint8 array.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        Array of unsigned 8-bit values, one per byte of the decompressed
        stream after the first 8 bytes are skipped.
    """
    header_bytes = 8
    with gzip.open(filename, 'rb') as stream:
        raw = stream.read()
    return np.frombuffer(raw, dtype=np.uint8, offset=header_bytes)

# Read the four archives from the current working directory
# (training set first, then the held-out test set).
train_images = load_mnist_images('train-images-idx3-ubyte.gz')
train_labels = load_mnist_labels('train-labels-idx1-ubyte.gz')
test_images = load_mnist_images('t10k-images-idx3-ubyte.gz')
test_labels = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

# One-hot encode labels: row k of the identity matrix is the encoding of class k,
# so indexing the identity with the label array yields one encoded row per label.
num_classes = 10
_one_hot_rows = np.eye(num_classes)
train_labels_one_hot = _one_hot_rows[train_labels]
test_labels_one_hot = _one_hot_rows[test_labels]

# Define neural network architecture
class NeuralNetwork:
    """Two-layer perceptron: sigmoid hidden layer followed by a softmax output.

    ``forward`` stores every intermediate result on the instance
    (``hidden_layer``, ``hidden_layer_activation``, ``output_layer``,
    ``output_probs``) so that a caller can compute gradients afterwards.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create randomly initialised weights and zero biases.

        Args:
            input_size: Number of features per input row.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
        """
        # Scale each weight matrix by 1/sqrt(fan_in). Unscaled standard-normal
        # weights make the pre-activations grow with the number of inputs, which
        # saturates the sigmoid and drives softmax outputs to exactly 0 or 1.
        self.weights_input_hidden = np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.bias_output = np.zeros((1, output_size))

    def forward(self, x):
        """Compute class probabilities for a batch of input rows.

        Args:
            x: 2-D array with one sample per row and ``input_size`` columns.

        Returns:
            2-D array of per-row probabilities with ``output_size`` columns.
        """
        self.hidden_layer = np.dot(x, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_activation = self.sigmoid(self.hidden_layer)
        self.output_layer = np.dot(self.hidden_layer_activation, self.weights_hidden_output) + self.bias_output
        self.output_probs = self.softmax(self.output_layer)
        return self.output_probs

    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

        log(1 + exp(-x)) is computed by ``np.logaddexp(0, -x)``, which never
        exponentiates a large positive number, so very negative inputs yield 0
        instead of an overflow warning.
        """
        return np.exp(-np.logaddexp(0, -x))

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first for stability."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Hyperparameters
input_size = 28*28
hidden_size = 128
output_size = num_classes
learning_rate = 0.1
num_epochs = 10

# Smallest probability handed to log(). Softmax can underflow to exactly 0 for a
# class, and 0 * log(0) would turn the accumulated loss into NaN.
log_epsilon = 1e-12

# Initialize neural network
model = NeuralNetwork(input_size, hidden_size, output_size)

# Training loop: stochastic gradient descent, one sample per parameter update
for epoch in range(num_epochs):
    total_loss = 0

    for i in range(len(train_images)):
        # Slicing with i:i+1 keeps the leading axis, so both are single-row 2-D arrays
        sample = train_images[i:i+1]
        target = train_labels_one_hot[i:i+1]

        # Forward pass
        output_probs = model.forward(sample)

        # Calculate loss (cross-entropy), with probabilities clipped away from 0
        loss = -np.sum(target * np.log(np.clip(output_probs, log_epsilon, 1.0)))
        total_loss += loss

        # Backpropagation.
        # softmax + cross-entropy: gradient w.r.t. the output scores is (probabilities - target)
        gradient_output = output_probs - target
        # Chain through the output weights and the sigmoid derivative a * (1 - a).
        # This must be computed before weights_hidden_output is updated below.
        hidden_activation = model.hidden_layer_activation
        gradient_hidden = np.dot(gradient_output, model.weights_hidden_output.T) * \
                          (hidden_activation * (1 - hidden_activation))

        # Update weights and biases
        model.weights_hidden_output -= learning_rate * np.dot(hidden_activation.T, gradient_output)
        model.bias_output -= learning_rate * gradient_output
        model.weights_input_hidden -= learning_rate * np.dot(sample.T, gradient_hidden)
        model.bias_hidden -= learning_rate * gradient_hidden

    average_loss = total_loss / len(train_images)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# Testing: classify the test images one at a time and count exact label matches
num_test = len(test_images)
correct = 0
for i in range(num_test):
    output_probs = model.forward(test_images[i:i + 1])
    predicted_label = np.argmax(output_probs)  # index of the most probable class
    correct += int(predicted_label == test_labels[i])

accuracy = correct / num_test
print(f"Test Accuracy: {accuracy:.4f}")


FileNotFoundError: ignored

In [None]:
Implementing a Convolutional Neural Network (CNN) from scratch involves a lot of complex code, especially for tasks like image classification. However, I can provide you with a simplified example of a CNN in Python using NumPy for a basic image classification task. This example won't cover all the advanced optimizations and features of modern CNNs, but it will give you a basic idea of how CNNs work.

For a complete and efficient implementation, it's highly recommended to use deep learning frameworks like TensorFlow or PyTorch.

```python
import numpy as np
import gzip

# Load MNIST dataset
def load_mnist_images(filename):
    """Read a gzip-compressed image file into a stack of 28x28 float images.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        3-D float array of shape (count, 28, 28). The first 16 bytes of the
        decompressed stream are skipped; every remaining byte is read as an
        unsigned 8-bit value and divided by 255.0.
    """
    with gzip.open(filename, 'rb') as stream:
        payload = stream.read()
    raw_pixels = np.frombuffer(payload, dtype=np.uint8, offset=16)
    image_stack = raw_pixels.reshape(-1, 28, 28)
    return image_stack / 255.0

def load_mnist_labels(filename):
    """Read a gzip-compressed label file into a 1-D uint8 array.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        Array of unsigned 8-bit values taken from the decompressed stream
        after skipping its first 8 bytes.
    """
    with gzip.open(filename, 'rb') as stream:
        payload = stream.read()
    return np.frombuffer(payload, dtype=np.uint8, offset=8)

# Read the four archives from the current working directory
# (training set first, then the held-out test set).
train_images = load_mnist_images('train-images-idx3-ubyte.gz')
train_labels = load_mnist_labels('train-labels-idx1-ubyte.gz')
test_images = load_mnist_images('t10k-images-idx3-ubyte.gz')
test_labels = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

# One-hot encode labels: row k of the identity matrix is the encoding of class k,
# so indexing the identity with the label array yields one encoded row per label.
num_classes = 10
_one_hot_rows = np.eye(num_classes)
train_labels_one_hot = _one_hot_rows[train_labels]
test_labels_one_hot = _one_hot_rows[test_labels]

# Define CNN architecture
class ConvNet:
    """Convolution -> flatten -> fully connected pipeline for one 28x28 image.

    ``forward`` returns the fully connected layer's raw class scores; no
    softmax or other activation is applied anywhere in the pipeline.
    """

    def __init__(self):
        """Build the three layers with sizes that agree with each other."""
        image_side = 28
        kernel_size = 3
        num_filters = 8
        # A convolution without padding shrinks each side by kernel_size - 1 (28 -> 26).
        conv_side = image_side - kernel_size + 1

        self.conv_layer = ConvolutionalLayer(in_channels=1, num_filters=num_filters, kernel_size=kernel_size)
        self.flatten_layer = FlattenLayer()
        # There is no pooling layer between the convolution and the dense layer,
        # so the dense layer receives all 26*26*8 convolution outputs (not 13*13*8).
        self.fc_layer = FullyConnectedLayer(conv_side * conv_side * num_filters, num_classes)

    def forward(self, x):
        """Return the class scores for a single 2-D image ``x``."""
        conv_out = self.conv_layer.forward(x)
        flattened = self.flatten_layer.forward(conv_out)
        output = self.fc_layer.forward(flattened)
        return output

class ConvolutionalLayer:
    """Bank of square 2-D filters applied to a single-channel image without padding."""

    def __init__(self, in_channels, num_filters, kernel_size):
        """Create ``num_filters`` random ``kernel_size`` x ``kernel_size`` filters.

        Args:
            in_channels: Accepted for interface compatibility; the filters are
                2-D, so this value is not used.
            num_filters: Number of filters (output channels).
            kernel_size: Side length of each square filter.
        """
        # Dividing by kernel_size**2 keeps the initial responses small.
        self.filters = np.random.randn(num_filters, kernel_size, kernel_size) / (kernel_size ** 2)

    def forward(self, x):
        """Slide every filter over a 2-D image.

        Args:
            x: 2-D array of shape (height, width).

        Returns:
            Array of shape (height - k + 1, width - k + 1, num_filters), where
            entry [i, j, f] is the sum of filter f multiplied element-wise with
            the k x k patch whose top-left corner is (i, j).
        """
        num_filters, kernel_size, _ = self.filters.shape
        h, w = x.shape
        out_h = h - kernel_size + 1
        out_w = w - kernel_size + 1
        conv_out = np.zeros((out_h, out_w, num_filters))
        for i in range(out_h):
            for j in range(out_w):
                x_slice = x[i:i+kernel_size, j:j+kernel_size]
                # filters is (num_filters, k, k) and the patch is (k, k): the patch
                # broadcasts over the leading filter axis, and summing the two
                # spatial axes leaves one response per filter.
                conv_out[i, j] = np.sum(self.filters * x_slice, axis=(1, 2))
        return conv_out

class FlattenLayer:
    """Layer that collapses an array of any shape into a single dimension."""

    def forward(self, x):
        """Return ``x`` reshaped to 1-D, keeping the element order of ``reshape``."""
        flat_shape = (-1,)
        return x.reshape(flat_shape)

class FullyConnectedLayer:
    """Affine layer computing ``x . weights + bias``."""

    def __init__(self, input_size, output_size):
        """Draw standard-normal weights and start with an all-zero bias row.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
        """
        weight_shape = (input_size, output_size)
        self.weights = np.random.randn(*weight_shape)
        self.bias = np.zeros((1, output_size))

    def forward(self, x):
        """Apply the affine map; the (1, output_size) bias broadcasts over the product."""
        projected = np.dot(x, self.weights)
        return projected + self.bias

# Hyperparameters
learning_rate = 0.01
num_epochs = 5

# Initialize CNN
model = ConvNet()

# Training loop (loss evaluation only: no gradients are computed and no weights change)
for epoch in range(num_epochs):
    total_loss = 0

    for i in range(len(train_images)):
        image = train_images[i]
        label = train_labels_one_hot[i]

        # Forward pass. The model returns raw class scores, so they are turned
        # into probabilities here before the cross-entropy is taken; taking the
        # log of raw (possibly negative) scores would produce NaN.
        scores = np.ravel(model.forward(image))
        exp_scores = np.exp(scores - np.max(scores))  # subtract the max for numerical stability
        output_probs = exp_scores / np.sum(exp_scores)

        # Calculate loss (cross-entropy); clip so an underflowed probability cannot give log(0)
        loss = -np.sum(label * np.log(np.clip(output_probs, 1e-12, 1.0)))
        total_loss += loss

        # Backpropagation (not implemented in this simplified example)

    average_loss = total_loss / len(train_images)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# Testing (not implemented in this simplified example)
```

Please note that this code is a deliberately basic and simplified version of a CNN: it omits many important aspects such as backpropagation, pooling layers, advanced activation functions, regularization, optimization algorithms, and more. Because backpropagation is missing, no weights are ever updated, so the loop above only reports the loss of the randomly initialized network and does not learn anything. For a complete and efficient CNN implementation, consider using a deep learning library such as TensorFlow or PyTorch.

In [None]:
import numpy as np
import gzip

# Load MNIST dataset (same loading functions as before)
# ...

# Define activation functions and their derivatives
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    ``np.logaddexp(0, -x)`` computes log(1 + exp(-x)) without exponentiating a
    large positive number, so very negative inputs return 0 instead of raising
    an overflow warning.
    """
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This equals the slope of the logistic function only when ``x`` is already
    the logistic function's output, not its input.
    """
    complement = 1 - x
    return x * complement

def relu(x):
    """Rectified linear unit: the element-wise maximum of zero and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 everywhere else."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

# Initialize CNN parameters
input_channels = 1
output_channels = 8
kernel_size = 3
pool_size = 2
num_classes = 10

# Hyperparameters
learning_rate = 0.01
num_epochs = 5
batch_size = 32
dropout_prob = 0.5
l2_reg_lambda = 0.001

# Initialize weights and biases
# conv_weights: one (input_channels, kernel_size, kernel_size) kernel per output channel
conv_weights = np.random.randn(output_channels, input_channels, kernel_size, kernel_size)
conv_bias = np.zeros((output_channels, 1))
# fc_weights maps the flattened 13x13xoutput_channels pooled feature map to class scores
fc_weights = np.random.randn(13*13*output_channels, num_classes)
# The bias is a single row so that it broadcasts across a (batch, num_classes)
# score matrix; a (num_classes, 1) column cannot be added to such a matrix.
fc_bias = np.zeros((1, num_classes))

# Training loop (forward pass and loss only; no parameter is updated in this loop)
for epoch in range(num_epochs):
    total_loss = 0
    num_batches = 0

    for i in range(0, len(train_images), batch_size):
        batch_images = train_images[i:i+batch_size]
        batch_labels = train_labels_one_hot[i:i+batch_size]

        # The convolution indexes images as (sample, row, col, channel);
        # add the channel axis when the images are stored as (sample, row, col).
        if batch_images.ndim == 3:
            batch_images = batch_images[..., np.newaxis]
        # The final batch can hold fewer than batch_size samples.
        num_samples = len(batch_images)

        # Forward pass
        conv_output = np.zeros((num_samples, 26, 26, output_channels))

        # Convolution layer
        for b in range(num_samples):
            for oc in range(output_channels):
                # Reorder the kernel from (channel, row, col) to (row, col, channel)
                # so it lines up with an image patch.
                kernel = np.transpose(conv_weights[oc], (1, 2, 0))
                # Scalar bias, added exactly once per output value
                # (not once per input channel).
                bias = conv_bias.reshape(-1)[oc]
                for r in range(26):
                    for c in range(26):
                        patch = batch_images[b, r:r+kernel_size, c:c+kernel_size, :]
                        conv_output[b, r, c, oc] = np.sum(patch * kernel) + bias

        # Apply ReLU activation
        conv_output = relu(conv_output)

        # Max Pooling layer
        pool_output = np.zeros((num_samples, 13, 13, output_channels))
        for b in range(num_samples):
            for oc in range(output_channels):
                for r in range(0, 26, pool_size):
                    for c in range(0, 26, pool_size):
                        pool_output[b, r//pool_size, c//pool_size, oc] = np.max(
                            conv_output[b, r:r+pool_size, c:c+pool_size, oc]
                        )

        # Flatten layer
        fc_input = pool_output.reshape(num_samples, -1)

        # Fully connected layer; the bias is viewed as one row so that it
        # broadcasts over the batch whichever orientation it is stored in.
        fc_output = np.dot(fc_input, fc_weights) + fc_bias.reshape(1, -1)

        # Apply softmax activation (row maximum subtracted for numerical stability)
        softmax_output = np.exp(fc_output - np.max(fc_output, axis=1, keepdims=True))
        softmax_output /= np.sum(softmax_output, axis=1, keepdims=True)

        # Calculate loss (cross-entropy + L2 regularization).
        # Probabilities are clipped away from 0 so that log() stays finite.
        loss = -np.sum(batch_labels * np.log(np.clip(softmax_output, 1e-12, 1.0))) + \
               0.5 * l2_reg_lambda * (np.sum(conv_weights**2) + np.sum(fc_weights**2))
        total_loss += loss
        num_batches += 1

        # Backpropagation (gradient calculations and weight updates) - not implemented here

    # Average over the batches actually processed (also safe for a partial last batch)
    average_loss = total_loss / max(num_batches, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# Testing (calculate accuracy)
def _predict_batch(images):
    """Run the CNN forward pass on a batch and return one predicted class per image.

    Uses the module-level parameters (conv_weights, conv_bias, fc_weights,
    fc_bias) and applies convolution, ReLU, max pooling and the dense layer in
    that order. Softmax is skipped because it does not change which score is
    largest.
    """
    if images.ndim == 3:
        # add the channel axis expected by the convolution
        images = images[..., np.newaxis]
    num_samples = len(images)
    out_h = images.shape[1] - kernel_size + 1
    out_w = images.shape[2] - kernel_size + 1

    # Convolution: accumulate one shifted copy of the image per kernel position.
    conv = np.zeros((num_samples, out_h, out_w, output_channels))
    for dr in range(kernel_size):
        for dc in range(kernel_size):
            window = images[:, dr:dr+out_h, dc:dc+out_w, :]
            # contract the input-channel axis of the window with that of the weights
            conv += np.tensordot(window, conv_weights[:, :, dr, dc], axes=([3], [1]))
    conv = relu(conv + conv_bias.reshape(1, 1, 1, -1))

    # Max pooling over non-overlapping pool_size x pool_size windows.
    pooled_h, pooled_w = out_h // pool_size, out_w // pool_size
    pooled = conv[:, :pooled_h*pool_size, :pooled_w*pool_size, :].reshape(
        num_samples, pooled_h, pool_size, pooled_w, pool_size, output_channels
    ).max(axis=(2, 4))

    scores = np.dot(pooled.reshape(num_samples, -1), fc_weights) + fc_bias.reshape(1, -1)
    return np.argmax(scores, axis=1)


correct = 0
for i in range(0, len(test_images), batch_size):
    batch_images = test_images[i:i+batch_size]
    batch_labels = test_labels_one_hot[i:i+batch_size]

    # Forward pass on this test batch. Predictions must come from the test
    # images themselves, not from the last training batch's softmax_output.
    predicted_labels = _predict_batch(batch_images)
    correct += np.sum(predicted_labels == test_labels[i:i+batch_size])

accuracy = correct / len(test_images)
print(f"Test Accuracy: {accuracy:.4f}")


In [None]:
# ... (previous code)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0

    for i in range(0, len(train_images), batch_size):
        batch_images = train_images[i:i+batch_size]
        batch_labels = train_labels_one_hot[i:i+batch_size]

        # The gradient loops index images as (sample, row, col, channel);
        # add the channel axis when the images are stored as (sample, row, col).
        if batch_images.ndim == 3:
            batch_images = batch_images[..., np.newaxis]
        # The final batch can hold fewer than batch_size samples.
        num_samples = len(batch_images)

        # Forward pass (similar to previous code)
        # ...
        # NOTE: the forward pass must produce conv_output (after ReLU), fc_input
        # and softmax_output for THIS batch before the code below runs.

        # Backpropagation
        # softmax + cross-entropy: gradient w.r.t. the class scores
        delta_output = softmax_output - batch_labels

        # Fully connected layer
        delta_fc_weights = np.dot(fc_input.T, delta_output)
        # Shape the bias gradient like the bias itself so the in-place update below is valid
        delta_fc_bias = np.sum(delta_output, axis=0).reshape(fc_bias.shape)
        delta_fc_input = np.dot(delta_output, fc_weights.T)

        # Reshape delta_fc_input back to match pool_output shape
        delta_pool_output = delta_fc_input.reshape(num_samples, 13, 13, output_channels)

        # Backpropagate through max pooling (distribute gradients to max elements)
        delta_conv_output = np.zeros_like(conv_output)
        for b in range(num_samples):
            for oc in range(output_channels):
                for r in range(0, 26, pool_size):
                    for c in range(0, 26, pool_size):
                        # position of the maximum inside this pooling window
                        max_indices = np.unravel_index(
                            np.argmax(conv_output[b, r:r+pool_size, c:c+pool_size, oc]),
                            (pool_size, pool_size)
                        )
                        delta_conv_output[b, r+max_indices[0], c+max_indices[1], oc] = \
                            delta_pool_output[b, r//pool_size, c//pool_size, oc]

        # Backpropagate through ReLU
        delta_relu = relu_derivative(conv_output) * delta_conv_output

        # Backpropagate through convolution
        delta_conv_weights = np.zeros_like(conv_weights)
        # One bias gradient per output channel, shaped like conv_bias. Keeping the
        # reduced axes would give a 4-D array that cannot be subtracted in place.
        delta_conv_bias = np.sum(delta_relu, axis=(0, 1, 2)).reshape(conv_bias.shape)
        for b in range(num_samples):
            for oc in range(output_channels):
                for ic in range(input_channels):
                    for r in range(26):
                        for c in range(26):
                            delta_conv_weights[oc, ic] += batch_images[b, r:r+kernel_size, c:c+kernel_size, ic] * delta_relu[b, r, c, oc]

        # Weight updates (the L2 penalty contributes lambda * weights to each weight gradient)
        fc_weights -= learning_rate * (delta_fc_weights + l2_reg_lambda * fc_weights)
        fc_bias -= learning_rate * delta_fc_bias
        conv_weights -= learning_rate * (delta_conv_weights + l2_reg_lambda * conv_weights)
        conv_bias -= learning_rate * delta_conv_bias

        # Calculate loss (similar to previous code)
        # ...

    average_loss = total_loss / (len(train_images) // batch_size)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# ... (testing code)


In [None]:
import numpy as np
import gzip

# Load MNIST dataset (same loading functions as before)
# ...

# Activation functions and their derivatives
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    ``np.logaddexp(0, -x)`` computes log(1 + exp(-x)) without exponentiating a
    large positive number, so very negative inputs return 0 instead of raising
    an overflow warning.
    """
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This equals the slope of the logistic function only when ``x`` is already
    the logistic function's output, not its input.
    """
    one_minus_x = 1 - x
    return x * one_minus_x

def relu(x):
    """Rectified linear unit: the element-wise maximum of zero and ``x``."""
    lower_bound = 0
    return np.maximum(lower_bound, x)

def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 everywhere else."""
    positive_mask = x > 0
    return np.where(positive_mask, 1, 0)

class ConvLayer:
    """Bank of square 2-D filters applied, without padding, to single-channel images.

    ``forward`` caches its input so that ``backward`` can compute gradients;
    ``backward`` stores the filter gradient in ``grad_filters`` and returns the
    gradient with respect to the input. Neither method changes ``filters``.
    """

    def __init__(self, num_filters, kernel_size):
        """Create ``num_filters`` random ``kernel_size`` x ``kernel_size`` filters."""
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.filters = np.random.randn(num_filters, kernel_size, kernel_size) / (kernel_size ** 2)

    def forward(self, input_data):
        """Convolution forward pass.

        Args:
            input_data: Array of shape (batch, height, width). A single 2-D
                image of shape (height, width) is treated as a batch of one.

        Returns:
            Array of shape (batch, height - k + 1, width - k + 1, num_filters).
        """
        images = np.asarray(input_data, dtype=float)
        self._input_shape = images.shape
        if images.ndim == 2:
            images = images[np.newaxis]
        self._images = images

        k = self.kernel_size
        batch, height, width = images.shape
        out_h, out_w = height - k + 1, width - k + 1
        conv_output = np.zeros((batch, out_h, out_w, self.num_filters))
        # Accumulate one shifted copy of the image per kernel position instead
        # of looping over every output pixel.
        for dr in range(k):
            for dc in range(k):
                window = images[:, dr:dr + out_h, dc:dc + out_w]
                conv_output += window[..., np.newaxis] * self.filters[:, dr, dc]
        return conv_output

    def backward(self, gradient_output):
        """Convolution backward pass; ``forward`` must have been called first.

        Args:
            gradient_output: Gradient of the loss with respect to the output of
                the last ``forward`` call, with that output's shape.

        Returns:
            Gradient with respect to the input, shaped like the array that was
            passed to ``forward``. The filter gradient is stored in
            ``self.grad_filters``.
        """
        images = self._images
        k = self.kernel_size
        out_h, out_w = gradient_output.shape[1:3]
        self.grad_filters = np.zeros_like(self.filters)
        delta_input = np.zeros_like(images)
        for dr in range(k):
            for dc in range(k):
                window = images[:, dr:dr + out_h, dc:dc + out_w]
                # dL/dW[f, dr, dc] = sum over (b, r, c) of x[b, r+dr, c+dc] * g[b, r, c, f]
                self.grad_filters[:, dr, dc] = np.tensordot(
                    window, gradient_output, axes=([0, 1, 2], [0, 1, 2])
                )
                # dL/dx[b, r+dr, c+dc] += sum over f of g[b, r, c, f] * W[f, dr, dc]
                delta_input[:, dr:dr + out_h, dc:dc + out_w] += np.dot(
                    gradient_output, self.filters[:, dr, dc]
                )
        return delta_input.reshape(self._input_shape)

class MaxPoolingLayer:
    """Max pooling over non-overlapping ``pool_size`` x ``pool_size`` windows."""

    def __init__(self, pool_size):
        """Store the side length of the square pooling window."""
        self.pool_size = pool_size

    def forward(self, input_data):
        """Max pooling forward pass.

        Args:
            input_data: Array of shape (batch, height, width, channels).

        Returns:
            Array of shape (batch, height // pool_size, width // pool_size,
            channels). Trailing rows/columns that do not fill a complete window
            are ignored.
        """
        p = self.pool_size
        batch, height, width, channels = input_data.shape
        out_h, out_w = height // p, width // p
        self._input_shape = input_data.shape

        trimmed = input_data[:, :out_h * p, :out_w * p, :]
        # Bring the two in-window axes to the end and merge them, so each window
        # becomes one vector of p*p values: (batch, out_h, out_w, channels, p*p).
        windows = (
            trimmed.reshape(batch, out_h, p, out_w, p, channels)
            .transpose(0, 1, 3, 5, 2, 4)
            .reshape(batch, out_h, out_w, channels, p * p)
        )
        # Remember which element won in each window (first one on ties).
        self._argmax = windows.argmax(axis=-1)
        pool_output = windows.max(axis=-1)
        return pool_output

    def backward(self, gradient_output):
        """Max pooling backward pass; ``forward`` must have been called first.

        Each incoming gradient value is routed to the position that held the
        maximum of its window; every other position receives zero.

        Args:
            gradient_output: Gradient with the shape of the last ``forward`` output.

        Returns:
            Gradient with the shape of the last ``forward`` input.
        """
        p = self.pool_size
        batch, out_h, out_w, channels = gradient_output.shape
        window_grads = np.zeros((batch, out_h, out_w, channels, p * p))
        np.put_along_axis(
            window_grads,
            self._argmax[..., np.newaxis],
            gradient_output[..., np.newaxis],
            axis=-1,
        )
        # Undo the window layout used in forward: back to (batch, rows, cols, channels).
        spatial = (
            window_grads.reshape(batch, out_h, out_w, channels, p, p)
            .transpose(0, 1, 4, 2, 5, 3)
            .reshape(batch, out_h * p, out_w * p, channels)
        )
        delta_input = np.zeros(self._input_shape)
        delta_input[:, :out_h * p, :out_w * p, :] = spatial
        return delta_input

class FullyConnectedLayer:
    """Affine layer ``x . weights + bias`` with gradient bookkeeping.

    ``forward`` caches its input; ``backward`` stores the parameter gradients
    in ``grad_weights`` / ``grad_bias`` and returns the gradient with respect
    to the input. Neither method changes ``weights`` or ``bias``.
    """

    def __init__(self, input_size, output_size):
        """Draw standard-normal weights and start with an all-zero bias row."""
        self.weights = np.random.randn(input_size, output_size)
        self.bias = np.zeros((1, output_size))

    def forward(self, input_data):
        """Fully connected forward pass.

        Args:
            input_data: Array of shape (batch, input_size).

        Returns:
            Array of shape (batch, output_size).
        """
        self._input = input_data
        fc_output = np.dot(input_data, self.weights) + self.bias
        return fc_output

    def backward(self, gradient_output):
        """Fully connected backward pass; ``forward`` must have been called first.

        Args:
            gradient_output: Gradient of shape (batch, output_size).

        Returns:
            Gradient with respect to the input, of shape (batch, input_size).
        """
        self.grad_weights = np.dot(self._input.T, gradient_output)
        # The bias is shared by every sample, so its gradient sums over the batch.
        self.grad_bias = np.sum(gradient_output, axis=0, keepdims=True)
        delta_input = np.dot(gradient_output, self.weights.T)
        return delta_input

class CNN:
    """Chains a convolution layer, a max-pooling layer and a dense layer.

    The dense layer is sized for a 13 x 13 x 8 pooled feature map, and
    ``backward`` reshapes gradients to that same map. The number of outputs is
    read from the module-level ``num_classes``.
    """

    def __init__(self):
        """Instantiate the three layers with their fixed sizes."""
        self.conv_layer = ConvLayer(num_filters=8, kernel_size=3)
        self.pool_layer = MaxPoolingLayer(pool_size=2)
        self.fc_layer = FullyConnectedLayer(13*13*8, num_classes)

    def forward(self, input_data):
        """Send ``input_data`` through conv -> pool -> flatten -> dense and return the result."""
        pooled = self.pool_layer.forward(self.conv_layer.forward(input_data))
        # keep the leading axis, merge all remaining axes into one
        leading = pooled.shape[0]
        return self.fc_layer.forward(pooled.reshape(leading, -1))

    def backward(self, gradient_output):
        """Propagate ``gradient_output`` back through dense -> pool -> conv.

        Returns whatever the convolution layer's ``backward`` returns.
        """
        grad = self.fc_layer.backward(gradient_output)
        # restore the pooled feature-map layout before entering the pooling layer
        grad = grad.reshape(grad.shape[0], 13, 13, 8)
        grad = self.pool_layer.backward(grad)
        return self.conv_layer.backward(grad)

# Hyperparameters and training loop
# ...

# Initialize CNN. The constructor takes no arguments; it reads the module-level
# num_classes, which therefore has to be defined before this line runs.
model = CNN()

# Training loop
# ...

# Testing and accuracy calculation
# ...



In [None]:
import numpy as np
import gzip

# Load MNIST dataset (same loading functions as before)
# ...

# Activation functions and their derivatives
# ...

# Defining the Adam optimizer
class AdamOptimizer:
    """Adam update rule applied in place to a list of parameter arrays.

    Keeps one first-moment (``m``) and one second-moment (``v``) accumulator
    per parameter, matched to the parameters by list position.
    """

    def __init__(self, parameters, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """Store the hyperparameters and create zeroed moment accumulators.

        Args:
            parameters: Sequence of arrays; only their shapes and dtypes are used here.
            learning_rate: Step size.
            beta1: Decay rate of the first-moment average.
            beta2: Decay rate of the second-moment average.
            epsilon: Small constant added to the denominator of the update.
        """
        self.learning_rate, self.epsilon = learning_rate, epsilon
        self.beta1, self.beta2 = beta1, beta2
        self.m = list(map(np.zeros_like, parameters))
        self.v = list(map(np.zeros_like, parameters))
        self.t = 0  # number of update() calls so far

    def update(self, parameters, gradients):
        """Perform one Adam step.

        Args:
            parameters: List of arrays, updated in place. Must line up by
                position with the list given to the constructor.
            gradients: One gradient per parameter, in the same order.
        """
        self.t += 1
        # Bias-correction denominators depend only on the step count.
        first_correction = 1 - self.beta1**self.t
        second_correction = 1 - self.beta2**self.t
        for idx in range(len(parameters)):
            grad = gradients[idx]
            self.m[idx] = self.beta1 * self.m[idx] + (1 - self.beta1) * grad
            self.v[idx] = self.beta2 * self.v[idx] + (1 - self.beta2) * grad**2
            m_hat = self.m[idx] / first_correction
            v_hat = self.v[idx] / second_correction
            parameters[idx] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

# Defining the CNN layers and architecture
# ...

# Hyperparameters
# ...

# Initialize CNN and Adam optimizer
# NOTE(review): CNN, learning_rate, num_epochs and batch_size are expected to come
# from the elided sections above. The CNN class shown earlier in this notebook
# defines no get_parameters() method - confirm the elided definition provides one.
model = CNN()
parameters = model.get_parameters()
optimizer = AdamOptimizer(parameters, learning_rate=learning_rate)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0

    for i in range(0, len(train_images), batch_size):
        batch_images = train_images[i:i+batch_size]
        batch_labels = train_labels_one_hot[i:i+batch_size]

        # Forward pass
        # ...
        # NOTE(review): the elided forward pass has to define softmax_output and
        # fc_input for this batch; both are read below.

        # Calculate loss (similar to previous code)
        # ...
        # NOTE(review): the elided code has to define `loss`, which is added to
        # total_loss at the end of the iteration.

        # Backpropagation
        # Difference between predicted probabilities and the one-hot targets
        delta_output = softmax_output - batch_labels

        # Backpropagate through layers
        # NOTE(review): this unpacks two values. The CNN.backward shown earlier in
        # this notebook returns a single array - confirm the elided CNN returns a pair.
        delta_conv, delta_fc = model.backward(delta_output)

        # Calculate gradients
        fc_gradients = fc_input.T.dot(delta_output)
        fc_bias_gradients = np.sum(delta_output, axis=0, keepdims=True)

        # Update fully connected layer weights and biases
        # optimizer.update reads gradients[i] for every index of `parameters`, so
        # `parameters` must contain exactly these two arrays in this order
        # (weights, then bias) - TODO confirm against get_parameters().
        gradients = [fc_gradients, fc_bias_gradients]
        optimizer.update(parameters, gradients)

        # ... (similar updates for convolutional layer)

        # Update total loss
        total_loss += loss

    # Divides by the number of full batches (integer division)
    average_loss = total_loss / (len(train_images) // batch_size)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# ... (testing and accuracy calculation)


In [None]:
# ... (previous code)

# Training loop (skeleton: every step marked "# ..." is elided and must be filled in)
for epoch in range(num_epochs):
    total_loss = 0

    # i is the start index of each mini-batch
    for i in range(0, len(train_images), batch_size):
        # Forward pass
        # ...

        # Backpropagation
        # ...

        # Update fully connected layer weights and biases
        # ...

        # Update convolutional layer weights and biases
        # ...

        # Update total loss
        # NOTE(review): `loss` is not assigned anywhere in this loop as written;
        # the elided forward pass has to compute it for the current batch.
        total_loss += loss

    # Divides by the number of full batches (integer division)
    average_loss = total_loss / (len(train_images) // batch_size)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# Testing and accuracy calculation
correct = 0
total_samples = len(test_images)

for i in range(0, len(test_images), batch_size):
    batch_images = test_images[i:i+batch_size]
    batch_labels = test_labels_one_hot[i:i+batch_size]

    # Forward pass for testing: the model is called on one image at a time
    batch_outputs = np.array([model.forward(image) for image in batch_images])
    # Flatten each sample's output to one row of class scores. A model that
    # returns a (1, num_classes) row per image would otherwise stack to
    # (batch, 1, num_classes), and argmax(axis=1) would run over the singleton
    # axis and return all zeros.
    batch_outputs = batch_outputs.reshape(len(batch_images), -1)

    # Calculate batch predictions
    batch_predictions = np.argmax(batch_outputs, axis=1)
    batch_true_labels = np.argmax(batch_labels, axis=1)

    # Update correct count
    correct += np.sum(batch_predictions == batch_true_labels)

# Calculate accuracy
accuracy = correct / total_samples
print(f"Test Accuracy: {accuracy:.4f}")
