In [1]:
import numpy as np  
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split  
from sklearn.metrics import accuracy_score  
from sklearn.utils import shuffle  

# Load MNIST dataset  
from tensorflow.keras.datasets import mnist  
(X_train, y_train), (X_test, y_test) = mnist.load_data()  

# Reshape and normalize data  
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0  
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0  

# Split data into training and validation sets  
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)  

# Define ReLU activation function  
def relu(x):  
    return np.maximum(x, 0)  

# Define softmax activation function  
def softmax(x):  
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)  

# Define convolution operation  
def convolve(x, weights):  
    output = np.zeros((x.shape[0] - weights.shape[0] + 1, x.shape[1] - weights.shape[1] + 1))  
    for i in range(output.shape[0]):  
        for j in range(output.shape[1]):  
            output[i, j] = np.sum(x[i:i+weights.shape[0], j:j+weights.shape[1]] * weights)  
    return output  

# Define max pooling operation  
def max_pool(x, size):  
    output = np.zeros((x.shape[0] // size, x.shape[1] // size))  
    for i in range(output.shape[0]):  
        for j in range(output.shape[1]):  
            output[i, j] = np.max(x[i*size:(i+1)*size, j*size:(j+1)*size])  
    return output  

# Define flatten operation  
def flatten(x):  
    return x.reshape(x.shape[0], -1)  

# Define dense layer operation  
def dense(x, weights, bias):  
    return np.dot(x, weights) + bias  

# Initialize weights and biases  
weights_conv1 = np.random.rand(3, 3, 1, 32)  
weights_conv2 = np.random.rand(3, 3, 32, 64)  
weights_dense1 = np.random.rand(7*7*64, 128)  
weights_dense2 = np.random.rand(128, 10)  
bias_conv1 = np.zeros((32,))  
bias_conv2 = np.zeros((64,))  
bias_dense1 = np.zeros((128,))  
bias_dense2 = np.zeros((10,))  

# Train the model  
for epoch in range(10):  
    X_train, y_train = shuffle(X_train, y_train)  
    for i in range(0, len(X_train), 32):  
        # Forward pass  
        x = X_train[i:i+32]  
        conv1 = relu(convolve(x, weights_conv1))  
        pool1 = max_pool(conv1, 2)  
        conv2 = relu(convolve(pool1, weights_conv2))  
        pool2 = max_pool(conv2, 2)  
        flat = flatten(pool2)  
        dense1 = relu(dense(flat, weights_dense1, bias_dense1))  
        output = softmax(dense(dense1, weights_dense2, bias_dense2))  

        # Backward pass  
        loss = -np.mean(np.log(output[np.arange(32), y_train[i:i+32]]))  
        d_output = output  
        d_output[np.arange(32), y_train[i:i+32]] -= 1  
        d_dense2 = d_output  
        d_dense1 = np.dot(d_dense2, weights_dense2.T)  
        d_flat = np.dot(d_dense1, weights_dense1.T)  
        d_pool2 = d_flat.reshape(-1, 7, 7, 64)  
        d_conv2 = np.zeros((d_pool2.shape[0], 14, 14, 64))  
        for j in range(d_pool2.shape[1]):  
            for k in range(d_pool2.shape[2]):  
                d_conv2[:, j*2:j*2+2, k*2:k*2+2, :] += d_pool2[:, j, k, :]  
        d_conv2 = d_conv2[:, 1:-1, 1:-1, :]  
        d_conv2 = np.multiply(d_conv2, np.where(conv2 > 0, 1, 0))  
        d_weights_conv2 = np.zeros((3, 3, 32, 64))  
        for j in range(d_conv2.shape[1]):  
            for k in range(d_conv2.shape[2]):  
                d_weights_conv2 += np.dot(d_conv2[:, j:j+3, k:k+3, :].reshape(-1, 1), pool1[:, j:j+3, k:k+3, :].reshape(-1, 1).T)  
        d_pool1 = np.zeros((d_conv2.shape[0], 28, 28, 32))  
        for j in range(d_conv2.shape[1]):  
            for k in range(d_conv2.shape[2]):  
                d_pool1[:, j*2:j*2+2, k*2:k*2+2, :] += d_conv2[:, j, k, :]  
        d_conv1 = np.zeros((d_pool1.shape[0], 30, 30, 32))  
        for j in range(d_pool1.shape[1]):  
            for k in range(d_pool1.shape[2]):  
                d_conv1[:, j:j+3, k:k+3, :] += d_pool1[:, j, k, :]  
        d_conv1 = d_conv1[:, 1:-1, 1:-1, :]  
        d_conv1 = np.multiply(d_conv1, np.where(conv1 > 0, 1, 0))  
        d_weights_conv1 = np.zeros((3, 3, 1, 32))  
        for j in range(d_conv1.shape[1]):  
            for k in range(d_conv1.shape[2]):  
                d_weights_conv1 += np.dot(d_conv1[:, j:j+3, k:k+3, :].reshape(-1, 1), x[:, j:j+3, k:k+3, :].reshape(-1, 1).T)  

        # Weight update  
        weights_conv1 -= 0.01 * d_weights_conv1  
        weights_conv2 -= 0.01 * d_weights_conv2  
        weights_dense1 -= 0.01 * np.dot(flat.T, d_dense1)  
        weights_dense2 -= 0.01 * np.dot(dense1.T, d_dense2)  
        bias_conv1 -= 0.01 * np.sum(d_conv1, axis=(0, 1, 2))  
        bias_conv2 -= 0.01 * np.sum(d_conv2, axis=(0, 1, 2))  
        bias_dense1 -= 0.01 * np.sum(d_dense1, axis=0)  
        bias_dense2 -= 0.01 * np.sum(d_dense2, axis=0)  

    print('Epoch', epoch+1, 'Loss:', loss)  

# Evaluate the model  
conv1 = relu(convolve(X_val, weights_conv1))  
pool1 = max_pool(conv1, 2)  
conv2 = relu(convolve(pool1, weights_conv2))  
pool2 = max_pool(conv2, 2)  
flat = flatten(pool2)  
dense1 = relu(dense(flat, weights_dense1, bias_dense1))  
output = softmax(dense(dense1, weights_dense2, bias_dense2))  
print('Validation Accuracy:', accuracy_score(y_val, np.argmax(output, axis=1)))

ValueError: operands could not be broadcast together with shapes (3,3) (3,3,32,64) 