## Loaded all the useful libraries

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch
from torch.utils.data import TensorDataset, DataLoader

## Loaded Dataset and Preprocessing  step

In [None]:
# Load MNIST Dataset
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data.values.astype(np.float32).reshape(-1, 1, 28, 28) / 255.0
y = mnist.target.astype(int).values

# One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)  

## Build the CNN Components

In [2]:
class Conv2D:
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        self.stride = stride
        self.padding = padding
        self.k = kernel_size
        self.W = np.random.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.1
        self.b = np.zeros((out_channels, 1))
    
    def forward(self, x):
        self.x = x
        N, C, H, W = x.shape
        F, _, k, _ = self.W.shape
        out_h = (H + 2 * self.padding - k) // self.stride + 1
        out_w = (W + 2 * self.padding - k) // self.stride + 1
        out = np.zeros((N, F, out_h, out_w))

        x_padded = np.pad(x, ((0,0), (0,0), (self.padding, self.padding), (self.padding, self.padding)))
        
        for n in range(N):
            for f in range(F):
                for i in range(out_h):
                    for j in range(out_w):
                        h_start = i * self.stride
                        w_start = j * self.stride
                        region = x_padded[n, :, h_start:h_start+k, w_start:w_start+k]
                        out[n, f, i, j] = np.sum(region * self.W[f]) + self.b[f]
        
        return out


## ReLU Activation and Max Pooling

In [None]:
class ReLU:
    def forward(self, x):
        self.x = x
        return np.maximum(0, x)
    

class MaxPool2D:
    def __init__(self, size, stride):
        self.size = size
        self.stride = stride

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = (H - self.size) // self.stride + 1
        out_w = (W - self.size) // self.stride + 1
        out = np.zeros((N, C, out_h, out_w))
        
        for n in range(N):
            for c in range(C):
                for i in range(out_h):
                    for j in range(out_w):
                        h_start = i * self.stride
                        w_start = j * self.stride
                        region = x[n, c, h_start:h_start+self.size, w_start:w_start+self.size]
                        out[n, c, i, j] = np.max(region)
        
        return out


## Dense

In [6]:
class Dense:
    def __init__(self, in_features, out_features):
        self.W = np.random.randn(in_features, out_features) * 0.1
        self.b = np.zeros((1, out_features))
    
    def forward(self, x):
        self.x = x
        return x @ self.W + self.b


In [None]:
class Flatten:
    def forward(self, x):
        self.x_shape = x.shape
        return x.reshape(x.shape[0], -1)

In [7]:
class SoftmaxCrossEntropy:
    def forward(self, logits, labels):
        exps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        self.probs = exps / np.sum(exps, axis=1, keepdims=True)
        self.labels = labels
        loss = -np.sum(labels * np.log(self.probs + 1e-9)) / logits.shape[0]
        return loss

    def backward(self):
        return (self.probs - self.labels) / self.labels.shape[0]


## ConvNet

In [11]:
conv = Conv2D(1, 8, 3, stride=1, padding=1)
relu = ReLU()
pool = MaxPool2D(2, 2)
flatten = Flatten()
dense = Dense(14*14*8, 10)
loss_fn = SoftmaxCrossEntropy()


## Forward Pass

In [12]:
def forward_pass(x):
    out = conv.forward(x)
    out = relu.forward(out)
    out = pool.forward(out)
    out = flatten.forward(out)
    out = dense.forward(out)
    return out

# Run one forward pass for demo
logits = forward_pass(X_train[:32])
loss = loss_fn.forward(logits, y_train[:32])
print(f"Loss (first batch): {loss}")


  out[n, f, i, j] = np.sum(region * self.W[f]) + self.b[f]


Loss (first batch): 2.3387095988836353


## Applying ConvNet by using Pytroch

In [None]:

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 8, 3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc = nn.Linear(14*14*8, 10)
    
    def forward(self, x):
        x = torch.relu(self.conv(x))
        x = self.pool(x)
        x = x.view(-1, 14*14*8)
        return self.fc(x)

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())


## Accuracy by using Numpy CNN without any training 

In [15]:
def accuracy(logits, labels):
    preds = np.argmax(logits, axis=1)
    targets = np.argmax(labels, axis=1)
    return np.mean(preds == targets)

test_logits = forward_pass(X_test)
test_acc = accuracy(test_logits, y_test)
print("NumPy CNN Test Accuracy:", test_acc * 100)


  out[n, f, i, j] = np.sum(region * self.W[f]) + self.b[f]


NumPy CNN Test Accuracy: 10.335714285714285


## Accuracy by using Pytorch CNN without any training 

In [None]:

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(np.argmax(y_test, axis=1), dtype=torch.long)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

def evaluate(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            outputs = model(data)
            _, preds = torch.max(outputs, 1)
            correct += (preds == target).sum().item()
            total += target.size(0)
    return 100 * correct / total

test_acc = evaluate(model, test_loader)
print("PyTorch CNN Test Accuracy:", test_acc)


PyTorch CNN Test Accuracy: 10.285714285714286



## Conclusion

The comparison below demonstrates the behavior of a Convolutional Neural Network (CNN) implemented using both **NumPy** and **PyTorch**, evaluated **without any training**.

## Test Accuracy Results

- **NumPy CNN Accuracy:** 10.33%
- **PyTorch CNN Accuracy:** 10.28%

The minor difference in accuracy (~0.05%) is negligible. Both models have randomly initialized weights, so for a 10-class classification task, the expected accuracy due to random guessing is close to **10%**.

## Execution Time Comparison

- **NumPy:** ~12 minutes and 50 seconds  
- **PyTorch:** 1.1 seconds  

This significant difference highlights a major advantage of PyTorch:

> **PyTorch is highly optimized for performance and can leverage GPU acceleration, whereas NumPy is restricted to CPU-based operations and lacks deep learning-specific optimizations.**

## Key Takeaways

- Without training, both CNNs perform close to chance level (≈10%).
- PyTorch is **much faster** during evaluation and suitable for real-world, large-scale problems.
- NumPy is ideal for educational purposes and understanding the inner workings of CNNs.
- For practical deployment, **PyTorch is preferred** due to its performance, scalability, and support for hardware acceleration.

## Summary Table

| Framework | Accuracy (%) | Evaluation Time |
|-----------|---------------|-----------------|
| NumPy     | 10.33         | 12m 50s         |
| PyTorch   | 10.28         | 1.1s            |

*Table: Comparison of NumPy and PyTorch CNN evaluation without training*
