In [None]:
import numpy as np
import keras
from keras import layers
import tensorflow as tf

tf.config.list_physical_devices("GPU")
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices("GPU")))

##### Load dataset MNIST

In [12]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Load the data and split it between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


##### Build model

In [13]:
model = keras.Sequential([
    keras.Input(shape=input_shape),
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax"),
])

model.summary()

##### Training

In [None]:
from keras import optimizers

batch_size = 128
epochs = 15

model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(learning_rate=-1e9), metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/15


2025-05-09 22:00:44.889886: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - accuracy: 0.7571 - loss: 0.7832 - val_accuracy: 0.9762 - val_loss: 0.0850
Epoch 2/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.9639 - loss: 0.1241 - val_accuracy: 0.9840 - val_loss: 0.0599
Epoch 3/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.9729 - loss: 0.0853 - val_accuracy: 0.9872 - val_loss: 0.0466
Epoch 4/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.9774 - loss: 0.0725 - val_accuracy: 0.9893 - val_loss: 0.0430
Epoch 5/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 18ms/step - accuracy: 0.9820 - loss: 0.0613 - val_accuracy: 0.9892 - val_loss: 0.0400
Epoch 6/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 18ms/step - accuracy: 0.9820 - loss: 0.0570 - val_accuracy: 0.9893 - val_loss: 0.0382
Epoch 7/15
[1m422/422[0m [32m━

<keras.src.callbacks.history.History at 0x108721270>

##### Evaluate the trained model

In [None]:
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.026239627972245216
Test accuracy: 0.9909999966621399


#### Build CNN with PyTorch

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchinfo import summary

# Define relevant variables for the ML task
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 20

# Device will determine whether to run the training on GPU or CPU.
device = torch.device("mps" if torch.mps.is_available() else "cpu")
device

device(type='mps')

##### Load dataset MNIST

In [None]:
all_transforms = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Create Training dataset
train_dataset = torchvision.datasets.MNIST(
    root = './data',
    train = True,
    transform = all_transforms,
    download = True
)

# Create Testing dataset
test_dataset = torchvision.datasets.MNIST(
    root = './data',
    train = False,
    transform = all_transforms,
    download=True
)

# Instantiate loader objects to facilitate processing
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset,
    batch_size = batch_size,
    shuffle = True
)

test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset,
    batch_size = batch_size,
    shuffle = True
)

print(train_loader.dataset.data.shape)
print(test_loader.dataset.data.shape)

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


##### Build Model

In [12]:
# Creating a CNN class
class ConvNeuralNet(nn.Module):
#  Determine what layers and their order in CNN object 
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()

        # X:  torch.Size([64, 1, 32, 32])
        # X1:  t`orch.Size([64, 32, 30, 30])
        # X2:  torch.Size([64, 32, 15, 15])
        # X3:  torch.Size([64, 64, 6, 6])
        # X4:  torch.Size([64, 2304])

        # torch.Size([64, 3, 32, 32])
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        # => torch.Size([64, 32, 30, 30])
        self.relu1 = nn.ReLU()
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        # => torch.Size([64, 32, 15, 15])

        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        # => torch.Size([64, 64, 13, 13])
        self.relu2 = nn.ReLU()
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2) 
        # => torch.Size([64, 64, 6, 6])

        self.flatten = nn.Flatten() # => torch.Size([64, 2304])
        self.dropout = nn.Dropout(0.5)

        self.fc = nn.Linear(in_features=2304, out_features=num_classes)
    
    # Progresses data across layers    
    def forward(self, x):
        # Stage 1
        out = self.conv_layer1(x)

        out = self.relu1(out)
        out = self.max_pool1(out)
        # Stage 2
        out = self.conv_layer2(out)
        out = self.relu2(out)
        out = self.max_pool2(out)
        # Stage 3

        out = self.flatten(out)
        out = self.dropout(out)
        out = self.fc(out)
        return out

In [13]:
model = ConvNeuralNet(num_classes).to(device)

# Set Loss function with criterion
criterion = nn.CrossEntropyLoss()

# Set optimizer with optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)  
total_step = len(train_loader)

# We use the pre-defined number of epochs to determine how many iterations to train the network on
for epoch in range(num_epochs):
# Load in the data in batches using the train_loader object
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Epoch [1/20], Loss: 0.3703
Epoch [2/20], Loss: 0.2329
Epoch [3/20], Loss: 0.1238
Epoch [4/20], Loss: 0.1052
Epoch [5/20], Loss: 0.4695
Epoch [6/20], Loss: 0.0552
Epoch [7/20], Loss: 0.0549
Epoch [8/20], Loss: 0.0903
Epoch [9/20], Loss: 0.2304
Epoch [10/20], Loss: 0.0916
Epoch [11/20], Loss: 0.2033
Epoch [12/20], Loss: 0.3862
Epoch [13/20], Loss: 0.0627
Epoch [14/20], Loss: 0.2978
Epoch [15/20], Loss: 0.0949
Epoch [16/20], Loss: 0.1539
Epoch [17/20], Loss: 0.0488
Epoch [18/20], Loss: 0.2172
Epoch [19/20], Loss: 0.0878
Epoch [20/20], Loss: 0.0579


##### Testing

In [14]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    print('Accuracy of the network on the {} train images: {} %'.format(50000, 100 * correct / total))

Accuracy of the network on the 50000 train images: 97.25833333333334 %
