In [1]:
import torch
from torch import nn
from torchvision.datasets import MNIST

In [2]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) # The values 0.1307 and 0.3081 used for the Normalize() transformation are the global mean and standard deviation of the MNIST dataset.
    ])

In [3]:
# choose the training dataset
train_data = MNIST(root='data', train=True,
                                   download=True, 
                                   transform=transform)

In [4]:
#size of train dataset
len(train_data)

60000

In [5]:
batch_size = 2048

In [6]:
import numpy as np
from torch.utils.data import DataLoader

# data loader preparation
train_loader = DataLoader(train_data, batch_size=batch_size)

In [9]:
from torch import nn, optim
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 56)
        self.fc5 = nn.Linear(56, 10)
        
    def forward(self, x):
        # input tensor is flattened 
        x = x.view(x.shape[0], -1)
        
        # without applying dropout
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        
        x = self.fc5(x)
        
        return x

### Training with MacBook GPU - Metal Performance Shaders (MPS)

In [10]:
# Creating a device object 
device = torch.device("mps") if torch.mps.is_available() else torch.device("cpu")
print(device)

mps


In [11]:
model = Model().to(device)

# Loss Function and Optimizer

criterion = nn.CrossEntropyLoss()

from torch import optim

optimizer = optim.SGD(model.parameters(), lr=0.01)

In [12]:
%%time
for epoch in range(1, 16): ## run the model for 15 epochs
    train_loss = []
    ## training part 
    model.train()
    for data, target in train_loader:
        
        # Move input and label tensors to the avialable device
        data, target = data.to(device), target.to(device)
        
        #Reshaping the input data before sending into the model
        data = data.view(data.shape[0], -1)
        
        optimizer.zero_grad()
        
        ## 1. forward propagation
        output = model(data)
        
        ## 2. loss calculation
        loss = criterion(output, target)
        
        ## 3. backward propagation
        loss.backward()
        
        ## 4. weight optimization
        optimizer.step()
        
        train_loss.append(loss.item())
        
    print ("Epoch:", epoch, "Training Loss: ", np.mean(train_loss))

Epoch: 1 Training Loss:  2.302324406305949
Epoch: 2 Training Loss:  2.3001217285792035
Epoch: 3 Training Loss:  2.297734332084656
Epoch: 4 Training Loss:  2.2951090733210244
Epoch: 5 Training Loss:  2.292158524195353
Epoch: 6 Training Loss:  2.2887542883555096
Epoch: 7 Training Loss:  2.284791620572408
Epoch: 8 Training Loss:  2.2801591555277505
Epoch: 9 Training Loss:  2.274731167157491
Epoch: 10 Training Loss:  2.2682989676793417
Epoch: 11 Training Loss:  2.2605528195699054
Epoch: 12 Training Loss:  2.2510716756184896
Epoch: 13 Training Loss:  2.2391775925954183
Epoch: 14 Training Loss:  2.223794468243917
Epoch: 15 Training Loss:  2.2034788688023883
CPU times: user 24.4 s, sys: 619 ms, total: 25 s
Wall time: 31.5 s
