In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=0.1736, std=0.3317)
    ]
)

In [3]:
train_dataset = torchvision.datasets.EMNIST(
    root='./data', train=True, download=True, transform=transform, split='letters'
)
test_dataset = torchvision.datasets.EMNIST(
    root='./data', train=False, download=True, transform=transform, split='letters'
)

In [4]:
# data loader
# shuffle will prevent the model from memorizing the data
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# we don't want to shuffle since model is done learning
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [5]:
# we will not use a custom model

class EMNISTClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        # 64, 1, 28, 28 tensor. '64' images '1' is grayscale
        # linear vectors wants a single long list of vector
        # 28*28 = 784
        self.flatten = nn.Flatten() 
        self.layers = nn.Sequential(
            nn.Linear(784, 128),
            nn.ReLU(),
            nn.Linear(128, 26)
        )

    def forward(self, x):
            x = self.flatten(x) #flatten the data
            x = self.layers(x) # pass it through layers
            return x
            

In [6]:
# device selection
device = torch.device("mps")
print(f"Using {device}")

Using mps


In [7]:
# initialize model and move it to devic
model = EMNISTClassifier().to(device)

In [8]:
# loss function
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [9]:
def train_epoch(
    model,
    train_loader,    
    loss_function,
    optimizer,
    device
):

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        # track progress
        running_loss += loss.item()
        _, predicted = output.max(1) # what is this doing?
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

        if batch_idx % 100 == 0 and batch_idx > 0:
            avg_loss = running_loss / 100
            accuracy = 100. * correct / total
            print(f'\t[{batch_idx * 64}/ 60000] '
                  f'Loss: {avg_loss:.3f} | Accuracy: {accuracy:.1f}%')
            running_loss = 0.0
        
        

In [10]:
# now evaluate the model
def evaluate(
    model,
    test_loader,
    device
):
    # set to eval mode
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        return 100.0 * correct / total

In [11]:
num_epochs = 10
for epoch in range(num_epochs):
    print(f'\nEpoch: {epoch + 1}')
    train_epoch(
        model,
        train_loader,
        loss_function,
        optimizer,
        device    
    )
    accuracy = evaluate(
        model,
        train_loader,
        device
    )
    print(f'Test Accuracy: {accuracy:.2f}%')


Epoch: 1
	[6400/ 60000] Loss: 1.620 | Accuracy: 49.6%
	[12800/ 60000] Loss: 1.050 | Accuracy: 57.3%
	[19200/ 60000] Loss: 0.927 | Accuracy: 60.8%
	[25600/ 60000] Loss: 0.874 | Accuracy: 63.3%
	[32000/ 60000] Loss: 0.780 | Accuracy: 65.2%
	[38400/ 60000] Loss: 0.739 | Accuracy: 66.6%
	[44800/ 60000] Loss: 0.668 | Accuracy: 67.9%
	[51200/ 60000] Loss: 0.620 | Accuracy: 69.0%
	[57600/ 60000] Loss: 0.629 | Accuracy: 69.9%
	[64000/ 60000] Loss: 0.592 | Accuracy: 70.7%
	[70400/ 60000] Loss: 0.547 | Accuracy: 71.5%
	[76800/ 60000] Loss: 0.551 | Accuracy: 72.2%
	[83200/ 60000] Loss: 0.559 | Accuracy: 72.8%
	[89600/ 60000] Loss: 0.523 | Accuracy: 73.3%
	[96000/ 60000] Loss: 0.496 | Accuracy: 73.9%
	[102400/ 60000] Loss: 0.478 | Accuracy: 74.3%
	[108800/ 60000] Loss: 0.502 | Accuracy: 74.7%
	[115200/ 60000] Loss: 0.449 | Accuracy: 75.2%
	[121600/ 60000] Loss: 0.472 | Accuracy: 75.5%
Test Accuracy: 83.47%

Epoch: 2
	[6400/ 60000] Loss: 0.436 | Accuracy: 82.9%
	[12800/ 60000] Loss: 0.417 | Accura

## How to see what's inside the mode?

In [12]:
print(model.parameters())

<generator object Module.parameters at 0x10f1c25e0>


In [15]:
for param in model.parameters():
    print(param.shape)

torch.Size([128, 784])
torch.Size([128])
torch.Size([26, 128])
torch.Size([26])


In [17]:
# how to get the total parameters
total_params = sum(param.numel() for param in model.parameters())
print(f"Total parameters: {total_params}")

Total parameters: 103834


In [18]:
# named parameters to understand how each layer looks like.
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")

layers.0.weight: torch.Size([128, 784])
layers.0.bias: torch.Size([128])
layers.2.weight: torch.Size([26, 128])
layers.2.bias: torch.Size([26])


PyTorch allows for `children` and `modules` for the parameters. Think of the model of a folder structure. `children` will just show the first level. `module` will show everything.