In [1]:
# Import PyTorch

import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

# Import matplotlib

import matplotlib.pyplot as plt


In [2]:
# Getting a dataset
train_data = datasets.FashionMNIST(
    root="data", # where to download data
    train=True,
    download=True, # download yes/no?
    transform=torchvision.transforms.ToTensor(), # transform on the data
    target_transform=None # how we want to transform the label
)

test_data = datasets.FashionMNIST(
    root="data", # where to download data
    train=False,
    download=True, # download yes/no?
    transform=torchvision.transforms.ToTensor(), # transform on the data
    target_transform=None # how we want to transform the label
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:08<00:00, 3242379.53it/s] 


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 300540.86it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5452027.69it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 14779108.14it/s]


Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [3]:
class_name = train_data.classes

In [4]:
# See the first training example
image, label = train_data[0]
class_to_idx = train_data.class_to_idx


In [5]:
# Check shape of image

print(f"Image Shape: {image.shape} -> ( colour channels, height, width) | Label: {label}")

Image Shape: torch.Size([1, 28, 28]) -> ( colour channels, height, width) | Label: 9


In [6]:
# # Plot more images
# torch.manual_seed(42)

# fig = plt.figure(figsize=(9,9))
# rows, cols = 4,4
# for i in range(1, rows*cols+1):
#     random_idx = torch.randint(0, len(train_data), size=[1]).item()
#     img, label = train_data[random_idx]
#     fig.add_subplot(rows, cols, i)
#     plt.imshow(img.squeeze(), cmap="gray")
#     plt.title(class_name[label])
#     plt.axis(False);

In [7]:
# Prepare DataLoader - turns our dataset into a python iterable. We want to turn our model into small batches.
from torch.utils.data import DataLoader
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=32,
# # Plot more images
# torch.manual_seed(42)

# fig = plt.figure(figsize=(9,9))
# rows, cols = 4,4
# for i in range(1, rows*cols+1):
#     random_idx = torch.randint(0, len(train_data), size=[1]).item()
#     img, label = train_data[random_idx]
#     fig.add_subplot(rows, cols, i)
#     plt.imshow(img.squeeze(), cmap="gray")

[ ]

    shuffle=True
)

test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=32,
    shuffle=False
)

In [8]:
train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32]))

In [9]:
# # Show a sample
# torch.manual_seed(42)
# random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
# img, label = train_features_batch[random_idx], train_labels_batch[random_idx]
# plt.imshow(img.squeeze(), cmap="gray")
# plt.title(class_name[label])
# plt.axis(False);

In [10]:
flatten_model = nn.Flatten()

# Get a single sample
x = train_features_batch[0]

output = flatten_model(x)
output.shape

torch.Size([1, 784])

In [11]:
from torch import nn
class FashionMNISTModelV0(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=output_shape)

        )

    def forward(self, x):
        return self.layer_stack(x)

In [12]:
torch.manual_seed(42)

# Setup model with input parameters
model_0 = FashionMNISTModelV0(input_shape=784,
                              hidden_units=10,
                              output_shape=len(class_name))

model_0.to('cpu')

FashionMNISTModelV0(
  (layer_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
  )
)

In [13]:
dummy_x = torch.rand([1,1,28,28])
model_0(dummy_x)

tensor([[-0.0315,  0.3171,  0.0531, -0.2525,  0.5959,  0.2112,  0.3233,  0.2694,
         -0.1004,  0.0157]], grad_fn=<AddmmBackward0>)

In [14]:
# Defining Loss function and Optimizer

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)
def accuracy_fn(predictions, labels):
    correct = torch.eq(predictions, labels).sum().item()
    acc = (correct / len(predictions)) * 100
    return acc

In [15]:
# Creating a function to time our expirements

from timeit import default_timer as timer
def print_train_time(start: float,
                     end: float,
                     device: torch.device=None):
    """
    Prints difference betweens start and end time
    """
    total_time = end - start
    print(f"Total time on device {device}: {total_time} seconds")
    return total_time



In [16]:
start_time = timer()

end_time = timer()

print(print_train_time(start_time, end_time))

Total time on device None: 6.290900000749389e-05 seconds
6.290900000749389e-05


In [17]:
import torch


In [19]:
# Creating a training loop
# Loop through epochs, loop through training batches, perform training steps, calculate train loss per batch
# Import tqdm
from tqdm.auto import tqdm

# Set the seed and start the timer
torch.manual_seed(42)
train_time_start_on_cpu = timer()

epochs = 3

# Create training and test loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n----------")

    train_loss = 0
    # Add a loop
    for batch, (X,y) in enumerate(train_dataloader):
        print(f"Batch {batch} out of {len(train_dataloader)}")
        model_0.train()
        # 1. Forward Pass
        y_pred = model_0(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples.")

        # Divide total train loss by length of train dataloader
        train_loss /= len(train_dataloader)

        # Testing
        test_loss, test_acc = 0, 0
        model_0.eval()
        with torch.inference_mode():
            for X_test, y_test in test_dataloader:

                # 1. Forward Pass
                test_pred = model_0(X_test)

                # 2. Calculate loss
                test_loss += loss_fn(test_pred, y_test)

                # 3. Calculate accuracy
                test_acc += accuracy_fn(labels=y_test, predictions=test_pred.argmax(dim=1))

            # Calculate avg test loss per batch
            test_loss /= len(test_dataloader)

            # calculate the test acc per batch
            test_acc /= len(test_dataloader)
print(f"\nTrain Loss: {train_data: .4f} | Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

train_time_end_on_cpu = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu, end=train_time_end_on_cpu, device=next(model_0.parameters()).device)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
----------
Batch 0 out of 1875
Looked at 0/60000 samples.
Batch 1 out of 1875
Batch 2 out of 1875
Batch 3 out of 1875
Batch 4 out of 1875
Batch 5 out of 1875
Batch 6 out of 1875
Batch 7 out of 1875
Batch 8 out of 1875
Batch 9 out of 1875
Batch 10 out of 1875
Batch 11 out of 1875
Batch 12 out of 1875
Batch 13 out of 1875
Batch 14 out of 1875
Batch 15 out of 1875
Batch 16 out of 1875
Batch 17 out of 1875
Batch 18 out of 1875
Batch 19 out of 1875
Batch 20 out of 1875
Batch 21 out of 1875


KeyboardInterrupt: 