In [None]:
# Import PyTorch
import torch
from torch import nn

# Import torchvision
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

# Import matplotlib for visualization
import matplotlib.pyplot as plt

# Report the library versions this notebook is running against
print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

# Device-agnostic setup: default to the CPU and switch to CUDA only when it is available
device = 'cpu'
if torch.cuda.is_available():
  device = 'cuda'

In [None]:
# Training split (train=True): stored under "data", downloaded on request,
# with ToTensor() applied to every image and no transform applied to the targets.
train_data = datasets.FashionMNIST(root="data", train=True, download=True,
                                   transform=ToTensor(), target_transform=None)

# Test split (train=False): same storage root, download policy and image transform.
test_data = datasets.FashionMNIST(root="data", train=False, download=True,
                                  transform=ToTensor())

In [None]:
# Inspect the first training sample
image, label = train_data[0]
# `label` is a single class index (it is used to index class_names below),
# so it is reported as a label rather than as a shape
print(f"image shape: {image.shape}, label: {label}")
print(f"Train data length: {len(train_data.data)}, Train targets length: {len(train_data.targets)}")
print(f"Test data length: {len(test_data.data)}, Test targets length: {len(test_data.targets)}")

# Human-readable class names, indexed by label
class_names = train_data.classes
print(f"Class names: {class_names}")

# Drop the channel axis so imshow receives a 2-D (H, W) array, as the image-grid cell does
plt.imshow(image.squeeze(), cmap='gray')
plt.title(class_names[label]);

In [None]:
# Show a rows x cols grid of randomly chosen training images, titled with their class names
torch.manual_seed(42)
rows, cols = 4, 4
fig = plt.figure(figsize=(9,9))
for i in range(1, rows * cols + 1):
  # Draw one random dataset index per grid cell (seeded above, so the grid is reproducible)
  random_idx = torch.randint(0, len(train_data), size=[1]).item()
  img, label = train_data[random_idx]
  # Work on the returned Axes directly instead of going through the pyplot "current axes"
  ax = fig.add_subplot(rows, cols, i)
  ax.imshow(img.squeeze(), cmap='gray')
  ax.set_title(class_names[label])
  ax.axis(False);

In [None]:
from torch.utils.data import DataLoader

# Setup the batch size hyperparameter
BATCH_SIZE = 32

# Turn datasets into iterables (batches).
# The training loader shuffles its data; the test loader keeps the dataset order.
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True
)

test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# Let's check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}")
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

# Pull a single batch out of the training dataloader to inspect its shapes
train_features_batch, train_labels_batch = next(iter(train_dataloader))
print(f"Features batch shape: {train_features_batch.shape}, Labels batch shape: {train_labels_batch.shape}")

# Show one sample from that batch to confirm the data is unchanged by batching
torch.manual_seed(42)
random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
img, label = train_features_batch[random_idx], train_labels_batch[random_idx]
# Drop the channel axis so imshow receives a 2-D (H, W) array, as the image-grid cell does
plt.imshow(img.squeeze(), cmap='gray')
plt.title(class_names[label])
plt.axis("Off");
print(f"Image size: {img.shape}")
print(f"Label: {label}, label size: {label.shape}")

In [None]:
### Base Model
# Create a flatten layer
flatten_model = nn.Flatten() # all nn modules function as a model (can do a forward pass)

# Get a single sample from the batch inspected earlier
x = train_features_batch[0]

# Flatten the sample. nn.Flatten keeps the first axis by default (start_dim=1), so for
# this un-batched sample the leading (channel) axis is kept and the remaining axes are merged.
output = flatten_model(x)

# Print out what happened
print(f"Shape before flattening: {x.shape} -> [color_channels, height, width]")
print(f"Shape after flattening: {output.shape} -> [color_channels, height*width]")

In [None]:
class FashionMNISTModelV0(nn.Module):
  """Baseline classifier: flatten the input, then apply two stacked linear layers.

  There is no non-linearity between the two linear layers.

  Args:
    in_shape: Number of features per sample after flattening.
    hidden_units: Output width of the first linear layer.
    out_shape: Number of values produced per sample by the second linear layer.
  """

  def __init__(self, in_shape: int, hidden_units: int, out_shape: int):
    super().__init__()
    # Order matters: it fixes both the forward computation and the state_dict keys
    # (layer_stack.1.* for the hidden layer, layer_stack.2.* for the output layer).
    layers = [
        nn.Flatten(),
        nn.Linear(in_shape, hidden_units),
        nn.Linear(hidden_units, out_shape),
    ]
    self.layer_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Run `x` through the flatten + linear stack and return the result."""
    outputs = self.layer_stack(x)
    return outputs

# Create a model with non-linear and linear layers
class FashionMNISTModelV1(nn.Module):
  """Classifier with a ReLU non-linearity between two linear layers.

  The output of the final linear layer is returned as-is (raw logits). No activation
  is applied after it: a trailing ReLU would clamp every logit to be >= 0 before the
  loss sees it and would zero the gradient of any logit that got clamped.
  Removing that parameter-free layer leaves the state_dict keys unchanged
  (layer_stack.1.* and layer_stack.3.*).

  Args:
    in_shape: Number of features per sample after flattening.
    hidden_units: Output width of the hidden linear layer.
    out_shape: Number of logits produced per sample.
  """

  def __init__(self, in_shape: int, hidden_units: int, out_shape: int):
    super().__init__()
    self.layer_stack = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=in_shape, out_features=hidden_units),
        nn.ReLU(),
        # Output layer: intentionally not followed by an activation (see class docstring)
        nn.Linear(in_features=hidden_units, out_features=out_shape)
    )

  def forward(self, x: torch.Tensor):
    """Return the raw logits for the batch `x`."""
    return self.layer_stack(x)

# Create a convolutional neural network
class FashionMNISTModelV2(nn.Module):
  """Small convolutional classifier: two conv blocks followed by a linear classifier.

  Each block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d. The 3x3 convolutions use
  stride 1 and padding 1, and each 2x2 max-pool has stride 2. The classifier's input
  width is fixed at `hidden_units * 7 * 7`, i.e. it expects 7x7 feature maps after the
  second pool (which is what a 28x28 input produces).

  Args:
    in_shape: Number of channels of the input images.
    hidden_units: Number of channels produced by every convolution.
    out_shape: Number of values produced per sample by the classifier.
  """

  @staticmethod
  def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
    """Build one Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d block."""
    conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, **conv_kwargs),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, **conv_kwargs),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )

  def __init__(self, in_shape: int, hidden_units: int, out_shape: int):
    super().__init__()
    # Sub-modules are created in forward order so that, under a fixed seed,
    # parameters are initialised in the same sequence as before.
    self.block1 = self._conv_block(in_shape, hidden_units)
    self.block2 = self._conv_block(hidden_units, hidden_units)
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(hidden_units * 7 * 7, out_shape),
    )

  def forward(self, x: torch.Tensor):
    """Apply both conv blocks, then the classifier, and return its output."""
    features = self.block1(x)
    features = self.block2(features)
    return self.classifier(features)

In [None]:
# Seed once, then build the three models in a fixed order so that their
# initial weights are reproducible from run to run
torch.manual_seed(42)

# The two linear models take 784 flattened input features
# (presumably 28*28 pixels - compare with the image shape printed earlier);
# the convolutional model takes 1 input channel.
model_0 = FashionMNISTModelV0(in_shape=784, hidden_units=10, out_shape=len(class_names))
model_1 = FashionMNISTModelV1(in_shape=784, hidden_units=10, out_shape=len(class_names))
model_2 = FashionMNISTModelV2(in_shape=1, hidden_units=10, out_shape=len(class_names))

# Move each model to the target device and report where its parameters live
for created_model in (model_0, model_1, model_2):
  print(created_model.to(device))
  print(f"Model on Device: {next(created_model.parameters()).device}")

In [None]:
import time
def print_train_time(start: float, end: float, device=None):
  """Print and return the elapsed time between two timestamps.

  Args:
    start: Timestamp taken before the work started.
    end: Timestamp taken after the work finished.
    device: Label of the device the work ran on; only used in the printed message.

  Returns:
    The difference `end - start`.
  """
  total_time = end - start
  message = f"Train time on {device}: {total_time:.3f} seconds"
  print(message)
  return total_time

In [None]:
from tqdm.auto import tqdm

# Seed the RNG so the run is reproducible
torch.manual_seed(42)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)

# Start the timer
t_start = time.time()

# Set the number of epochs
epochs = 3

# The model only has to be moved to the device once, not on every epoch
model_0.to(device)

# Create training and testing loop
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n------")

  ### Training
  # Running totals, weighted by batch size so a shorter last batch is not over-weighted
  train_loss_sum, train_seen = 0.0, 0
  model_0.train()
  for batch, (img, label) in enumerate(train_dataloader):
    # Send data to device
    img, label = img.to(device), label.to(device)

    # 1. Forward pass
    pred = model_0(img)

    # 2. Calculate the loss (mean over the batch)
    loss = loss_fn(pred, label)
    # .item() yields a plain float: adding the tensor itself would chain every
    # batch's autograd history onto the running total
    train_loss_sum += loss.item() * len(label)
    train_seen += len(label)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    # Print out how many samples have been seen
    if batch % 400 == 0:
      print(f"Looked at {batch * len(img)}/{len(train_dataloader.dataset)} samples")

  # Average training loss per sample for this epoch
  train_loss = train_loss_sum / train_seen

  ### Testing
  test_loss_sum, test_correct, test_seen = 0.0, 0, 0
  model_0.eval()
  with torch.inference_mode():
    for img, label in test_dataloader:
      # Send data to device
      img, label = img.to(device), label.to(device)

      # 1. Forward pass
      test_pred = model_0(img)

      # 2. Accumulate loss and number of correct predictions.
      # argmax over the logits equals argmax over their softmax, so no softmax is needed.
      test_loss_sum += loss_fn(test_pred, label).item() * len(label)
      test_correct += (test_pred.argmax(dim=1) == label).sum().item()
      test_seen += len(label)

  # Per-sample averages over the whole test set (accuracy in percent)
  test_loss = test_loss_sum / test_seen
  test_acc = 100 * test_correct / test_seen

  ## Print out what's happening
  print(f"\nTrain loss: {train_loss:.4f} | Test loss: {test_loss:.4f}, Test acc: {test_acc:.2f}%\n")

# Calculate training time
train_time_model_0 = print_train_time(t_start, time.time(), device)


In [None]:
# Functionizing training and test loops
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device):
  """Train `model` for one pass over `dataloader` and print the epoch's loss and accuracy.

  Loss and accuracy are averaged per sample (each batch weighted by its size), so a
  shorter final batch does not skew the result. The loss weighting assumes `loss_fn`
  returns the mean loss of the batch (the default reduction of the torch.nn losses).

  Args:
    model: Module to train; moved to `device` and put into training mode.
    dataloader: Yields `(inputs, labels)` batches.
    loss_fn: Called as `loss_fn(predictions, labels)`.
    optimizer: Optimizer that updates the model's parameters.
    device: Device on which the model and every batch are placed.

  Returns:
    None. The metrics are printed.

  Raises:
    ZeroDivisionError: If the dataloader yields no samples.
  """
  total_loss, total_correct, total_samples = 0.0, 0, 0
  model.to(device)
  # test_step()/eval_model() leave the model in eval mode, so switch back explicitly
  model.train()
  for img, label in dataloader:
    # Send data to device
    img, label = img.to(device), label.to(device)

    # 1. Forward pass
    pred = model(img)

    # 2. Loss for the backward pass, plus running totals for reporting.
    # .item() yields a plain float so the running total holds no autograd history.
    loss = loss_fn(pred, label)
    batch_size = len(label)
    total_loss += loss.item() * batch_size
    # argmax over the logits equals argmax over their softmax
    total_correct += (pred.argmax(dim=1) == label).sum().item()
    total_samples += batch_size

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

  # Per-sample averages for the epoch (accuracy in percent)
  train_loss = total_loss / total_samples
  train_acc = 100 * total_correct / total_samples
  print(f"Train loss: {train_loss:.4f} | Train accuracy: {train_acc:.2f}%")

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device):
  test_loss, test_acc = 0, 0
  model.to(device)
  model.eval()
  with torch.inference_mode():
    for batch, (img, label) in enumerate(dataloader):
      # Send data to device
      img, label = img.to(device), label.to(device)

      # 1. Forward pass
      pred = model(img)

      # 2. Calculate accumulatively loss/acc
      test_loss += loss_fn(pred, label)
      test_acc += (torch.eq(pred.softmax(dim=1).argmax(dim=1), label).sum().item()/len(label))*100

    # Calculate loss and accuracy per epoch and print out what's happening
    test_loss /= len(dataloader)
    test_acc /= len(dataloader)
    print(f"Test loss: {test_loss:.4f} | Test accuracy: {test_acc:.2f}%")


In [None]:
from tqdm.auto import tqdm
# Seed the RNG so the run is reproducible
torch.manual_seed(42)

# Loss and optimizer for model_1
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

# Start the timer
t_start = time.time()

epochs = 3

# One training pass followed by one evaluation pass per epoch
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n------")
  train_step(model_1, train_dataloader, loss_fn, optimizer, device)
  test_step(model_1, test_dataloader, loss_fn, device)

# Report and keep the total time spent in the loop above
train_time_model_1 = print_train_time(t_start, time.time(), device)

In [None]:
from tqdm.auto import tqdm
# Seed the RNG so the run is reproducible
torch.manual_seed(42)

# Loss and optimizer for model_2
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(), lr=0.1)

# Start the timer
t_start = time.time()

epochs = 3

# One training pass followed by one evaluation pass per epoch
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n------")
  train_step(model_2, train_dataloader, loss_fn, optimizer, device)
  test_step(model_2, test_dataloader, loss_fn, device)

# Report and keep the total time spent in the loop above
train_time_model_2 = print_train_time(t_start, time.time(), device)

In [None]:
### Eval model
torch.manual_seed(42)
def eval_model(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               device: torch.device):
  """Evaluate `model` on `dataloader` and return its name, loss and accuracy.

  Loss and accuracy are averaged per sample (each batch weighted by its size), so a
  shorter final batch does not skew the result. The loss weighting assumes `loss_fn`
  returns the mean loss of the batch (the default reduction of the torch.nn losses).

  Args:
    model: Module to evaluate; moved to `device` and put into eval mode.
    dataloader: Yields `(inputs, labels)` batches.
    loss_fn: Called as `loss_fn(predictions, labels)`.
    device: Device on which the model and every batch are placed.

  Returns:
    A dict with keys "model_name" (the model's class name), "model_loss"
    (float, average loss per sample) and "model_acc" (float, accuracy in percent).

  Raises:
    ZeroDivisionError: If the dataloader yields no samples.
  """
  total_loss, total_correct, total_samples = 0.0, 0, 0
  model.to(device)
  model.eval()
  with torch.inference_mode():
    for img, label in dataloader:
      # Send data to device
      img, label = img.to(device), label.to(device)

      # 1. Forward pass / make predictions
      pred = model(img)

      # 2. Accumulate loss and number of correct predictions.
      # argmax over the logits equals argmax over their softmax.
      batch_size = len(label)
      total_loss += loss_fn(pred, label).item() * batch_size
      total_correct += (pred.argmax(dim=1) == label).sum().item()
      total_samples += batch_size

  return {"model_name": model.__class__.__name__, # only works when model was created with a class
          "model_loss": total_loss / total_samples,
          "model_acc": 100 * total_correct / total_samples}


In [None]:
# Evaluate each trained model on the test dataloader with the current loss function,
# printing every result dictionary as soon as it is available
model_0_results = eval_model(model_0, test_dataloader, loss_fn, device)
print(f"\nModel0 results: {model_0_results}")

model_1_results = eval_model(model_1, test_dataloader, loss_fn, device)
print(f"\nModel1 results: {model_1_results}")

model_2_results = eval_model(model_2, test_dataloader, loss_fn, device)
print(f"\nModel2 results: {model_2_results}")

In [None]:
import pandas as pd
# Tabulate the three result dictionaries, one row per model
compare_results = pd.DataFrame([model_0_results, model_1_results, model_2_results])
print(compare_results)

# Add training times to results comparison (same row order as the models above)
training_times = [train_time_model_0, train_time_model_1, train_time_model_2]
compare_results["training_time"] = training_times
print(compare_results)

# Horizontal bar chart of accuracy per model
accuracy_by_model = compare_results.set_index("model_name")["model_acc"]
accuracy_by_model.plot(kind="barh")
plt.xlabel("accuracy (%)")
plt.ylabel("model");

In [None]:
# Make predictions
def make_predictions(model: torch.nn.Module,
                     data: list,
                     device: torch.device):
  """Return the prediction probabilities of `model` for every sample in `data`.

  Each sample is run through the model on its own, as a batch of size one.

  Args:
    model: Module to run; moved to `device` and put into eval mode.
    data: Iterable of un-batched sample tensors.
    device: Device on which the model and each sample are placed.

  Returns:
    A CPU tensor with one row of softmax probabilities per sample.

  Raises:
    RuntimeError: If `data` is empty (torch.stack rejects an empty list).
  """
  pred_probs = []
  # Keep the model on the same device as the samples, as the other helpers in this file do
  model.to(device)
  model.eval()
  with torch.inference_mode():
    for sample in data:
      # Prepare sample: add a batch dimension and send it to the device
      sample = sample.unsqueeze(dim=0).to(device)

      # Forward pass (model outputs raw logits)
      pred_logit = model(sample)

      # Softmax over the class dimension, then drop only the batch dimension.
      # Naming the dims keeps the class axis intact even when there is a single class.
      pred_prob = pred_logit.softmax(dim=1).squeeze(dim=0)

      # Collect on the CPU so the results can be stacked regardless of `device`
      pred_probs.append(pred_prob.cpu())

  # Stack the pred_probs to turn list into a tensor
  return torch.stack(pred_probs)

In [None]:
import random
random.seed(42)
test_samples = []
test_labels = []
# Draw 9 distinct random positions and index the dataset directly, so only the chosen
# samples are loaded instead of first materialising the whole test set as a list.
# The draw is seeded above, so the selection is reproducible.
for sample_idx in random.sample(range(len(test_data)), k=9):
  sample, label = test_data[sample_idx]
  test_samples.append(sample)
  test_labels.append(label)

print(len(test_samples))

# View the first test sample shape and label
print(f"Test sample image shape: {test_samples[0].shape}\nTest sample label: {test_labels[0]} ({class_names[test_labels[0]]})")

# Make predictions on test samples with model 2
pred_probs = make_predictions(model=model_2,
                              data=test_samples,
                              device=device)

# View first two prediction probabilities list
print(pred_probs[:2])

# Turn the prediction probabilities into prediction labels by taking the argmax()
pred_classes = pred_probs.argmax(dim=1)

# Are predictions in the same form as test labels?
print(f"Test labels: {test_labels}\nPredicted labels: {pred_classes}")

In [None]:
# Plot predictions: one subplot per test sample, titled with predicted vs. true class
plt.figure(figsize=(9,9))
nrows, ncols = 3, 3
for i, sample in enumerate(test_samples):
  # Create a subplot
  plt.subplot(nrows, ncols, i+1)

  # Plot the target image; drop the channel axis so imshow receives a 2-D (H, W) array,
  # as the image-grid cell does
  plt.imshow(sample.squeeze(), cmap='gray')

  # Find the prediction label (in text form, e.g. "Sandal")
  pred_label = class_names[pred_classes[i]]

  # Get the truth label (in text form, e.g. "T-shirt")
  truth_label = class_names[test_labels[i]]

  # Create the title text of the plot
  title_text = f"Pred: {pred_label} | Truth: {truth_label}"

  # Green title if the prediction is correct, red if it is wrong
  title_color = 'g' if pred_label == truth_label else 'r'
  plt.title(title_text, fontsize=10, c=title_color)
  plt.axis(False);

In [None]:
### Save model
from pathlib import Path

# Directory that holds saved models; it is created on demand (including parents),
# and an already existing directory is not an error
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the checkpoint file inside the models directory
MODEL_NAME = "fashion.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Save only the state_dict(), i.e. the learned parameters rather than the whole model object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_2.state_dict(), MODEL_SAVE_PATH)

In [None]:
# Create a new instance of the model with the same constructor arguments as model_2,
# so the saved parameter shapes match
loaded_model_2 = FashionMNISTModelV2(in_shape=1,
                                    hidden_units=10,
                                    out_shape=len(class_names))

# Load in the saved state_dict(). map_location remaps the stored tensors onto the
# current device, so a checkpoint saved on a GPU can also be loaded on a CPU-only machine.
loaded_model_2.load_state_dict(torch.load(f=MODEL_SAVE_PATH, map_location=device))

# Send model to device
loaded_model_2 = loaded_model_2.to(device)

# Evaluate loaded model
torch.manual_seed(42)

loaded_model_2_results = eval_model(model=loaded_model_2,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device)

print(loaded_model_2_results)

# Check to see if results are close to each other (if they are very far away, there may be an error)
close = torch.isclose(torch.tensor(model_2_results["model_loss"]),
              torch.tensor(loaded_model_2_results["model_loss"]),
              atol=1e-08,
              rtol=0.0001)
print("yes" if close.item() else "No")