In [None]:
import torch
import torch.nn as nn

#import torchvision
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

#import plot funcs
import matplotlib.pyplot as plt

## 1. Getting Dataset

We're going to use the FashionMNIST dataset, loading it with the built-in `datasets` module from torchvision.

In [None]:
# Build both FashionMNIST splits with identical settings: files live under
# "data" (download=True), images go through ToTensor(), labels are untouched.
# The only difference between the two splits is the `train` flag.
train_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
        target_transform=None,
    )
    for is_train_split in (True, False)
)

In [None]:
len(train_data), len(test_data)

### 1.1 Checking Data Shape

In [None]:
#see first training example
img, label = train_data[0]
img, label

In [None]:
class_names = train_data.classes
class_names

In [None]:
class_to_idx = train_data.class_to_idx
class_to_idx

In [None]:
#check image shape
img.shape

In [None]:
#img classication
label, class_names[label]

### 1.2 Visualizing Data

In [None]:
import matplotlib.pyplot as plt

img, label = train_data[0]
print(f"Image shape: {img.shape}")
plt.imshow(img.squeeze())
img.squeeze().shape, img.shape

In [None]:
plt.imshow(img.squeeze(), cmap="gray")

In [None]:
#plot more images
torch.manual_seed(42)
fig = plt.figure(figsize=(20,20))

rows, cols = 5,5
for  i in range(1, rows * cols+1):
    random_idx = torch.randint(0, len(train_data), size = [1]).item()
    img, label = train_data[random_idx]
    fig.add_subplot(rows, cols, i)
    plt.imshow(img.squeeze(), cmap="gray")
    plt.title(class_names[label])



## 2. Prepare DataLoader

right now our data is in the form of PyTorch Datasets

A `DataLoader` turns our dataset into a Python iterable.

More specifically, we want to turn our data into batches (or mini-batches).

we do this because:

1. it is more computationally efficient to break our dataset into smaller batches
2. it gives our NN more chances to update its gradients per epoch




In [None]:
from torch.utils.data import DataLoader

#setup batch size hyperparameter
BATCH_SIZE = 32 

#turn datasets into iterables (batches)
train_dataloader = DataLoader(dataset=train_data, 
                              batch_size=BATCH_SIZE, 
                              shuffle=True)
test_dataloader = DataLoader(dataset=test_data, 
                              batch_size=BATCH_SIZE, 
                              shuffle=False)
train_dataloader,test_dataloader

In [None]:
#check out what we've created
print(f"Dataloaders: {train_dataloader, test_dataloader}")
print(f"Length of train_dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
# this line reports the test loader, so it is labelled as such
print(f"Length of test_dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

In [None]:
#checkout whats inside the training dataloader
train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

In [None]:
#show a sample
torch.manual_seed(42)
random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
img, label = train_features_batch[random_idx], train_labels_batch[random_idx]
plt.imshow(img.squeeze(), cmap="gray")
plt.title(class_names[label])
print(f"Image size: {img.shape}")
print(f"Label: {label}, label size: {label.shape}")

## 3. Model 0: Building a Baseline Model

when making ML models, it's best practice to make a baseline model

a baseline model is a simple model that we'll try to improve upon with subsequent models

we start simple and add complexity when necessary

In [None]:
#create a flatten layer
flatten_model = nn.Flatten()

#get a single sample
x = train_features_batch[0]

#flatten sample
output = flatten_model(x)

print(f"OG Shape: {x.shape}")
print(f"Shape after flattening: {output.shape}")
x, output

In [None]:
from torch import nn
class fasionMNISTModel(nn.Module):
    """Baseline classifier: flatten the input, then apply two linear layers.

    There is no non-linearity between the two linear layers.

    Args:
        input_shape: number of features per sample after flattening.
        hidden_units: width of the hidden linear layer.
        output_shape: number of values produced per sample (one per class).
    """
    def __init__(self,
                 input_shape: int,
                 hidden_units: int,
                 output_shape: int):  # annotation, not a default: `= int` made the type itself the default value
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=output_shape)
        )

    def forward(self, x):
        """Run `x` through the flatten + linear stack and return the raw outputs."""
        return self.layer_stack(x)

In [None]:
#create instance
torch.manual_seed(42)

model0 = fasionMNISTModel(input_shape=784, #pixels in 28 x 28 img 
                          hidden_units=8, #units in hidden layer
                          output_shape=len(class_names)) #output probabilities for every class

model0

In [None]:
test_x = torch.rand([1,1,28,28])
model0(test_x)

## 3.1 Setup Loss, Optimizer, and Evaluation Metrics

* Loss Func - since we're working with multi-class data, our loss function will be `nn.CrossEntropyLoss()`
* Optimizer - `torch.optim.SGD()`
* Eval Metric - since we're working on a classification problem, we'll use accuracy as our evaluation metric

In [None]:
from helper_functions import accuracy_fn

#set up loss func and optimizer
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(params=model0.parameters(),
                            lr = 0.01)

In [None]:
#create a function to time how long our model takes
from timeit import default_timer as timer

def print_train_time(start:float, 
                     end:float, 
                     device: torch.device = None):
    """Print and return the time elapsed between two timer readings.

    Args:
        start: timer reading taken before the work began.
        end: timer reading taken after the work finished.
        device: label shown in the printed message; only used for display.

    Returns:
        `end - start`.
    """
    elapsed = end - start
    print(f"Train time on: {device}: {elapsed} seconds")
    return elapsed

### 3.3 Creating a Training Loop on Batches of Data

1. Loop through Epochs
2. Loop through training batches, perform training steps, calculate the train loss per batch
3. Loop through the testing batches, perform testing steps, calculate the test loss per batch
4. Print out what's happening
5. Time our model

In [None]:
#import tgdm progress bar
from tqdm.auto import tqdm

#set seed and start timer
torch.manual_seed(42)
torch.cuda.manual_seed(42)

train_time_start_cpu = timer()

#set number of epochs (we'll keep this small for training time)
epochs = 5

#create training and test loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n")

    ### Training
    train_loss = 0.0
    # back to training mode at the start of every epoch (the test phase below switches to eval)
    model0.train()
    for batch, (X, y) in enumerate(train_dataloader):
        #1. forward step
        train_pred = model0(X)

        #2. calc loss
        loss = loss_fn(train_pred, y)
        # accumulate a plain float so the running total does not keep each batch's autograd history
        train_loss += loss.item()

        #3. optim zero grad
        optimizer.zero_grad()

        #4. back prop
        loss.backward()

        #5. optim step
        optimizer.step()

        if (batch % 100) == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples.")

    #average the summed per-batch losses over the number of batches
    train_loss /= len(train_dataloader)

    ### Testing
    test_loss = 0.0
    test_acc = 0.0
    model0.eval()
    with torch.inference_mode():
        for X_test, y_test in test_dataloader:
            #1. forward pass
            test_pred = model0(X_test)

            #2. calculate loss
            test_loss += loss_fn(test_pred, y_test).item()

            #3. calculate accuracy
            test_acc += accuracy_fn(y_true=y_test, y_pred=test_pred.argmax(dim=1))

    # both totals are sums of per-batch values, so each is averaged once over the batch count
    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader)

    #print out what's happening
    print(f"\nTrain Loss: {train_loss} | Test Loss: {test_loss}, Test Acc = {test_acc}")

train_time_end_cpu = timer()

# report the device the parameters live on (not the parameter tensor itself)
total_train_time_mod_0 = print_train_time(start = train_time_start_cpu,
                                          end = train_time_end_cpu,
                                          device=str(next(model0.parameters()).device))

### 4. Make Predictions for Model0

In [None]:
torch.manual_seed(42)

def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn):
    """Evaluate `model` on every batch of `data_loader` and average the metrics.

    Args:
        model: module called as `model(X)` for each batch.
        data_loader: iterable of `(X, y)` batches; must support `len()`.
        loss_fn: called as `loss_fn(y_pred, y)`.
        accuracy_fn: called as `accuracy_fn(y_true=y, y_pred=<argmax over dim 1>)`.

    Returns:
        dict with keys "model_name", "model_loss" and "model_acc". These are the
        same keys the device-aware `eval_model` further down uses, so result
        dicts from different models line up when combined into one table.
    """
    loss = 0.0
    acc = 0.0

    # put the model in evaluation mode before predicting
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            #make pred
            y_pred = model(X)

            #loss and acc (loss kept as a plain float)
            loss += loss_fn(y_pred, y).item()
            acc += accuracy_fn(y_true=y,
                               y_pred=y_pred.argmax(dim=1))

    #scale loss and acc to find average values per batch
    loss /= len(data_loader)
    acc /= len(data_loader)

    return {"model_name": model.__class__.__name__,
            "model_loss": loss,
            "model_acc": acc}

#calc model 0 results on test dataset
model0_results = eval_model(model=model0,
           data_loader=test_dataloader,
           loss_fn=loss_fn,
           accuracy_fn=accuracy_fn)

model0_results

## 5. Setup Device Agnostic Code

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

## Better Model with Non-Linearity


In [None]:
from torch import nn
class fasionMNISTModel2(nn.Module):
    """Flatten the input, then apply two linear layers, each followed by a ReLU.

    Args:
        input_shape: number of features per sample after flattening.
        hidden_units: width of the hidden linear layer.
        output_shape: number of values produced per sample (one per class).
    """
    def __init__(self,
                 input_shape: int,
                 hidden_units: int,
                 output_shape: int):  # annotation, not a default: `= int` made the type itself the default value
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_shape),
            # NOTE(review): this ReLU clamps the final class scores to be non-negative
            # before they reach the loss - confirm that is intended
            nn.ReLU()
        )

    def forward(self, x):
        """Run `x` through the layer stack and return its outputs."""
        return self.layer_stack(x)
    
model1 = fasionMNISTModel2(input_shape=28*28,
                           hidden_units=8,
                           output_shape=len(class_names)
                           ).to(device)

model1

In [None]:
next(model1.parameters()).device

In [None]:
from helper_functions import accuracy_fn

#set up loss func and optimizer
loss_fn = nn.CrossEntropyLoss()

# the optimizer must hold the parameters of the model being trained next (model1);
# built over model0's parameters it would leave model1's weights untouched
optimizer = torch.optim.SGD(params=model1.parameters(),
                            lr = 0.01)

In [None]:
#training loop function

def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer, 
               accuracy_fn, 
               device: torch.device = device):
    """Train `model` for one pass over `data_loader` and print the averaged metrics.

    Args:
        model: module to train; it is put in training mode but not moved to `device`.
        data_loader: iterable of `(X, y)` batches; must support `len()`.
        loss_fn: called as `loss_fn(train_pred, y)`.
        optimizer: optimizer stepped once per batch.
        accuracy_fn: called as `accuracy_fn(y_true=y, y_pred=<argmax over dim 1>)`.
        device: device each batch is moved to before the forward pass.
    """
    train_loss = 0.0
    train_acc = 0.0
    model.train()

    for X, y in data_loader:
        #put data on target device
        X = X.to(device)
        y = y.to(device)

        #1. forward step
        train_pred = model(X)

        #2. calc loss
        loss = loss_fn(train_pred, y)
        # keep the running total as a plain float: summing the loss tensors themselves
        # keeps their autograd history referenced and prints as tensor(..., grad_fn=...)
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y,
                                 y_pred=train_pred.argmax(dim=1))

        #3. optim zero grad
        optimizer.zero_grad()

        #4. back prop
        loss.backward()

        #5. optim step
        optimizer.step()

    #divide total train loss and accuracy by length of train dataloader
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss} | Train Acc: {train_acc}")

In [None]:
#testing loop function
def test_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer, 
               accuracy_fn, 
               device: torch.device = device):
    """Evaluate `model` on `data_loader`, print the averaged metrics and return them.

    Args:
        model: module to evaluate; it is put in eval mode but not moved to `device`.
        data_loader: iterable of `(X, y)` batches; must support `len()`.
        loss_fn: called as `loss_fn(test_pred, y)`.
        optimizer: accepted so the signature matches `train_step`; not used here.
        accuracy_fn: called as `accuracy_fn(y_true=y, y_pred=<argmax over dim 1>)`.
        device: device each batch is moved to before the forward pass.

    Returns:
        dict with the per-batch averages under "test_loss" and "test_acc".
    """
    test_loss = 0.0
    test_acc = 0.0

    model.eval()

    with torch.inference_mode():
        for X, y in data_loader:
            #put data on target device
            X = X.to(device)
            y = y.to(device)

            #1. forward step
            test_pred = model(X)

            #2. calc loss and accuracy
            test_loss += loss_fn(test_pred, y).item()
            test_acc += accuracy_fn(y_true=y,
                                    y_pred=test_pred.argmax(dim=1))

    #divide total test loss and accuracy by length of the dataloader
    test_loss = test_loss / len(data_loader)
    test_acc = test_acc / len(data_loader)
    # these are test metrics, so label them as such
    print(f"Test loss: {test_loss} | Test Acc: {test_acc}")

    # hand the metrics back so callers that store the result get something usable
    return {"test_loss": test_loss, "test_acc": test_acc}

In [None]:
torch.manual_seed(42)

train_step(model=model1,
           data_loader=train_dataloader, 
           loss_fn=loss_fn,
           optimizer=optimizer,
           accuracy_fn=accuracy_fn,
           device=device)    

In [None]:
test_results = test_step(model=model1,
           data_loader=test_dataloader, 
           loss_fn=loss_fn,
           optimizer=optimizer,
           accuracy_fn=accuracy_fn,
           device=device)

In [None]:
test_results

In [None]:
# Create a model with non-linear and linear layers
class FashionMNISTModelV1(nn.Module):
    """Flatten -> Linear -> ReLU -> Linear -> ReLU classifier.

    Args:
        input_shape: number of features per sample after flattening.
        hidden_units: width of the hidden linear layer.
        output_shape: number of values produced per sample (one per class).
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # layers are listed in forward order; their positions become the
        # indices used in the module's state_dict keys
        layers = [
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_shape),
            nn.ReLU(),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        """Apply the layer stack to `x`."""
        stack = self.layer_stack
        return stack(x)
        

In [None]:
torch.manual_seed(42)
model_1 = FashionMNISTModelV1(input_shape=784, # number of input features
    hidden_units=10,
    output_shape=len(class_names) # number of output classes desired
).to(device) # send model to GPU if it's available
next(model_1.parameters()).device # check model device

In [None]:
from helper_functions import accuracy_fn
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), 
                            lr=0.1)

In [None]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    """Train `model` for one pass over `data_loader` and print the averaged metrics.

    Args:
        model: module to train; moved to `device` and put in training mode.
        data_loader: iterable of `(X, y)` batches; must support `len()`.
        loss_fn: called as `loss_fn(y_pred, y)`.
        optimizer: optimizer stepped once per batch.
        accuracy_fn: called as `accuracy_fn(y_true=y, y_pred=<argmax over dim 1>)`.
        device: device the model and each batch are moved to.
    """
    train_loss, train_acc = 0.0, 0.0
    model.to(device)
    # test_step() leaves the model in eval mode, so switch back before every training pass
    model.train()
    for X, y in data_loader:
        # Send data to the target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss (running total kept as a plain float)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y,
                                 y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Calculate loss and accuracy per epoch and print out what's happening
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate `model` on `data_loader` and print the averaged loss and accuracy.

    The model is moved to `device` and put in eval mode; every batch is moved to
    `device` before the forward pass. Nothing is returned.
    """
    model.to(device)
    model.eval()

    running_loss, running_acc = 0, 0
    with torch.inference_mode():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            logits = model(features)

            running_loss += loss_fn(logits, targets)
            # argmax over dim 1 turns the model outputs into predicted labels
            running_acc += accuracy_fn(y_true=targets,
                                       y_pred=logits.argmax(dim=1))

        # totals are sums of per-batch values; average over the batch count
        n_batches = len(data_loader)
        mean_loss = running_loss / n_batches
        mean_acc = running_acc / n_batches
        print(f"Test loss: {mean_loss:.5f} | Test accuracy: {mean_acc:.2f}%\n")
        

In [None]:
# Seed torch's RNG before training model_1
torch.manual_seed(42)

# Measure wall-clock time for the whole train/test loop.
# NOTE(review): the timer variables are named *_on_gpu, but the run uses whatever
# `device` resolved to earlier - confirm the naming is still appropriate
from timeit import default_timer as timer
train_time_start_on_gpu = timer()

# One training pass followed by one test pass per epoch; no `device` argument is
# given, so both steps fall back to their default device
epochs = 5
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=train_dataloader, 
        model=model_1, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn
    )
    test_step(data_loader=test_dataloader,
        model=model_1,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn
    )

# Stop the timer and keep the elapsed time for later comparison
train_time_end_on_gpu = timer()
total_train_time_model_1 = print_train_time(start=train_time_start_on_gpu,
                                            end=train_time_end_on_gpu,
                                            device=device)


In [None]:
# Move values to device
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn, 
               device: torch.device = device):
    """Score `model` on `data_loader` and summarise the result.

    Args:
        model (torch.nn.Module): module called as `model(X)` on each batch.
        data_loader (torch.utils.data.DataLoader): yields `(X, y)` batches.
        loss_fn (torch.nn.Module): called as `loss_fn(y_pred, y)`.
        accuracy_fn: called as `accuracy_fn(y_true=y, y_pred=<argmax over dim 1>)`.
        device (str, optional): device each batch is moved to. Defaults to device.

    Returns:
        (dict): "model_name" (the model's class name), "model_loss" and
        "model_acc", the latter two averaged over the number of batches.
    """
    model.eval()
    total_loss, total_acc = 0, 0
    with torch.inference_mode():
        for features, targets in data_loader:
            # batches are moved to the target device; the model is used where it already lives
            features = features.to(device)
            targets = targets.to(device)
            logits = model(features)
            total_loss += loss_fn(logits, targets)
            total_acc += accuracy_fn(y_true=targets, y_pred=logits.argmax(dim=1))

        n_batches = len(data_loader)
        mean_loss = total_loss / n_batches
        mean_acc = total_acc / n_batches

    summary = {
        "model_name": type(model).__name__,
        "model_loss": mean_loss.item(),
        "model_acc": mean_acc,
    }
    return summary

# Calculate model 1 results with device-agnostic code 
model_1_results = eval_model(model=model_1, data_loader=test_dataloader,
    loss_fn=loss_fn, accuracy_fn=accuracy_fn,
    device=device
)
model_1_results


## Model 2: Build a CNN 

CNNs are also known as ConvNets, they're used to find patterns in visual data

In [None]:
#create a CNN
class FashionMNISTV2(nn.Module):
    """Small CNN: two conv blocks (conv-ReLU-conv-ReLU-maxpool) and a linear classifier.

    The classifier's input size assumes 28x28 images: every convolution uses
    kernel 3 / stride 1 / padding 1 and each block ends in a 2x2 max-pool,
    which leaves `hidden_units` feature maps of 7x7.

    Args:
        input_shape: number of input channels.
        hidden_units: number of channels produced by every convolution.
        output_shape: number of values produced per image (one per class).
    """
    def __init__(self, input_shape:int,
                 hidden_units:int,
                 output_shape:int):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1), #values we can set ourselves in our NN are called hyperparameters
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # flattened size is channels * height * width = hidden_units * 7 * 7
            # (the previous hidden_units * 49 / 8 only matched an unbatched input)
            nn.Linear(in_features=hidden_units * 7 * 7,
                      out_features=output_shape)
        )

    def forward(self, x):
        """Return one row of class scores per image.

        A single unbatched image of shape (channels, height, width) is accepted
        too; it is treated as a batch of one.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.classifier(x)
        return x


In [None]:
torch.manual_seed(42)

model2 = FashionMNISTV2(input_shape=1,
                        hidden_units=8,
                        output_shape=len(class_names)).to(device)

### 7.1 Stepping Through `nn.Conv2d`


In [None]:
torch.manual_seed(42)

#create a batch of images
images = torch.randn(size=(32, 3, 64,64))
test_image = images[0]

print(f"Image batch shape: {images.shape}")
print(f"Single image shape: {test_image.shape}")
print(f"Test Image:\n {test_image}")


In [None]:
model2.state_dict()

In [None]:
#create a single conv layer
conv_layer = nn.Conv2d(in_channels=3,
                       out_channels=8,
                       kernel_size=(3,3), #kernel = filter, size of the filer that we're passing over our image for convolution
                       stride=1,
                       padding=0)

#pass the data through the conv layer
conv_output = conv_layer(test_image)
conv_output, conv_output.shape

In [None]:
test_image.shape

### 7.2 Stepping Through nn.MaxPool2d()

In [None]:
#print original image shape without unsqueezed dimension
print(f"Test image og shape: {test_image.shape}")
print(f"Test image with unsqueezed dimension: {test_image.unsqueeze(0).shape}")

#create a sample nn.MaxPool() layer
max_pool_layer = nn.MaxPool2d(kernel_size=2)

#pass data through just the conv layer
test_img_through_conv = conv_layer(test_image.unsqueeze(dim=0))
print(f"Test image through conv_layer(): {test_img_through_conv.shape}")

#pass data through maxpool layer
test_image_through_conv_and_max_pool = max_pool_layer(test_img_through_conv)
print(f"Shape after going through both: {test_image_through_conv_and_max_pool.shape}")

In [None]:
torch.manual_seed(42)

#create a random tensor with a similar number of dimensions
random_tensor = torch.randn(size=(1,1,2,2))
random_tensor

In [None]:
#create a max pool layer
max_pool_layer = nn.MaxPool2d(kernel_size=2)

#pass rand tensor through max pool
max_pool_tensor = max_pool_layer(random_tensor)
print(f"\nMax pool tensor: \n{max_pool_tensor}")
print(f"\nMax pool tensor shape: {max_pool_tensor.shape}")

# show the input's values as well as its shape, for comparison with the pooled result
print(f"\nRandom tensor: \n{random_tensor}")
print(f"\nRandom tensor shape: {random_tensor.shape}")

In [None]:
img.shape

In [None]:
plt.imshow(img.squeeze(), cmap="gray")

In [None]:
rand_image_tensor = torch.randn(size=(1,28,28))
rand_image_tensor.shape

In [None]:
model2(rand_image_tensor.to(device))

### 7.3 Setup Loss Function and Optimizer for CNN

In [None]:
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(params= model2.parameters(),
                            lr = 0.01)

In [None]:
model2.state_dict()

### 7.4 Training and Testing Model 2 Using Training and Testing Functions

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

#train and test model
epochs = 3

for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}")
    train_step(model=model2, 
               data_loader=train_dataloader,
               loss_fn=loss_fn,
               optimizer=optimizer,
               accuracy_fn=accuracy_fn,
               device=device)

In [None]:
# Create a convolutional neural network 
class FashionMNISTModelV2(nn.Module):
    """
    TinyVGG-style CNN (architecture from https://poloclub.github.io/cnn-explainer/):
    two conv-ReLU-conv-ReLU-maxpool blocks followed by a linear classifier.
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        def conv3x3(in_channels: int) -> nn.Conv2d:
            # every convolution in the model shares these hyperparameters and
            # always produces `hidden_units` channels
            return nn.Conv2d(in_channels=in_channels,
                             out_channels=hidden_units,
                             kernel_size=3,
                             stride=1,
                             padding=1)

        # layers are created in forward order so parameter initialisation
        # follows the same sequence as the layers are applied
        self.block_1 = nn.Sequential(
            conv3x3(input_shape),
            nn.ReLU(),
            conv3x3(hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.block_2 = nn.Sequential(
            conv3x3(hidden_units),
            nn.ReLU(),
            conv3x3(hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # the classifier expects hidden_units feature maps of 7x7 after both blocks
        flattened_features = hidden_units * 7 * 7
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flattened_features, out_features=output_shape),
        )

    def forward(self, x: torch.Tensor):
        """Apply block_1, block_2 and the classifier in turn."""
        features = self.block_2(self.block_1(x))
        return self.classifier(features)

torch.manual_seed(42)
model_2 = FashionMNISTModelV2(input_shape=1, 
    hidden_units=10, 
    output_shape=len(class_names)).to(device)
model_2

In [None]:
torch.manual_seed(42)

# Create sample batch of random numbers with same size as image batch
images = torch.randn(size=(32, 3, 64, 64)) # [batch_size, color_channels, height, width]
test_image = images[0] # get a single image for testing
print(f"Image batch shape: {images.shape} -> [batch_size, color_channels, height, width]")
print(f"Single image shape: {test_image.shape} -> [color_channels, height, width]") 
print(f"Single image pixel values:\n{test_image}")

In [None]:
torch.manual_seed(42)

# Create a convolutional layer with same dimensions as TinyVGG 
# (try changing any of the parameters and see what happens)
conv_layer = nn.Conv2d(in_channels=3,
                       out_channels=10,
                       kernel_size=3,
                       stride=1,
                       padding=0) # also try using "valid" or "same" here 

# Pass the data through the convolutional layer
conv_layer(test_image) # Note: If running PyTorch <1.11.0, this will error because of shape issues (nn.Conv.2d() expects a 4d tensor as input) 

In [None]:
torch.manual_seed(42)
# Create a new conv_layer with different values (try setting these to whatever you like)
conv_layer_2 = nn.Conv2d(in_channels=3, # same number of color channels as our input image
                         out_channels=10,
                         kernel_size=(5, 5), # kernel is usually a square so a tuple also works
                         stride=2,
                         padding=0)

# Pass single image through new conv_layer_2 (this calls nn.Conv2d()'s forward() method on the input)
conv_layer_2(test_image.unsqueeze(dim=0)).shape

In [None]:
# Check out the conv_layer_2 internal parameters
print(conv_layer_2.state_dict())

In [None]:
torch.manual_seed(42)

# The optimizer created in section 7.3 holds model2's parameters; build one over
# model_2's parameters so that this loop actually updates model_2
optimizer = torch.optim.SGD(params=model_2.parameters(),
                            lr=0.01)

# Measure time
from timeit import default_timer as timer
train_time_start_model_2 = timer()

# Train and test model 
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=train_dataloader, 
        model=model_2, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device
    )
    test_step(data_loader=test_dataloader,
        model=model_2,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device
    )

train_time_end_model_2 = timer()
total_train_time_model_2 = print_train_time(start=train_time_start_model_2,
                                           end=train_time_end_model_2,
                                           device=device)
                                           

In [None]:
# Get model_2 results 
model_2_results = eval_model(
    model=model_2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)
model_2_results

## 8. Comparing Model Results and Training Time


In [None]:
import pandas as pd
compare_results = pd.DataFrame([model0_results,
                                model_1_results,
                                model_2_results])
compare_results

In [None]:
def make_predictions(model: torch.nn.Module,
                     data:list,
                     device: torch.device = device):
    """Return the model's softmax probabilities for every sample in `data`.

    Each sample gets a leading batch dimension, is moved to `device` and passed
    through `model` (in eval mode, on `device`). The squeezed outputs are
    softmaxed over dim 0, moved back to the CPU and stacked into one tensor
    with one row per sample.
    """
    model.to(device)
    model.eval()

    def _probs_for(sample):
        # add the batch dimension the model is fed with, then drop it again
        batched = torch.unsqueeze(sample, dim=0).to(device)
        logits = model(batched)
        return torch.softmax(logits.squeeze(), dim=0).cpu()

    with torch.inference_mode():
        per_sample_probs = [_probs_for(sample) for sample in data]

    # stack turns the list of per-sample tensors into a single tensor
    return torch.stack(per_sample_probs)

In [None]:
import random
random.seed(42)
test_samples = []
test_labels = []

# draw 9 random positions and fetch only those items, instead of turning the
# whole test set into a list (which loads and transforms every image) first
for sample_idx in random.sample(range(len(test_data)), k=9):
    sample, label = test_data[sample_idx]
    test_samples.append(sample)
    test_labels.append(label)

#view first sample shape
test_samples[0].shape, type(test_samples[0]), type(test_data[0][0])

In [None]:
#make predictions
pred_probs = make_predictions(model=model_2,
                              data=test_samples)

pred_probs[0]

In [None]:
#conver probabilities to labels
pred_classes = pred_probs.argmax(dim=1)
pred_classes

In [None]:
# Plot predictions
plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    # one grid cell per sample, filled in row by row
    plt.subplot(nrows, ncols, i+1)
    plt.imshow(sample.squeeze(), cmap="gray")

    # class names (text form) for the prediction and for the ground truth
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | Truth: {truth_label}"

    # green title when the prediction matches the truth, red otherwise
    title_colour = "g" if pred_label == truth_label else "r"
    plt.title(title_text, fontsize=10, c=title_colour)
    plt.axis(False);

In [None]:
# Create a convolutional neural network 
class FashionMNISTModelV2(nn.Module):
    """
    TinyVGG-style CNN (architecture from https://poloclub.github.io/cnn-explainer/):
    two conv-ReLU-conv-ReLU-maxpool blocks followed by a linear classifier.
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        def conv3x3(in_channels: int) -> nn.Conv2d:
            # every convolution in the model shares these hyperparameters and
            # always produces `hidden_units` channels
            return nn.Conv2d(in_channels=in_channels,
                             out_channels=hidden_units,
                             kernel_size=3,
                             stride=1,
                             padding=1)

        # layers are created in forward order so parameter initialisation
        # follows the same sequence as the layers are applied
        self.block_1 = nn.Sequential(
            conv3x3(input_shape),
            nn.ReLU(),
            conv3x3(hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.block_2 = nn.Sequential(
            conv3x3(hidden_units),
            nn.ReLU(),
            conv3x3(hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # the classifier expects hidden_units feature maps of 7x7 after both blocks
        flattened_features = hidden_units * 7 * 7
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flattened_features, out_features=output_shape),
        )

    def forward(self, x: torch.Tensor):
        """Apply block_1, block_2 and the classifier in turn."""
        features = self.block_2(self.block_1(x))
        return self.classifier(features)

torch.manual_seed(42)
model_2 = FashionMNISTModelV2(input_shape=1, 
    hidden_units=10, 
    output_shape=len(class_names)).to(device)
model_2

In [None]:
# Setup loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(), 
                             lr=0.01)

In [None]:
# Seed torch's RNG before training model_2
torch.manual_seed(42)

# Measure wall-clock time for the whole train/test loop
from timeit import default_timer as timer
train_time_start_model_2 = timer()

# Train and test model: one training pass followed by one test pass per epoch,
# both on `device`
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=train_dataloader, 
        model=model_2, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device
    )
    test_step(data_loader=test_dataloader,
        model=model_2,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device
    )

# Stop the timer and keep the elapsed time for later comparison
train_time_end_model_2 = timer()
total_train_time_model_2 = print_train_time(start=train_time_start_model_2,
                                           end=train_time_end_model_2,
                                           device=device)

In [None]:
# Get model_2 results 
model_2_results = eval_model(
    model=model_2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)
model_2_results

In [None]:
import random
random.seed(42)
test_samples = []
test_labels = []
# draw 9 random positions and fetch only those items, instead of turning the
# whole test set into a list (which loads and transforms every image) first
for sample_idx in random.sample(range(len(test_data)), k=9):
    sample, label = test_data[sample_idx]
    test_samples.append(sample)
    test_labels.append(label)

# View the first test sample shape and label
print(f"Test sample image shape: {test_samples[0].shape}\nTest sample label: {test_labels[0]} ({class_names[test_labels[0]]})")

In [None]:
# Make predictions on test samples with model 2
pred_probs= make_predictions(model=model_2, 
                             data=test_samples)

# View first two prediction probabilities list
pred_probs[:2]

In [None]:
# Make predictions on test samples with model 2
pred_probs= make_predictions(model=model_2, 
                             data=test_samples)

# View first two prediction probabilities list
pred_probs[:2]

In [None]:
# Turn the prediction probabilities into prediction labels by taking the argmax()
pred_classes = pred_probs.argmax(dim=1)
pred_classes

In [None]:
# Plot predictions
plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    # one grid cell per sample, filled in row by row
    plt.subplot(nrows, ncols, i+1)
    plt.imshow(sample.squeeze(), cmap="gray")

    # class names (text form) for the prediction and for the ground truth
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | Truth: {truth_label}"

    # green title when the prediction matches the truth, red otherwise
    title_colour = "g" if pred_label == truth_label else "r"
    plt.title(title_text, fontsize=10, c=title_colour)
    plt.axis(False);