# Vision

Format of the input: **[batch_size, no_of_channels, height, width]**

---

In [None]:
# Check GPU information by shelling out to `nvidia-smi`; its output is shown below the cell.
# NOTE(review): presumably this needs an NVIDIA driver on the host — confirm behaviour on CPU-only machines.
!nvidia-smi

In [None]:
import torch
from torch import nn
from scripts.utils import plot_predictions, plot_train_test_loss, print_train_time, eval_model_classification
from pathlib import Path
import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt


# Use the GPU when PyTorch can see a CUDA device, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Seed handed to torch.manual_seed(...) in later cells.
RANDOM_SEED = 42

In [None]:
# Report the installed torch and torchvision versions, one per line.
print(torch.__version__, torchvision.__version__, sep="\n")

## 1. Get data ready (turn into tensor)

### 1.1 Get the data

In [None]:
# Get the data

# Both splits share every argument except `train`, so they are built from one
# expression: the first pass (train=True) yields the training split, the second
# (train=False) the test split. Images are converted by ToTensor(); labels are
# left untouched (target_transform=None).
train_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
        target_transform=None,
    )
    for is_train_split in (True, False)
)

In [None]:
# Check details about data

# Label vocabulary and its name -> index mapping, followed by a blank line.
class_names = train_data.classes
print(class_names, train_data.class_to_idx, "", sep="\n")

# Look at the first training sample.
image, label = train_data[0]
print(f"Image shape: {image.shape} -> [color_channels, height, width]")
print(f"Image label: {class_names[label]}")

In [None]:
# Visualize the data (randomly take some images)

# Seed first so the same 16 images are drawn on every run.
torch.manual_seed(RANDOM_SEED)
rows, cols = 4, 4
train_data_len = len(train_data)
fig = plt.figure(figsize=(9, 9))
for i in range(1, rows * cols + 1):
    rand_idx = torch.randint(0, train_data_len, size=[1]).item()
    image, label = train_data[rand_idx]
    # Draw on the subplot that was just added instead of going through pyplot's "current axes".
    ax = fig.add_subplot(rows, cols, i)
    ax.imshow(image.squeeze(), cmap="gray")
    ax.set_title(f"{label}: {train_data.classes[label]}")
    ax.axis(False)

### 1.2 Prepare DataLoader

In [None]:
from torch.utils.data import DataLoader


# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Training batches are shuffled; the test loader keeps the dataset order.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Visualize one sample from train_dataloader

# Pull a single batch and report the shapes of its features and labels.
train_features_batch, train_labels_batch = next(iter(train_dataloader))
print(
    f"train_features_batch: {train_features_batch.shape}",
    f"train_labels_batch: {train_labels_batch.shape}",
    sep="\n",
)

# Pick one position inside the batch (seeded, so the choice of index is repeatable).
torch.manual_seed(RANDOM_SEED)
rand_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
img = train_features_batch[rand_idx]
label = train_labels_batch[rand_idx]

plt.imshow(img.squeeze(), cmap="gray")
plt.title(f"{label}: {class_names[label]}")
plt.axis(False)
print(f"Image size: {img.shape}", f"Label size: {label.shape}", sep="\n")

## 2. Build or pick a pretrained model for training

When starting a series of machine learning modelling experiments, it's best practice to begin with a baseline model. A baseline model is a simple model that you will try to improve upon with subsequent models/experiments.

In [None]:
class FashionMnistModelV0(nn.Module):
    """Baseline classifier: flatten the input, then apply two stacked linear layers.

    There is no non-linearity between the layers and no softmax at the end,
    so ``forward`` returns raw logits.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Width of the hidden linear layer.
        output_shape: Number of output logits.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        layers = [
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.Linear(hidden_units, output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, X):
        """Run ``X`` through ``layer_stack`` and return the resulting logits."""
        logits = self.layer_stack(X)
        return logits


# Baseline model: 28*28 flattened pixels in, one logit per class out, placed on DEVICE.
model_0 = FashionMnistModelV0(
    input_shape=28 * 28,
    hidden_units=10,
    output_shape=len(class_names),
).to(DEVICE)
model_0.state_dict()

In [None]:
# Check if model is correctly built

# model_0 was moved to DEVICE, so the dummy batch has to live on the same device;
# feeding a CPU tensor to a CUDA model raises a device-mismatch error.
# Shape follows [batch_size, color_channels, height, width].
dummy_x = torch.rand([1, 1, 28, 28]).to(DEVICE)
model_0(dummy_x)

### 2.1 Pick loss function and optimizer

In [None]:
# Cross-entropy on the raw logits, optimised with plain SGD over model_0's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

### 2.2 Build a training loop to train the model

In [None]:
from timeit import default_timer as timer
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score


torch.manual_seed(RANDOM_SEED)

EPOCHS = 3
# Per-epoch history; the loss lists are plotted at the end of this cell.
epoch_count = []
train_loss_arr = []
test_loss_arr = []
test_acc_arr = []

start_timer = timer()

for epoch in tqdm(range(EPOCHS)):
    print(f"Epoch: {epoch}\n----------")

    # train
    train_loss = 0
    model_0.train()
    for batch, (X, y) in enumerate(train_dataloader):
        # The model lives on DEVICE, so each batch must be moved there as well.
        X, y = X.to(DEVICE), y.to(DEVICE)

        y_logit = model_0(X)
        loss = loss_fn(y_logit, y)
        # Accumulate a detached value: adding `loss` itself would chain every
        # batch's autograd graph into the running total and leave it requiring grad.
        train_loss += loss.detach()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print out what's happening
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples.")
    # Mean of the per-batch losses.
    train_loss /= len(train_dataloader)

    # test
    test_loss, test_acc = 0, 0
    model_0.eval()
    with torch.inference_mode():
        for batch, (X, y) in enumerate(test_dataloader):
            X, y = X.to(DEVICE), y.to(DEVICE)
            y_logit = model_0(X)
            # logits -> probabilities -> predicted class index
            y_prob = torch.softmax(y_logit, dim=1)
            y_pred = y_prob.argmax(dim=1)
            test_loss += loss_fn(y_logit, y)
            # sklearn works on NumPy arrays, hence the round trip through the CPU.
            test_acc += accuracy_score(y.cpu().numpy(), y_pred.cpu().numpy())
        # Both metrics are means of per-batch values; if the last batch is smaller
        # it still counts as much as a full batch.
        test_loss /= len(test_dataloader)
        test_acc /= len(test_dataloader)

    epoch_count.append(epoch)
    # Store CPU copies so the history can be plotted regardless of DEVICE.
    train_loss_arr.append(train_loss.cpu())
    test_loss_arr.append(test_loss.cpu())
    test_acc_arr.append(test_acc)

end_timer = timer()
train_time_0 = print_train_time(start_timer, end_timer, DEVICE)

plot_train_test_loss(epoch_count, train_loss_arr, test_loss_arr)

## 3. Make prediction

## 4. Evaluate the model

In [None]:
# Evaluate the baseline on the test loader, using the shared loss function and
# sklearn's accuracy_score as the metric.
# NOTE(review): eval_model_classification comes from scripts.utils and is not shown here;
# presumably it returns a dict with "model_name", "model_loss" and "model_accuracy"
# (those keys are read in later cells) — confirm.
model_0_res = eval_model_classification(
    model=model_0,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_score
)
print(model_0_res)

## 5. Improve through experimentation

### 5.1 Build new model

In [None]:
class FashionMnistModelV1(nn.Module):
    """Flatten -> Linear -> ReLU -> Linear classifier that returns raw logits.

    The constructor does not move the module to any device: it no longer reads
    the notebook-level ``DEVICE`` global. Callers place the model explicitly
    with ``.to(...)``, as is done for the other models in this notebook.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Width of the hidden linear layer.
        output_shape: Number of output logits.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_shape),
            # No softmax layer: the network outputs logits.
        )

    def forward(self, X):
        """Return the logits produced by ``self.model`` for the batch ``X``."""
        return self.model(X)


# CNN Explained: https://poloclub.github.io/cnn-explainer/
class FashionMnistModelV2(nn.Module):
    """Small CNN: two convolutional blocks followed by a linear classifier head.

    Each block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2). The 3x3
    convolutions use stride 1 and padding 1, so only the pooling layers shrink
    the feature map (by half each). The classifier's ``in_features`` is
    hard-coded to ``hidden_units * 7 * 7``, which matches 28x28 inputs
    (28 -> 14 -> 7).

    Args:
        input_shape: Number of input channels.
        hidden_units: Number of channels produced by every convolution.
        output_shape: Number of output logits.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        def conv3x3(in_channels: int) -> nn.Conv2d:
            # Size-preserving 3x3 convolution producing `hidden_units` channels.
            return nn.Conv2d(in_channels, hidden_units, kernel_size=3, stride=1, padding=1)

        def conv_block(in_channels: int) -> nn.Sequential:
            # Two conv+ReLU pairs, then a 2x2 max-pool.
            return nn.Sequential(
                conv3x3(in_channels),
                nn.ReLU(),
                conv3x3(hidden_units),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            )

        self.conv_block_1 = conv_block(input_shape)
        self.conv_block_2 = conv_block(hidden_units)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Trick to find in_features: print the tensor shape in forward() just
            # before this layer, pass one sample through, and multiply the dims.
            nn.Linear(hidden_units * 7 * 7, output_shape),
        )

    def forward(self, X):
        """Apply both conv blocks and the classifier; returns the logits."""
        features = self.conv_block_2(self.conv_block_1(X))
        # print(features.shape)   # reveals `in_features` for the classifier layer
        return self.classifier(features)

# Fully connected model: 28*28 flattened pixels in, one logit per class out.
model_1 = FashionMnistModelV1(
    input_shape=28 * 28,
    hidden_units=10,
    output_shape=len(class_names),
).to(DEVICE)

# input_shape now refers to the number of color channels in CNN
model_2 = FashionMnistModelV2(
    input_shape=1,
    hidden_units=10,
    output_shape=len(class_names),
)
model_2.to(DEVICE)

In [None]:
# this cell will fail, but the printed shape is going to help us decide `in_features` for the classifier layer
# rand_tensor = torch.randn(*train_data[0][0].size())
# model_2(rand_tensor)

### 5.2 Pick loss function and optimizer

In [None]:
# Same loss for both experiments; each model gets its own SGD optimizer.
# NOTE: `optimizer` is rebound here — it now updates model_1's parameters.
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)
optimizer_2 = torch.optim.SGD(model_2.parameters(), lr=0.1)

### 5.3 Build training loop

In [None]:
# For model_1

from timeit import default_timer as timer
from scripts.utils import train, plot_loss_curves


torch.manual_seed(RANDOM_SEED)

EPOCHS = 3
# NOTE(review): these history lists are reset here, but nothing in this cell fills them.
epoch_count, train_loss_arr, train_acc_arr, test_loss_arr, test_acc_arr = [], [], [], [], []

# Time the call to the shared train() helper.
start_timer = timer()
model_1_res = train(
    model=model_1,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=EPOCHS,
    device=DEVICE,
)
end_timer = timer()
train_time_1 = print_train_time(start_timer, end_timer, DEVICE)

print(model_1_res)
plot_loss_curves(model_1_res)

In [None]:
# For model_2

from timeit import default_timer as timer
from scripts.utils import train, plot_loss_curves


torch.manual_seed(RANDOM_SEED)

EPOCHS = 3

# Time the call to the shared train() helper; model_2 uses its own optimizer.
start_timer = timer()
model_2_res = train(
    model=model_2,
    optimizer=optimizer_2,
    loss_fn=loss_fn,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=EPOCHS,
    device=DEVICE,
)
end_timer = timer()
train_time_2 = print_train_time(start_timer, end_timer, DEVICE)

print(model_2_res)
plot_loss_curves(model_2_res)

### 5.4 Evaluate the new model

In [None]:
# For model_1

# Evaluate model_1 on the test loader with the shared loss function and accuracy_score.
# NOTE: this rebinds model_1_res, which previously held the value returned by train(...).
model_1_res = eval_model_classification(
    model=model_1,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_score
)
model_1_res

In [None]:
# For model_2
from sklearn.metrics import accuracy_score

# Evaluate model_2 on the test loader with the shared loss function and accuracy_score.
# NOTE: this rebinds model_2_res, which previously held the value returned by train(...).
model_2_res = eval_model_classification(
    model=model_2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_score
)
model_2_res

In [None]:
# Evaluate all models
import pandas as pd


# One row per model (in the order 0, 1, 2), plus the value print_train_time
# returned for each of them as a `training_time` column.
all_result = pd.DataFrame([model_0_res, model_1_res, model_2_res]).assign(
    training_time=[train_time_0, train_time_1, train_time_2]
)
all_result

In [None]:
# Visualize the result: horizontal bar chart of accuracy, one bar per model name
all_result.set_index("model_name")["model_accuracy"].plot.barh()
plt.xlabel("Accuracy")
plt.ylabel("Models")

In [None]:
# Visualize random predictions
from scripts.utils import make_predictions

# Predict the whole test set once, then show a random 3x3 sample of test images
# with their true and predicted labels (green title = correct, red = wrong).
# DEVICE is passed explicitly, matching the other make_predictions call in this notebook.
# The indices are drawn with torch.randint, so they follow torch's RNG state.
y_preds = make_predictions(model_2, test_dataloader, DEVICE)

rows, cols = 3, 3
fig = plt.figure(figsize=(9, 9))
for i in range(1, rows * cols + 1):
    rand_idx = torch.randint(0, len(test_data), size=[1]).item()
    X, y_truth = test_data[rand_idx]
    y_truth = class_names[y_truth]
    # test_dataloader is not shuffled, so a dataset index is also used to look up the prediction.
    y_pred = class_names[y_preds[rand_idx]]
    fig.add_subplot(rows, cols, i)
    plt.imshow(X.squeeze(), cmap="gray")
    # Same title either way; only the colour depends on whether the prediction is right.
    title_color = "g" if y_pred == y_truth else "r"
    plt.title(f"Truth: {y_truth} | Pred: {y_pred}", c=title_color)
    plt.axis(False)

In [None]:
from scripts.utils import make_predictions


# Make predictions with trained model
# NOTE(review): make_predictions is defined in scripts.utils (not shown here); presumably it
# returns one predicted class index per test sample, in dataloader order — confirm.
y_pred_tensor = make_predictions(model_2, test_dataloader, DEVICE)
y_pred_tensor

In [None]:
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

# Set up a multiclass confusion-matrix metric and compare predictions to targets
# NOTE(review): assumes y_pred_tensor is on the same device as test_data.targets — confirm.
confmat = ConfusionMatrix(task='multiclass', num_classes=len(class_names))
confmat_tensor = confmat(preds=y_pred_tensor,
                         target=test_data.targets)

# Plot the confusion matrix, labelling the axes with the class names
fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor.numpy(), # matplotlib likes working with numpy
    class_names=class_names,
    figsize=(10, 7)
)

## 6. Save and reload trained model

In [None]:
# Save the model
model_folder = Path("models")
model_name = "FashionMnistModelV2.pt"
model_path = model_folder / model_name

# Create the target folder if needed (no error when it already exists).
model_folder.mkdir(parents=True, exist_ok=True)

# Persist only the learned parameters (the state_dict), not the module object.
model_2.to(DEVICE)
torch.save(model_2.state_dict(), model_path)

In [None]:
# Load the model
# Rebuild the architecture with the same hyperparameters, then restore the saved weights.
loaded_model = FashionMnistModelV2(input_shape=1, hidden_units=10, output_shape=len(class_names))
# map_location remaps the stored tensors onto the current DEVICE, so a checkpoint
# written on a GPU machine can still be loaded on a CPU-only one.
loaded_model.load_state_dict(torch.load(f=model_path, map_location=DEVICE))
loaded_model.to(device=DEVICE)

In [None]:
# Evaluate loaded model
# Re-seed torch's RNG before evaluating.
torch.manual_seed(RANDOM_SEED)

# Same evaluation call as used for the in-memory models, applied to the reloaded model.
loaded_model_2_results = eval_model_classification(
    model=loaded_model,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_score
)

loaded_model_2_results

In [None]:
# Check if model results are close to each other
# Compares the "model_loss" entries of the in-memory and reloaded model results,
# allowing an absolute difference of 1e-02 on top of torch.isclose's default rtol.
torch.isclose(torch.tensor(model_2_res["model_loss"]),
              torch.tensor(loaded_model_2_results["model_loss"]),
              atol=1e-02)