import inspect # view the source code using ---> getsourcelines(thing)

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

import random

# Report library versions up front so results can be reproduced later.
print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

# Training split of FashionMNIST, stored under ./data.
# transform=ToTensor() converts each image to a tensor; labels are left untouched.
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=None,
)

# Test split, configured identically apart from train=False.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
    target_transform=None,
)

# Human-readable class labels; used later for plot titles and to size the model output layer.
class_names = train_data.classes
print(class_names)

# Bare expression: shows the split sizes in a notebook cell, has no effect when run as a script.
len(train_data.data), len(test_data.data)

# Iterator example: what a `for` loop does under the hood (iter() + next() until StopIteration).

# 1) The ordinary for-loop form.
for x in np.random.rand(5): print(x)

print(), print()

# 2) The same thing done by hand with an explicit iterator.
stuff = np.random.rand(5)
print(stuff), print()
it = iter(stuff)

while True:
    try:
        item = next(it)
        print(item)
    except StopIteration:
        # next() signals exhaustion by raising StopIteration; a for loop catches this implicitly.
        print("StopIteration exception raised")
        break
        

# 3) A fresh array and iterator, left unconsumed for the (disabled) experiment below.
stuff = np.random.rand(5)
print(), print(), print(stuff), print()
it = iter(stuff)

# Without the try/except, the loop below would end with an uncaught StopIteration:
#while True:
#    item = next(it)
#    print(item)

# Show the first training sample. squeeze() drops size-1 axes so imshow receives a 2-D array.
image, label = train_data[0]
image = image.detach().numpy()
plt.imshow(image.squeeze(), cmap="gray")

# Show the first rows*cols training samples in a grid, titled with index and class name.
rows, cols = 4, 4
fig = plt.figure(figsize=(6,6))
for i in range(rows * cols):
    image, label = train_data[i]
    image = image.detach().numpy().squeeze()
    fig.add_subplot(rows, cols, i+1)  # subplot positions are 1-based
    plt.imshow(image, cmap="gray")
    plt.title(f"[{i}] {train_data.classes[label]}")
fig.tight_layout()

# NOTE: DataLoader is already imported at the top of the file; this repeat is harmless.
from torch.utils.data import DataLoader

# Setup the batch size hyperparameter
BATCH_SIZE = 32

# Turn datasets into iterables (batches)
train_dataloader = DataLoader(train_data, # dataset to turn into iterable
    batch_size=BATCH_SIZE, # how many samples per batch? 
    shuffle=True # shuffle data every epoch?
)

test_dataloader = DataLoader(test_data,
    batch_size=BATCH_SIZE,
    shuffle=False # don't necessarily have to shuffle the testing data
)

# Let's check out what we've created.
# len(dataloader) is the number of batches, not the number of samples.
print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

# Pull one batch out of the training loader by hand to look at its structure.
a = iter(train_dataloader)

b = next(a)

print(f"b[0] (batch of samples, length: {len(b[0])})\nEach sized: {b[0][0].shape}\n")
print(f"b[1] (batch of labels, length: {len(b[1])}): \n{b[1]}\n")

# next(a)
# [0] (e.g. train_features_batch)
#    ---> array of tensors (X data),   length: batch size
# [1] (e.g. train_labels_batch)
#    ---> array of tensors (y labels), length: batch size

# second index: current sample in current batch

# First sample of the batch and its label, converted to numpy for plotting / indexing.
img = b[0][0].detach().numpy().squeeze()
lab = b[1][0].detach().numpy()
print(f"b[1][0] (current label: {lab}), type: {type(lab)}")

plt.imshow(img, cmap="gray")
plt.title(f"{lab}, {train_data.classes[lab]}")

# Same thing in one step. This builds a new iterator, so (with shuffle=True) it is a different batch.
train_features_batch, train_labels_batch = next(iter(train_dataloader))
# Bare expression: displays the shapes in a notebook cell, has no effect in a script.
train_features_batch.shape, train_labels_batch.shape

# Demonstrate nn.Flatten on a single sample.
flatten_model = nn.Flatten()

x = train_features_batch[0]

output = flatten_model(x)

print(f"shape before flatten: {x.shape}      -> [colour channels, height, width]")
print(f"shape after flatten:  {output.shape} -> [colour channels, height*width]")

class FashionMNISTModelV0(nn.Module):
    """Baseline classifier: flatten the input, then apply two linear layers back to back.

    There is no activation between the linear layers, so the network as a
    whole is a purely linear map of the flattened input.

    Args:
        input_shape: Number of features after flattening (e.g. 28*28 = 784).
        hidden_units: Width of the intermediate linear layer.
        output_shape: Number of output values (one per class).
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        layers = [
            nn.Flatten(),  # collapse everything after the batch axis into one vector
            nn.Linear(input_shape, hidden_units),
            nn.Linear(hidden_units, output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw outputs."""
        logits = self.layer_stack(x)
        return logits

# Fix the RNG seed so the randomly initialised weights are the same on every run.
torch.manual_seed(42)

# 28*28 flattened pixels in, 10 hidden units, one output per class.
model_0 = FashionMNISTModelV0(input_shape=28*28, hidden_units=10, output_shape=len(class_names))
model_0.to("cpu")

def accuracy_fn(y_pred, y_true):
    """Return the fraction of predicted class indices that equal the true labels.

    Both arguments hold class *indices* (not logits), so callers take the
    argmax before calling. Python sequences, NumPy arrays and torch tensors
    are all accepted; everything is converted to NumPy first so the
    comparison is always element-wise.

    Args:
        y_pred: Predicted class indices, one per sample.
        y_true: Ground-truth class indices, same length as ``y_pred``.

    Returns:
        Accuracy as a fraction in [0, 1]. Returns 0.0 for empty input rather
        than dividing by zero.
    """
    def _as_array(values):
        # Tensors expose detach(); go via CPU so GPU or grad-tracking tensors convert cleanly.
        if hasattr(values, "detach"):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

    predicted = _as_array(y_pred)
    actual = _as_array(y_true)

    n_samples = len(actual)
    if n_samples == 0:
        return 0.0
    return np.sum(predicted == actual) / n_samples

# Loss and optimizer for model_0.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)

# Sanity check on one test batch before any training.
test_batch = next(iter(test_dataloader))
test_batch_samples = test_batch[0]
test_batch_labels = test_batch[1]


# A batch is a pair: [samples, labels].
print(len(test_batch))

model_0.eval()
with torch.inference_mode():
    
    preds = model_0(test_batch_samples)

    print(preds.shape)
    
    loss = loss_fn(preds, test_batch_labels)
    print(loss.item())
    
    # accuracy_fn takes class indices, so take the argmax over the class axis first.
    acc = accuracy_fn(y_pred=preds.detach().numpy().argmax(axis=1), y_true=test_batch_labels.detach().numpy())
    print(acc)
    
# Same conversion again, kept in variables so predictions and labels can be printed side by side.
preds_np = preds.detach().numpy().argmax(axis=1)
trues_np = test_batch_labels.detach().numpy()

print(preds_np.shape, trues_np.shape)
    
print(f"predictions: {preds_np}")

print(f"true vals  : {trues_np}")

print(f"accuracy   : {accuracy_fn(y_pred=preds_np, y_true=trues_np)}")
    



print(),print(),print()
# NOTE: `preds` is rebound here from the batch of outputs to the predicted class index of sample 0.
preds = preds[0].detach().numpy().argmax()
print(preds)
labels = test_batch_labels[0].detach().numpy()
print(labels)



epochs = 3

for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n----------")

    # --- training ---
    # The evaluation phase below leaves the model in eval mode, so switch
    # back to training mode at the start of every epoch.
    model_0.train()
    train_loss = 0.0

    # n_batch is the current batch
    # (X, y) since train_dataloader has 2 items - data and labels, both are tensor arrays
    for n_batch, (X, y) in enumerate(train_dataloader):

        y_pred = model_0(X)

        loss = loss_fn(y_pred, y)

        # Accumulate a plain float; adding the tensor itself would keep every
        # batch's autograd graph alive for the whole epoch.
        train_loss += loss.item()

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if n_batch % 400 == 0:
            print(f"Looked at {n_batch * len(X)}/{len(train_dataloader.dataset)} samples")

    train_loss /= len(train_dataloader)  # average training loss per batch

    # --- evaluation ---
    losses, accuracies = [], []
    test_loss, test_acc = 0.0, 0
    model_0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            test_pred = model_0(X)

            loss = loss_fn(test_pred, y)

            test_loss += loss.item()

            test_acc += accuracy_fn(y_pred=test_pred.detach().numpy().argmax(axis=1), y_true=y.detach().numpy())
            # Running value of (accuracy summed so far) / (total batches) * 100;
            # it climbs towards the final accuracy as more batches are seen.
            running_acc = test_acc/len(test_dataloader)*100
            accuracies.append(running_acc)

        test_loss /= len(test_dataloader)  # average test loss per batch
        test_acc /= len(test_dataloader)   # average per-batch accuracy (fraction)

# Only the last epoch's curve survives, because `accuracies` is reset each epoch.
plt.plot(accuracies)

print(test_acc*100) # accuracy percentage
print(test_loss)

def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,  # samples & labels batches for testing
               loss_fn: torch.nn.Module,
               accuracy_fn,
               should_plot: bool):
    """Evaluate ``model`` over every batch of ``data_loader``.

    The reported loss and accuracy are averages over the whole loader,
    weighted by batch size so a short final batch does not skew the result.
    The loss average assumes ``loss_fn`` returns a per-batch mean (the
    default reduction of ``nn.CrossEntropyLoss``).

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(X, y)`` batches.
        loss_fn: Callable ``loss_fn(predictions, y)`` returning a scalar tensor.
        accuracy_fn: Callable ``accuracy_fn(pred_indices, true_indices)``
            returning a fraction in [0, 1]; both arguments are NumPy arrays.
        should_plot: When true, plot the per-batch accuracy (blue), the
            overall accuracy (red) and exp(loss) (green), and print the
            summary numbers.

    Returns:
        Dict with ``model_name`` (class name of ``model``), ``model_loss``
        and ``model_accuracy`` (percentage).

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    losses, accuracies, batch_sizes = [], [], []

    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            predictions = model(X)

            losses.append(loss_fn(predictions, y).item())

            # accuracy_fn works on class indices, so reduce over the class axis first.
            predicted_idx = predictions.detach().cpu().numpy().argmax(axis=1)
            accuracies.append(accuracy_fn(predicted_idx, y.detach().cpu().numpy()) * 100)

            batch_sizes.append(len(y))

    if not losses:
        raise ValueError("data_loader yielded no batches; nothing to evaluate")

    mean_loss = float(np.average(losses, weights=batch_sizes))
    mean_acc = float(np.average(accuracies, weights=batch_sizes))

    if should_plot:
        plt.plot(accuracies, c='b')
        plt.plot(range(len(accuracies)), [mean_acc] * len(accuracies), c="r")
        plt.plot(np.exp(losses), c='g')
        plt.show()

        print(f"Loss: {mean_loss}")
        print(f"Accuracy: {mean_acc}")

    return {"model_name": model.__class__.__name__,
            "model_loss": mean_loss,
            "model_accuracy": mean_acc}

# Evaluate the trained baseline on the test set (with plots) and keep the result dict
# for the model comparison later on.
loss_fn = nn.CrossEntropyLoss()
model_0_results = eval_model(model_0, test_dataloader, loss_fn, accuracy_fn, True)
print(model_0_results)

class FashionMNISTModelV1(nn.Module):
    """Fully connected classifier with ReLU activations between the linear layers.

    The final linear layer returns raw logits. No activation is applied to
    the output: ``nn.CrossEntropyLoss`` expects unnormalised logits, and a
    trailing ReLU would clamp every negative logit to zero and block its
    gradient.

    Args:
        input_shape: Number of features after flattening (e.g. 28*28 = 784).
        hidden_units: Width of both hidden layers.
        output_shape: Number of classes.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            # Output layer: left linear on purpose so the loss receives raw logits.
            nn.Linear(in_features=hidden_units, out_features=output_shape),
        )

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        return self.layer_stack(x)

def train_model(model: nn.Module,
                loss_fn,
                optimizer,
                dataloader: torch.utils.data.DataLoader,
                n_epochs: int,
                batch_size: int):
    """Train ``model`` for ``n_epochs`` passes over ``dataloader``.

    Per-batch loss and accuracy are recorded, plotted once training ends,
    and the values of the final batch are printed.

    Accuracy is computed with the module-level ``accuracy_fn``.

    Args:
        model: Network to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, y)`` returning a scalar tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        dataloader: Iterable yielding ``(X, y)`` batches.
        n_epochs: Number of passes over ``dataloader``.
        batch_size: Unused; the batch size is whatever ``dataloader`` yields.
            Kept so existing call sites continue to work.
    """
    losses = []
    accuracies = []

    model.train()

    for epoch in tqdm(range(n_epochs)):

        print(f"Epoch: {epoch}/{n_epochs}---------------")

        for batch_n, (X, y) in enumerate(dataloader):

            if batch_n % 500 == 0:
                print(f"Batch: {batch_n}/{len(dataloader)}---------------")

            y_predictions = model(X)

            loss = loss_fn(y_predictions, y)
            losses.append(loss.item())

            # accuracy_fn works on class indices; .cpu() keeps this working for GPU tensors.
            predicted_idx = y_predictions.detach().cpu().numpy().argmax(axis=1)
            acc = accuracy_fn(predicted_idx, y.detach().cpu().numpy()) * 100
            accuracies.append(acc)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    if not losses:
        # n_epochs == 0 or an empty dataloader: there is no history to plot or index.
        print("No batches were processed; nothing to plot or report.")
        return

    plt.plot(accuracies)
    plt.plot(np.exp(losses))
    plt.ylim(0, 100)
    plt.show()
    print(f"Accuracy: {accuracies[-1]}")
    print(f"Loss: {losses[-1]}")

# Non-linear model: 784 inputs, 10 hidden units, one output per class.
model_1 = FashionMNISTModelV1(28*28, 10, len(class_names))

# The optimizer must be rebuilt so it tracks model_1's parameters rather than model_0's.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.01)

# 3 epochs; the trailing 32 is the batch_size argument.
train_model(model_1, loss_fn, optimizer, train_dataloader, 3, 32)

model_1_results = eval_model(model_1, test_dataloader, loss_fn, accuracy_fn, True)
# Bare expressions: display the result dicts in a notebook, no effect in a script.
model_1_results

model_0_results

# In [1]:  (notebook cell prompt left over from the export; not Python code)
class FashionMNISTModelV2(nn.Module):
    """Small convolutional classifier: two conv blocks followed by a linear head.

    Each block is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool. The classifier
    expects ``hidden_units * 7 * 7`` features, which is what two 2x2 poolings
    leave of a 28x28 input.

    Args:
        input_shape: Number of input channels.
        hidden_units: Number of channels produced by every convolution.
        output_shape: Number of classes.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        def conv3x3(channels_in: int) -> nn.Conv2d:
            # 3x3 window, stride 1, padding 1: height and width are preserved.
            return nn.Conv2d(channels_in, hidden_units, kernel_size=3, stride=1, padding=1)

        self.block_1 = nn.Sequential(
            conv3x3(input_shape),
            nn.ReLU(),
            conv3x3(hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.block_2 = nn.Sequential(
            conv3x3(hidden_units),
            nn.ReLU(),
            conv3x3(hidden_units),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        flat_features = hidden_units * 7 * 7
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, output_shape),
        )

    def forward(self, x: torch.Tensor):
        """Return class logits; for a 28x28 input the maps shrink 28 -> 14 -> 7."""
        features = self.block_2(self.block_1(x))
        return self.classifier(features)

# Stale notebook output: "NameError: name 'nn' is not defined" (the cell was run before the import cell).

# CNN model: 1 input channel, 10 feature maps per convolution, one output per class.
model_2 = FashionMNISTModelV2(input_shape=1,
                              hidden_units=10,
                              output_shape=len(class_names))
# Bare expression: displays the module structure in a notebook, no effect in a script.
model_2

# Sanity check on one test batch before training.
test_batch = next(iter(test_dataloader))
test_batch_samples = test_batch[0]
test_batch_labels = test_batch[1]

print(len(test_batch))

model_2.eval()
with torch.inference_mode():
    
    preds = model_2(test_batch_samples)
    
    preds_np = preds.detach().numpy()
    print(f"Test predictions: {preds_np.argmax(axis=1)}")  
    print(f"Test labels     : {test_batch_labels.detach().numpy()}")
    
    # loss_fn is whichever loss object was created most recently above.
    loss = loss_fn(preds, test_batch_labels)
    print(f"Loss: {loss.item()}")
    
    acc = accuracy_fn(y_pred=preds_np.argmax(axis=1),y_true=test_batch_labels.detach().numpy())*100
    print(f"Accuracy: {acc}")


# Full test-set evaluation of the untrained model (result is not stored).
eval_model(model_2, test_dataloader, loss_fn, accuracy_fn, True)
            

# Getting a better understanding of dimensionality, represented in numpy arrays.

x = np.arange(200, dtype=np.float32)
print(f"Numpy arange: {x}")
print(f"Numpy arange ({x.size}) shape: {x.shape}\n\n")

x = x.reshape(2, 4, 5, 5)  # bs, channels, height, width
print(f"Arange size {x.size} after reshape:\n{x}")
print(f"Numpy arange after reshape: {x.shape}\n\n")

print(), print()

# x[0] is the "first" group of matrices.
# For every matrix print its index within the group, its centre element and its shape.
for group in x:
    for idx, mat in enumerate(group):
        print(idx, mat[2][2], mat.shape)

# intuitively: reshape(a, b, c, d)
# a = number of groups of matrices
# b = number of matrices in each group
# c = number of rows in each matrix
# d = number of columns in each matrix

# above example:
# 2 groups of 4 matrices, sized 5x5

x = np.arange(32, dtype=np.float32)
print(f"Numpy arange: {x}")
print(f"Numpy arange ({x.size}) shape: {x.shape}\n\n")

x = x.reshape(1, 2, 4, 4)  # bs, channels, height, width
print(f"Arange size {x.size} after reshape:\n{x}")
print(f"Numpy arange after reshape: {x.shape}\n")

# Max-pooling demo on the 1x2x4x4 array above.
X = torch.tensor(x, dtype=torch.float32).to("cpu")
print("\nSource input: ")
print(X)

# 2x2 window moved one pixel at a time: overlapping windows, 4x4 -> 3x3.
pool1 = nn.MaxPool2d(2, stride=1)
z1 = pool1(X)
print("\nMaxPool with kernel=2, stride=1: ")
print(z1)

# 2x2 window moved two pixels at a time: non-overlapping windows, 4x4 -> 2x2.
pool2 = nn.MaxPool2d(2, stride=2)
z2 = pool2(X)
print("\nMaxPool with kernel=2, stride=2: ")
print(z2)

print("\nEnd max pooling demo ")

# Conv2d shape demo on random "images".
images = torch.randn(size=(32, 3, 64, 64)) # [batch_size, color_channels, height, width]
test_image = images[0] # first image of the batch: 3 channels (colours) of 64x64 pixels

conv_layer = nn.Conv2d(in_channels=3,
                       out_channels=10,
                       kernel_size=3,
                       stride=1,
                       padding=0)

# unsqueeze(dim=0) adds an extra leading dimension, turning one image into a batch of one
print(test_image.shape) # original shape
print(test_image.unsqueeze(dim=0).shape) # shape with extra dimension (its own group)

print(), print()

print(test_image.shape) # original shape
output = conv_layer(test_image.unsqueeze(dim=0)) # output of conv layer
output.shape # output shape (bare expression: only displayed in a notebook)
#print(output)

# Second layer: 2x2 kernel moved two pixels at a time, so height and width are halved.
conv_layer_2 = nn.Conv2d(in_channels=3,
                         out_channels=10,
                         kernel_size=(2,2),
                         stride=2,
                         padding=0)


conv_layer_2(test_image.unsqueeze(dim=0)).shape

# Get shapes of weight and bias tensors within conv_layer_2
print(f"conv_layer_2 weight shape: \n{conv_layer_2.weight.shape} -> [out_channels=10, in_channels=3, kernel_size=2, kernel_size=2]")
print(f"\nconv_layer_2 bias shape: \n{conv_layer_2.bias.shape} -> [out_channels=10]")

torch.manual_seed(42)

# getting a better understanding of the convolutional 2d layer in torch
# example using a 2x2 matrix, with a single digit kernel

# Build a 2x2 input holding 0..3 and give it batch and channel axes -> shape (1, 1, 2, 2).
a = np.arange(4).reshape(2,2)
a = torch.from_numpy(a.astype(np.float32)).unsqueeze(dim=0).unsqueeze(dim=1)
# 1x1 kernel: a single weight and a single bias.
b = nn.Conv2d(in_channels=1,out_channels=1,kernel_size=(1),stride=1,padding=0)

print("---- Original tensor ----")
print(a.shape), print(a), print()

output = b(a)
print("---- New tensor (OUTPUT) ----")
print(output.shape), 
print(output)

print(),print()

print("---- Conv2d layer ----")
print("Weight:")
print(b.weight.shape)
print(b.weight)
print()
print("Bias:")
print(b.bias)

print(),print()
# Recompute each output element by hand as input * weight + bias: first row, then second row.
for i in range(2):
    print(f"index {i}: {a[0,0,0,i].detach().numpy()} * {b.weight.detach().numpy().squeeze()} + {b.bias.detach().numpy().squeeze()} = {(a[0,0,0,i] * b.weight + b.bias).detach().numpy().squeeze()}")
    
for i in range(2):
    print(f"index {2 + i}: {a[0,0,1,i].detach().numpy()} * {b.weight.detach().numpy().squeeze()} + {b.bias.detach().numpy().squeeze()} = {(a[0,0,1,i] * b.weight + b.bias).detach().numpy().squeeze()}")

print(f"compared to output: \n{output.detach().numpy()}")
print(), print()


print("Equals the same as the output")
print()
print("Conv2d layer will multiply its' own randomly initialised weights")
#print("If the kernel size is bigger

torch.manual_seed(42)
print("Improving my understanding of the Conv2d layer in torch.")
print("This example demonstrates the calculation of the Conv2d layer,\
\nwhich in this case has a kernel_size matching the input size.")



# Same 2x2 input as before (values 0..3, shape (1, 1, 2, 2)), now with a 2x2 kernel,
# so the kernel covers the whole input.
a = np.arange(4).reshape(2,2)
a = torch.from_numpy(a.astype(np.float32)).unsqueeze(dim=0).unsqueeze(dim=1)
a.requires_grad = False
b = nn.Conv2d(in_channels=1,out_channels=1,kernel_size=((2,2)),stride=1,padding=0)
print(f"\n{b}\n")

print("---- Original tensor ----")
print(a.shape), print(a), print()

output = b(a)
print("---- New tensor (OUTPUT) ----")
print(output.shape), 
print(output)

print(),print()

print("---- Conv2d layer ----")
print("Weight:")
print(b.weight.shape)
print(b.weight)
print()
print("Bias:")
print(b.bias)

new_matrix = []

print(),print()
# First attempt: multiply each single input element by the WHOLE 2x2 weight and add the bias.
# Each x is therefore a 2x2 array, which is not how the layer computes its output.
for i in range(2):
    x = (a[0,0,0,i] * b.weight + b.bias).detach().numpy().squeeze()
    print(f"index {i}: {a[0,0,0,i].detach().numpy()} * {b.weight.detach().numpy().squeeze()} + {b.bias.detach().numpy().squeeze()} = {x}")
    new_matrix.append(x)
for i in range(2):
    x = (a[0,0,1,i] * b.weight + b.bias).detach().numpy().squeeze()
    print(f"index {2 + i}: {a[0,0,1,i].detach().numpy()} * {b.weight.detach().numpy().squeeze()} + {b.bias.detach().numpy().squeeze()} = {x}")
    new_matrix.append(x)
    
print()
print(f"compared to output: \n{output.detach().numpy()} <------- figure out how it gets this output")
print(), print()

print()
print("Conv2d layer will multiply its' own randomly initialised weights")
#print("If the kernel size is bigger


print(f"output: {torch.sum(output).detach().numpy()}")
# NOTE: x here is whatever the last loop iteration above left behind.
print(x, np.sum(x))


# Candidate 1: element-wise product of flattened weights and input, summed, plus bias.
print(np.multiply(b.weight.detach().numpy().squeeze().reshape(4), a.detach().numpy().squeeze().reshape(4)).sum() + b.bias.detach().numpy())

# Candidate 2: dot product of the flattened vectors (bias not added).
print(b.weight.detach().numpy().squeeze().reshape(4).T.dot((a.squeeze().reshape(4))))

# Candidate 3: the same dot product written as an explicit loop (bias not added).
s = 0
for i in range(4): s += b.weight.detach().numpy().squeeze().reshape(4)[i] * a.detach().numpy().squeeze().reshape(4)[i]  
print(str(s) + "   + bias, this is correct")

# Candidate 4: 2x2 matrix product, then summed - a different operation.
print(np.sum(b.weight.detach().numpy().squeeze().dot(a.detach().numpy().squeeze())), end="")
print(" <----- this is wrong")

print("\n\n\
Completely forgot about the bias, aside from the last 2x2.dot(2x2) dot product,\nthe previous calculations are correct\
 it seems to do an element-wise dot product,\
 or it first flattens the input and then does a.T.dot(b) + bias,\nwhich is equivalent to an element-wise dot product?\
 \n\n")


print("Result of the final calculations which mimic the conv2d layer:")


# therefore when the kernel is the same size as the original image, and has a stride of 1,
# the calculation is

# dot product the flatten vectors 
result = b.weight.detach().numpy().squeeze().reshape(4).T.dot(a.detach().numpy().squeeze().reshape(4))
# now add the bias to the result
result += b.bias.detach().numpy()
# Print the hand-computed value next to the layer's own output for comparison.
print(result, output.detach().numpy().squeeze())

torch.manual_seed(42)

# note:
# the layer will have a tensor of weights with the SAME dimensions as the kernel size.
##################

# gain a better understanding of padding in the Conv2d layer.
# few examples
# padding will ADD pixels to the image, and default them as zero,
# before running the calculation on the entire image, including the zeros.

# 3x3 input holding 0..8, with batch and channel axes -> shape (1, 1, 3, 3).
a = np.arange(9).reshape(3,3)
a = torch.from_numpy(a.astype(np.float32)).unsqueeze(dim=0).unsqueeze(dim=1)
b = nn.Conv2d(in_channels=1,out_channels=1,kernel_size=(1,1),stride=(1,1),padding=0) 
print(f"b: {b}"), print()
print(f"w: {b.weight}"), print(f"b: {b.bias}")
print(), print()

print("----- without padding -----")
output = b(a)
print(a), print()
print("Output:\n", output), print()



# copy the weight and bias, since the next example will have different value
weight, bias = b.state_dict()["weight"].data.detach(), b.state_dict()["bias"].data.detach()
print(f"b parameters: {weight}, {bias}")


print(), print()


# NOTE(review): despite the heading, this layer is created with padding=0 and
# kernel_size=(3,3), so on the 3x3 input it produces a single output value.
# The explanatory prints further down describe padding that is not applied here.
print("----- with padding (1) -----")
b = nn.Conv2d(in_channels=1,out_channels=1,kernel_size=(3,3),stride=(1,1),padding=0)
#b.weight.data = #weight
# Overwrite the random weights with all ones and reuse the previous layer's bias,
# which makes the result easy to verify by hand.
b.weight.data = torch.ones((3,3)).unsqueeze(0).unsqueeze(1)
b.bias.data = bias
#b.bias.data = torch.zeros(1)
print(f"b: {b}"), print()
print(f"w: {b.weight}"), print(f"b: {b.bias}")
print(), print()
output = b(a)
print(a), print()
print("Output:\n", output)
print(output.shape)

print()
print()
print("The padding will add zeros to the outside of the image.")
print("The Conv2d layer will then perform the flattened.T dot product")
print("on the values on the outside, then add the bias")
print("In this case the outside values are 0.8300, matching the bias.")
print("If the bias is zero, the outside values will be zero after the calculations.")
print("\n\n")
print("Attempt to replicate kernel_size=(1,1), stride=(1,1), padding=(1)")
print("without the Conv2d layer:")

print("\n\n")

# From here on `a` is a plain 3x3 numpy array.
a = a.detach().numpy().squeeze()
print("a:\n", a), print()

weights = torch.ones(3,3).unsqueeze(0).unsqueeze(1)
bias = torch.Tensor([0.8300]) # hard coded values for testing
print(weights, bias)

# Flattened input dotted with the flattened all-ones weights, plus the bias.
# NOTE: `a` is rebound to that result here.
a = a.flatten().T.dot(weights.squeeze().flatten()) + bias
print(a, "<----- matching the automated result above")

# The triple-quoted string below is a disabled experiment (manual zero padding with numpy).
# It is a bare string expression, so none of it is executed.
"""

#padding
#a = a.reshape((3,3))
# add padding
a = np.append(a, np.zeros((3,1)), axis=1)
a = np.append(np.zeros((3,1)), a, axis=1)
a = np.append(a, np.zeros((1,5)))
a = np.append(np.zeros((1,5)), a)
a = a.reshape(5,5)
print(a)
"""

torch.manual_seed(42)

# more padding examples and exploration
# (for further understanding)


# 6x6 input holding 0..35, with batch and channel axes -> shape (1, 1, 6, 6).
data = torch.arange(36).type(torch.float32)
data = data.reshape(6,6).unsqueeze(0).unsqueeze(1)
print(data), print()

KERNEL_SIZE = (2,2)
STRIDE = (2,2)

# layer1: no padding.
layer1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=KERNEL_SIZE, stride=STRIDE, padding=0)

# get the first layer's weights
weights, bias = layer1.weight.data, layer1.bias.data

# layer2: identical except for padding=2.
layer2 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=KERNEL_SIZE, stride=STRIDE, padding=2)

# copy it into the second layer's weights
# (assignment, not a clone: both layers now refer to the same weight and bias tensors)
layer2.weight.data = weights
layer2.bias.data = bias

# check if the layers have matching weights
print(torch.equal(layer1.weight.data, layer2.weight.data) and torch.equal(layer1.bias.data, layer2.bias.data)), print()

result1 = layer1(data)
print("result1\n", result1), print()

# Surround the unpadded result with one ring of replicated edge values,
# so it can be compared element by element with the padded layer's output.
addpad = nn.ReplicationPad2d(1)
result1_pad = addpad(result1)
print("result1pad\n", result1_pad), print()

result2 = layer2(data)
print("result2\n", result2), print()

# Element-wise comparison; prints a boolean tensor.
print(torch.eq(result1_pad, result2)), print()

#nn.functional.pad(input=result1, pad=(1,1,1,1), value=0)


#########
# now with numpy
print("###### now with numpy\n")

# Pad the input by hand with two zeros on every side (6x6 -> 10x10), then switch to numpy.
data = nn.functional.pad(input=data, pad=(2,2,2,2), value=0)
data = data.detach().numpy()
weights = weights.detach().numpy()
bias = bias.detach().numpy()


data = data.squeeze()
print(data),print()

output = np.array([])

# Slide a 2x2 window over the padded input in steps of 2 (5 positions per axis);
# each output value is dot(window, weights) + bias.
for x in range(0, 10, 2):
    for y in range(0, 10, 2):
        cur2x2 = data[x:x+2,y:y+2]
        res = cur2x2.flatten().T.dot(weights.flatten()) + bias
        output = np.append(output, res.squeeze())    
        
output = output.reshape(5,5)  
print(output), print()
# NOTE(review): exact == on floats; `output` was built through np.append on a float64 array
# while result2 comes from torch - confirm exact equality is what is intended here.
print(output == result2.detach().numpy().squeeze())

print("\nIt works and matches the previously calculated result2.")
print("Each row is flattened and transposed, then")
print("it is dot product'd with the weights (nxn)=kernel_size")
print("then the bias is added.")

# continuing with learnpytorch.io
# 03: PyTorch Computer Vision

# Fresh loss and optimizer; the optimizer is bound to model_2's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(), lr=0.1)

torch.manual_seed(42)

# NOTE: `epochs` is not passed on; the epoch count is given literally in the call below.
epochs = 3


# 3 epochs; the trailing 32 is the batch_size argument.
train_model(model_2, loss_fn, optimizer, train_dataloader, 3, 32)

model_2_results = eval_model(model_2, test_dataloader, loss_fn, accuracy_fn, True)

# Collect the three result dicts into one table, one row per model.
import pandas as pd
final_results = pd.DataFrame([
    model_0_results,
    model_1_results,
    model_2_results])

# Bare expression: displays the table in a notebook, no effect in a script.
final_results

# Horizontal bar chart of accuracy per model.
final_results.set_index("model_name")["model_accuracy"].plot(kind="barh")
plt.xlabel("accuracy (%)")
plt.ylabel("model")

def model_do_predictions(model: torch.nn.Module,
                data_loader: torch.utils.data.DataLoader) -> np.ndarray:
    """Return the predicted class index for every sample in ``data_loader``.

    Args:
        model: Network to run; it is switched to eval mode.
        data_loader: Iterable yielding ``(X, y)`` batches; the labels are ignored.

    Returns:
        1-D integer array of predicted class indices, in loader order. The
        array is empty when the loader yields no batches.
    """
    batch_predictions = []
    model.eval()
    with torch.inference_mode():
        for X, _ in data_loader:
            logits = model(X)
            # argmax over the class axis turns per-class scores into one index per sample.
            batch_predictions.append(logits.detach().cpu().numpy().argmax(axis=1))

    if not batch_predictions:
        return np.array([], dtype=np.int64)

    # One concatenate at the end instead of np.append per batch, which would
    # recopy the growing array every iteration and promote the indices to float64.
    return np.concatenate(batch_predictions)

# Predicted class index for every test sample, in dataset order
# (test_dataloader is built with shuffle=False, so it lines up with test_data.targets).
test_predictions = model_do_predictions(model_2, test_dataloader)

from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

from sklearn.metrics import confusion_matrix
import seaborn as sn

# sklearn's signature is confusion_matrix(y_true, y_pred): rows are the true classes and
# columns the predicted classes. Passing the predictions first would transpose the matrix.
conf_mat = confusion_matrix(test_data.targets.detach().numpy(), test_predictions)

plt.imshow(conf_mat)

# Label rows and columns with the class names for readability.
df_cm = pd.DataFrame(conf_mat, index=class_names, columns=class_names)

sn.heatmap(df_cm, annot=True, fmt='g', annot_kws={"size": 10}, cmap='Blues')

# Bare expression: displays the table in a notebook, no effect in a script.
df_cm