https://deeplizard.com/learn/video/0LhiS6yu2qQ


In [29]:
import torch 
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [30]:
# Training split of FashionMNIST, stored under ./data (downloaded if needed);
# every image is converted to a tensor on access.
to_tensor_pipeline = transforms.Compose([transforms.ToTensor()])
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=to_tensor_pipeline,
)

# Shuffled mini-batches of 100 samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=100,
    shuffle=True,
)

In [31]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The layer sizes assume single-channel 28x28 inputs. Each 5x5 convolution
    (no padding) shrinks height/width by 4 and each 2x2 max-pool halves them:
    28 -> 24 -> 12 -> 8 -> 4, so 12 channels * 4 * 4 features reach ``fc1``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # "linear", "dense" and "fully connected" all name the same kind of layer
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            t: image tensor whose last three dimensions are (1, 28, 28).

        Returns:
            Tensor of shape (rows, 10) holding unnormalised scores.

        Raises:
            RuntimeError: if the feature map after the second pooling stage is
                not (12, 4, 4), i.e. the input images were not 28x28.
        """
        # (1) input layer: the identity, nothing to compute

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Guard the flatten: reshape(-1, 192) on a feature map of any other
        # size would silently mix samples together whenever the total element
        # count happens to be divisible by 192.
        if tuple(t.shape[-3:]) != (12, 4, 4):
            raise RuntimeError(
                "expected a (12, 4, 4) feature map before fc1, got "
                f"{tuple(t.shape[-3:])}; Network expects 1x28x28 inputs"
            )
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer
        # No softmax here: the scores are fed to F.cross_entropy, which
        # expects unnormalised logits.
        t = self.out(t)

        return t

In [32]:
def get_num_correct(preds, labels):
    """Count how many rows of ``preds`` have their largest score at the index given in ``labels``.

    Args:
        preds: tensor of per-class scores, one row per sample.
        labels: tensor of class indices, one per sample.

    Returns:
        The number of matching samples as a Python number.
    """
    predicted_classes = torch.argmax(preds, dim=1)
    matches = torch.eq(predicted_classes, labels)
    return matches.sum().item()

In [33]:
network = Network()

# Rebuild the loader WITH shuffling. Without shuffle=True every epoch would
# iterate over the same batches in the same dataset order.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(10):

    # Running totals for this epoch; total_loss is the sum of per-batch mean
    # losses, not an average over samples.
    total_loss = 0
    total_correct = 0

    for images, labels in train_loader:  # Get Batch
        preds = network(images)  # Pass Batch
        loss = F.cross_entropy(preds, labels)  # Calculate Loss

        optimizer.zero_grad()  # Clear gradients left over from the previous step
        loss.backward()  # Calculate Gradients
        optimizer.step()  # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(
        "epoch", epoch,
        "total_correct:", total_correct,
        "loss:", total_loss
    )

epoch 0 total_correct: 46798 loss: 348.02489763498306
epoch 1 total_correct: 51010 loss: 242.41272142529488
epoch 2 total_correct: 51756 loss: 221.96503888070583
epoch 3 total_correct: 52254 loss: 210.60134153068066
epoch 4 total_correct: 52312 loss: 206.92148055136204
epoch 5 total_correct: 52545 loss: 201.33067940175533
epoch 6 total_correct: 52681 loss: 197.51400363445282
epoch 7 total_correct: 52761 loss: 195.38509641587734
epoch 8 total_correct: 52869 loss: 193.54135808348656
epoch 9 total_correct: 52868 loss: 191.56061612069607


In [34]:
# Number of samples in the training set.
len(train_set)

60000

In [35]:
# Number of labels; should match the number of samples.
len(train_set.targets)

60000

# Building a Function to get Predictions for ALL Samples

We'll create a function called get_all_preds(), and we'll pass a model and a data loader. The model will be used to obtain the predictions, and the data loader will be used to provide the batches from the training set.

Note that, at the top, we have annotated the function with PyTorch's @torch.no_grad() decorator. This is because we want this function's execution to omit gradient tracking.

This is because gradient tracking uses memory, and during inference (getting predictions while not training) there is no need to keep track of the computational graph. The decorator is one way of locally turning off the gradient tracking feature while executing specific functions.

We specifically need the gradient calculation feature anytime we are going to calculate gradients using the backward() function. Otherwise, it is a good idea to turn it off because having it off will reduce memory consumption for computations, e.g. when we are using networks for predicting (inference).

In [36]:
@torch.no_grad()
def get_all_preds(model, loader):
    """Run ``model`` over every batch in ``loader`` and return all outputs stacked along dim 0.

    Gradient tracking is disabled for the whole call by the decorator.

    Args:
        model: callable applied to each batch of images.
        loader: iterable yielding ``(images, labels)`` pairs; the labels are ignored.

    Returns:
        One tensor holding the outputs for every batch, in the order the
        loader produced them. An empty loader yields an empty tensor.
    """
    # Collect per-batch outputs and concatenate once at the end. Calling
    # torch.cat inside the loop would copy all earlier predictions again on
    # every iteration.
    batch_preds = [model(images) for images, _ in loader]

    if not batch_preds:
        return torch.tensor([])
    return torch.cat(batch_preds, dim=0)


We can disable gradient computations for specific or local spots in our code, e.g. like what we just saw with the decorated function. As another example, we can use Python's with statement (a context manager) to specify that a specific block of code should exclude gradient computations.

Both of these options are valid. Let's keep both of these and get our predictions.

In [37]:
# Unshuffled loader, so the rows of train_preds line up with train_set.targets.
prediction_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=10000)

# Gradient tracking is switched off while the predictions are collected.
with torch.no_grad():
    train_preds = get_all_preds(network, prediction_loader)

In [38]:
# Accuracy over the full training set, using the predictions gathered above.
preds_correct = get_num_correct(train_preds, train_set.targets)
train_accuracy = preds_correct / len(train_set)

print('total correct:', preds_correct)
print('accuracy:', train_accuracy)

total correct: 52979
accuracy: 0.8829833333333333


# Building the Confusion Matrix

In [39]:
# Label (class index) of every training sample.
train_set.targets

tensor([9, 0, 0,  ..., 3, 0, 5])

In [40]:
# Predicted class index per sample: position of the largest score in each row.
train_preds.argmax(dim=1)

tensor([9, 0, 0,  ..., 3, 0, 5])

In [41]:
# Pair every target with its prediction: stacking along dim=1 gives one
# (target, prediction) row per sample, i.e. 60,000 ordered pairs.
predicted_labels = train_preds.argmax(dim=1)
stacked = torch.stack((train_set.targets, predicted_labels), dim=1)

In [42]:
# One row per sample, two columns: (target, prediction).
stacked.shape

torch.Size([60000, 2])

In [43]:
# Inspect the (target, prediction) pairs.
stacked

tensor([[9, 9],
        [0, 0],
        [0, 0],
        ...,
        [3, 3],
        [0, 0],
        [5, 5]])

Now, we can iterate over these pairs and count the number of occurrences at each position in the matrix. Let's create the matrix. Since we have ten prediction categories, we'll have a ten by ten matrix.

In [44]:
# Empty 10x10 integer count matrix, one row and one column per class.
cmt = torch.zeros((10, 10), dtype=torch.long)

In [45]:
# The count matrix before any pair has been tallied.
cmt

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [46]:
# Walk over the (target, prediction) pairs and bump the matching cell:
# the row is the true label, the column is the predicted label.
for true_label, pred_label in stacked.tolist():
    cmt[true_label, pred_label] += 1

In [47]:
# The filled confusion matrix: rows are true labels, columns are predicted labels.
cmt

tensor([[5286,    6,   43,   92,   15,    1,  513,    0,   44,    0],
        [  22, 5856,    2,   88,   12,    0,   13,    0,    7,    0],
        [ 102,    1, 4396,   63,  903,    1,  512,    0,   22,    0],
        [ 287,   32,    4, 5341,  182,    1,  145,    0,    6,    2],
        [  10,    4,  221,  175, 5180,    0,  383,    0,   27,    0],
        [   1,    0,    0,    0,    0, 5557,    3,  332,   26,   81],
        [1012,    3,  379,  102,  525,    0, 3897,    1,   81,    0],
        [   0,    0,    0,    0,    1,   13,    0, 5762,   12,  212],
        [  13,    4,   14,   21,   20,    7,   40,    2, 5874,    5],
        [   0,    0,    0,    0,    0,   11,    0,  154,    5, 5830]])

# Plotting the Confusion Matrix

In [52]:
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
from resources.plotcm import plot_confusion_matrix

ModuleNotFoundError: No module named 'resources.plotcm'

In [50]:
# Same confusion matrix, this time computed by scikit-learn from the
# true labels and the predicted labels.
true_labels = train_set.targets
predicted_labels = train_preds.argmax(dim=1)
cm = confusion_matrix(true_labels, predicted_labels)

print(type(cm))
cm

<class 'numpy.ndarray'>


array([[5286,    6,   43,   92,   15,    1,  513,    0,   44,    0],
       [  22, 5856,    2,   88,   12,    0,   13,    0,    7,    0],
       [ 102,    1, 4396,   63,  903,    1,  512,    0,   22,    0],
       [ 287,   32,    4, 5341,  182,    1,  145,    0,    6,    2],
       [  10,    4,  221,  175, 5180,    0,  383,    0,   27,    0],
       [   1,    0,    0,    0,    0, 5557,    3,  332,   26,   81],
       [1012,    3,  379,  102,  525,    0, 3897,    1,   81,    0],
       [   0,    0,    0,    0,    1,   13,    0, 5762,   12,  212],
       [  13,    4,   14,   21,   20,    7,   40,    2, 5874,    5],
       [   0,    0,    0,    0,    0,   11,    0,  154,    5, 5830]])

In [53]:
from plotcm import plot_confusion_matrix

ModuleNotFoundError: No module named 'plotcm'

In [None]:
# Class names used as tick labels, listed in label-index order (0..9).
names = (
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
)

# The plotcm helper module could not be imported in this notebook, so the
# matrix is drawn with scikit-learn's own display class instead.
from sklearn.metrics import ConfusionMatrixDisplay

fig, ax = plt.subplots(figsize=(10, 10))
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=names).plot(
    ax=ax,
    cmap=plt.cm.Blues,
    xticks_rotation=45,
    values_format='d',
)
ax.set_title('Confusion matrix')
plt.show()