# The training process


1. Get a batch from the training set.
#
2. Pass the batch to the network.
#
3. Calculate the loss (the difference between the predicted values and the true values). - LOSS FUNCTION
#
4. Calculate the gradient of the loss function with respect to the network's weights. - BACK PROPAGATION
#
5. Update the weights using the gradients to reduce the loss. - OPTIMIZATION ALGORITHM
#
6. Repeat steps 1-5 until one epoch is completed.
#
7. Repeat steps 1-6 for as many epochs as required to reach the minimum loss.

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim #gets access for optimizer to update the weights

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth = 120) # display option: allow up to 120 characters per printed line
torch.set_grad_enabled(True) # explicitly turn gradient tracking on (already on by default)

<torch.autograd.grad_mode.set_grad_enabled at 0x120faa7adf0>

In [14]:
# Show the installed torch and torchvision versions, one per line.
print(torch.__version__, torchvision.__version__, sep='\n')

1.6.0
0.7.0


In [28]:
def get_num_correct(preds, labels):
    """Return how many predictions match their labels.

    The predicted class of each row in ``preds`` is the index of its largest
    value along dimension 1; the result is the count of rows whose predicted
    class equals the corresponding entry of ``labels``, as a Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

In [36]:
class Network(nn.Module):
    """Small convolutional classifier.

    Two convolution + ReLU + max-pool stages are followed by two hidden
    linear layers and a 10-unit output layer. ``fc1`` expects 12 * 4 * 4
    features per sample.
    NOTE(review): presumably sized for 1x28x28 inputs such as FashionMNIST
    images -- confirm before feeding other image sizes.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers: 1 -> 6 -> 12 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected layers: 192 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch ``t`` through the network and return the output scores."""
        # Hidden conv stages: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten to one row per sample; the width is whatever fc1 consumes
        # (12 * 4 * 4), so it cannot drift out of sync with __init__.
        t = t.reshape(-1, self.fc1.in_features)

        # Hidden linear layers with ReLU activations.
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))

        # Output layer: raw scores, no activation applied here.
        return self.out(t)

In [37]:
# FashionMNIST training split kept under ./data, with every image converted
# to a tensor when it is loaded.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [38]:
network = Network() # create a fresh instance of the model defined above

# Data loader

In [40]:
# train_loader is a PyTorch DataLoader: it lets us pull batches (100 samples
# each here) from the training set.
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=100)

# Pull one batch from train_loader and unpack it into images and labels.
batch = next(iter(train_loader))
images, labels = batch

# Calculating the loss

During the training process, after we pass a batch to the network, we use the predicted values and the labels to calculate the loss.

In [43]:
# Forward pass: the network's predictions for the batch.
preds = network(images)
# Cross-entropy loss between predictions and labels; returned as a tensor.
loss = F.cross_entropy(input=preds, target=labels)
# Pull the plain Python number out of the loss tensor.
loss.item()


2.315242290496826

# Calculating Gradients

In [44]:
print(network.conv1.weight.grad) # prints None: backward() has not been called yet, so the weight tensor holds no gradient

None


In [45]:
loss.backward() # backpropagation: calculate the gradients of the loss

# each weight tensor's .grad attribute now holds its gradient

In [46]:
network.conv1.weight.grad.shape # the gradient tensor has the same shape as the weight tensor

torch.Size([6, 1, 5, 5])

# Updating the network weights

In [53]:
optimizer = optim.Adam(network.parameters(),  lr = 0.01) # lr is the learning rate
# network.parameters() hands the network's weights to the optimizer so it can update them during training

In [59]:
loss.item() # NOTE(review): execution count 59 is later than the cells that follow, so the value shown presumably comes from a re-run after the weights were updated - confirm

2.2583253383636475

In [55]:
get_num_correct(preds, labels) # number of correct predictions in the batch

4

In [60]:
optimizer.step() # update the weights using the gradients stored by backward()
# calling step() on the optimizer moves the weights in the direction of the loss function's minimum

In [57]:
preds = network(images) # forward pass again on the same batch
loss = F.cross_entropy(preds, labels) # recalculate the loss for the new predictions

In [58]:
loss.item() # value of the recalculated loss

2.2583253383636475

In [62]:
get_num_correct(preds, labels) # the number of correct predictions went up compared with the earlier count

15

# Train Using A Single Batch

In [64]:
# One complete training step on a single batch:
# get batch -> forward pass -> loss -> gradients -> weight update,
# then a second forward pass to show how the loss changed.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader)) # Get Batch
images, labels = batch

preds = network(images) # Pass Batch
loss = F.cross_entropy(preds, labels) # Calculate Loss

# backward() adds to whatever is already stored in .grad, so clear the
# gradients first. This is a no-op on a fresh network but keeps the step
# correct if it is ever repeated (e.g. wrapped in a loop over batches).
optimizer.zero_grad()
loss.backward() # Calculate Gradients
optimizer.step() # Update Weights

print('loss1:', loss.item()) # loss before the weight update
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item()) # loss on the same batch after the weight update

loss1: 2.309319496154785
loss2: 2.283515214920044
