In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
import torchvision
import torchvision.transforms as transforms

# Allow printed tensors to use up to 120 characters per line before wrapping.
torch.set_printoptions(linewidth = 120) 
# Explicitly enable gradient tracking for the rest of the notebook.
# As the last expression of the cell, the returned object is echoed as cell output.
torch.set_grad_enabled(True) 

<torch.autograd.grad_mode.set_grad_enabled at 0x141d06b50a0>

In [2]:
# Record the library versions this notebook was run with (one per line).
print(torch.__version__, torchvision.__version__, sep='\n')

1.6.0
0.7.0


In [4]:
def get_num_correct(preds, labels):
    """Count how many rows of ``preds`` predict the matching entry of ``labels``.

    Args:
        preds: 2-D tensor of per-class scores; the predicted class of each
            row is the index of its largest value along dim 1.
        labels: tensor of target class indices, compared element-wise with
            the predicted classes.

    Returns:
        The number of matching predictions as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes == labels
    return matches.sum().item()

In [5]:
class Network(nn.Module):
    """Small convolutional classifier: two conv layers and three linear layers.

    The first linear layer expects ``12 * 4 * 4`` features per sample, which is
    what the conv/pool stack produces for 1-channel 28x28 images
    (28 -> 24 -> 12 -> 8 -> 4 along each spatial axis). The network returns raw,
    un-normalised scores for 10 classes.
    """

    def __init__(self):
        super().__init__()
        # Two 5x5 convolutions; forward() follows each with ReLU and 2x2 max-pooling.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected head: 192 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run the forward pass.

        Args:
            t: image tensor whose trailing dimensions are (channels=1, H, W);
                H and W must be such that the conv/pool stack yields exactly
                ``self.fc1.in_features`` values per sample (28x28 does).

        Returns:
            Tensor with 10 raw class scores per sample (no softmax applied).

        Raises:
            RuntimeError: if the conv/pool output of one sample does not have
                ``self.fc1.in_features`` elements.
        """
        # (1) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (2) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden linear layer
        # reshape(-1, n) would silently fold surplus elements into the batch
        # axis when the spatial size is wrong (e.g. 16 images of 32x32 would
        # come out as 25 rows), so validate the per-sample size first.
        # RuntimeError is used because that is what torch itself raises for
        # shape mismatches.
        channels, height, width = t.shape[-3:]
        flat_features = self.fc1.in_features
        if channels * height * width != flat_features:
            raise RuntimeError(
                'conv output has {} features per sample, expected {}; '
                'check the input image size'.format(
                    channels * height * width, flat_features
                )
            )
        t = t.reshape(-1, flat_features)
        t = self.fc1(t)
        t = F.relu(t)

        # (4) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (5) output layer
        t = self.out(t)
        return t

In [8]:
# FashionMNIST training split, stored under ./data (downloaded if missing);
# every image is converted to a tensor on access.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Training with a single batch: review

In [11]:
# Fresh model, loader and optimizer so this cell can be re-run on its own.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

# Get one batch and split it into inputs and targets.
batch = next(iter(train_loader))
images, labels = batch

# Forward pass and loss before any weight update.
preds = network(images)
loss = F.cross_entropy(preds, labels)

# Gradients accumulate across backward() calls, so clear them first
# (same pattern as the full training loops).
optimizer.zero_grad()
loss.backward()   # calculate gradients
optimizer.step()  # update weights

print('loss1:', loss.item())

# Re-evaluate the same batch with the updated weights. This pass is only
# used for reporting, so skip building an autograd graph.
with torch.no_grad():
    preds = network(images)
    loss = F.cross_entropy(preds, labels)

print('loss2:', loss.item())

loss1: 2.3131484985351562
loss2: 2.2871596813201904


# Training with all batches: a single epoch

In [19]:
# Fresh model, loader and optimizer for a single pass over the training set.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

# Running totals over all batches of the epoch.
total_loss, total_correct = 0, 0

for batch in train_loader:
    images, labels = batch

    # Gradients are added to the parameters' grad attributes on every
    # backward() call, so reset them before processing this batch.
    optimizer.zero_grad()

    # Forward pass and loss for this batch.
    preds = network(images)
    loss = F.cross_entropy(preds, labels)

    # Backward pass, then one optimizer step per batch.
    loss.backward()
    optimizer.step()

    total_loss += loss.item()
    total_correct += get_num_correct(preds, labels)

print('epoch :', 0, 'total_correct:', total_correct, 'loss :', total_loss)

epoch : 0 total_correct: 47767 loss : 321.65259540081024


In [16]:
# Fraction of training samples predicted correctly during the epoch.
total_correct / len(train_set)

0.11791666666666667

# Training with multiple epochs: the complete training loop

In [22]:
# Fresh model, loader and optimizer for the complete training run.
network = Network()

# shuffle=True draws a new sample order every epoch; without it each epoch
# would replay exactly the same batches in exactly the same order.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(5):

    # Per-epoch running totals, reset at the start of every epoch.
    total_loss = 0
    total_correct = 0

    for batch in train_loader:  # get batch
        images, labels = batch

        preds = network(images)  # pass batch
        loss = F.cross_entropy(preds, labels)  # calculate loss

        # Gradients accumulate across backward() calls, so zero them first.
        optimizer.zero_grad()
        loss.backward()   # calculate gradients
        optimizer.step()  # update weights (one step per batch)

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print('epoch :', epoch, 'total_correct:', total_correct, 'loss :', total_loss)

epoch : 0 total_correct: 47550 loss : 330.71892105042934
epoch : 1 total_correct: 51445 loss : 230.24427154660225
epoch : 2 total_correct: 52070 loss : 213.1625354886055
epoch : 3 total_correct: 52544 loss : 202.1854057163
epoch : 4 total_correct: 52701 loss : 196.6302040219307


In [None]:
# Training accuracy of the last epoch: correct predictions / dataset size.
total_correct / len(train_set)