In [1]:
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class CNN(nn.Module):
    """Small two-stage convolutional image classifier.

    Layout: Conv2d(3x3, 6) -> ReLU -> MaxPool(2x2) -> Conv2d(3x3, 16) -> ReLU
    -> MaxPool(2x2) -> flatten -> Linear(num_classes).

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of logits produced by the final linear layer.
        input_size: ``(height, width)`` of the input images. Defaults to
            ``(28, 28)``, which yields the original 16*7*7 = 784 flattened
            features.

    Raises:
        ValueError: If ``input_size`` is too small to survive the two 2x2
            poolings (either side smaller than 4).
    """

    def __init__(self, in_channels, num_classes, input_size=(28, 28)):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride = (2,2))
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(3,3), stride=(1,1), padding=(1,1))

        # The 3x3 / stride-1 / padding-1 convolutions keep H and W unchanged;
        # each of the two 2x2 / stride-2 poolings floor-halves them.
        height, width = input_size
        pooled_h, pooled_w = height // 4, width // 4
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small: both sides must be >= 4"
            )
        self.fc1 = nn.Linear(16 * pooled_h * pooled_w, num_classes)
        self.initialize_weights()

    def forward(self, x):
        """Return class logits of shape (N, num_classes) for a (N, C, H, W) batch."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x

    def initialize_weights(self):
        """Re-initialise every Conv2d, Linear and BatchNorm2d found in ``self.modules()``.

        Conv2d / Linear weights get Kaiming-uniform init and zero biases;
        BatchNorm2d gets weight 1 and bias 0. Parameters that are absent
        (``bias=False`` layers, ``affine=False`` batch norm) are skipped.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_uniform_(m.weight)

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                # weight and bias are both None when affine=False.
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

In [3]:
# NOTE(review): with its default sizing the classifier head expects 16*7*7 features,
# i.e. 28x28 inputs — confirm that matches the 3-channel images used here.
model = CNN(in_channels=3, num_classes=10)

# Dump every learnable tensor (weights and biases), one after another, to inspect
# the result of the custom initialisation.
print(*model.parameters(), sep="\n")

Parameter containing:
tensor([[[[-0.4067,  0.2057,  0.2564],
          [-0.2316, -0.3393, -0.1873],
          [ 0.3620,  0.1881,  0.3866]],

         [[-0.3538,  0.2011, -0.0127],
          [ 0.2834, -0.1071, -0.2290],
          [-0.1022,  0.2650, -0.1552]],

         [[ 0.1053, -0.0792,  0.2082],
          [ 0.1035,  0.1869, -0.1288],
          [ 0.2265,  0.4284, -0.0118]]],


        [[[-0.1061,  0.4519, -0.4632],
          [ 0.3361, -0.4315, -0.0564],
          [ 0.3939,  0.2048, -0.1372]],

         [[ 0.4195, -0.2691,  0.0321],
          [-0.4083, -0.1920, -0.0708],
          [-0.4428,  0.0920,  0.4047]],

         [[ 0.2877,  0.1767,  0.4338],
          [-0.2054,  0.0160, -0.1153],
          [ 0.0926, -0.0484, -0.2973]]],


        [[[ 0.3534,  0.3302, -0.1137],
          [ 0.2720, -0.0982, -0.4061],
          [ 0.1347,  0.1247, -0.0497]],

         [[-0.2471,  0.3199, -0.1784],
          [ 0.0222,  0.1091,  0.1778],
          [ 0.3753,  0.4514,  0.3771]],

         [[ 0.4664,  0

In [4]:
# Walk the module tree: the root CNN is yielded first, then each child layer.
print("\n".join(str(layer) for layer in model.modules()))

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=784, out_features=10, bias=True)
)
Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Linear(in_features=784, out_features=10, bias=True)


In [5]:
# NOTE(review): `modules` is referenced here without being called, so this prints the
# bound method's repr (which embeds the model summary) instead of iterating the
# submodules. If only the architecture summary is wanted, `print(model)` shows it directly.
print(f"{model.modules}")

<bound method Module.modules of CNN(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=784, out_features=10, bias=True)
)>


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
# Cross-entropy loss for multi-class classification.
criterion = nn.CrossEntropyLoss()

learning_rate = 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# ReduceLROnPlateau watches the metric passed to scheduler.step(). When that metric
# has stopped improving for more than `patience` (5) consecutive epochs, the learning
# rate is multiplied by `factor` (0.1).
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm against
# the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.1,
    patience=5,
    verbose=True,
)

In [None]:
# Training loop: one optimisation pass over `train_loader` per epoch, after which the
# scheduler is given that epoch's mean training loss.
# `num_epochs`, `train_loader` and `device` are expected to be defined elsewhere in
# the notebook. NOTE(review): assumes `model` already lives on `device` — confirm.

for epoch in range(1, num_epochs + 1):  # 1-based display; runs exactly num_epochs epochs
    losses = []

    for data, targets in train_loader:
        # Keep the image layout intact: the model's Conv2d layers need (N, C, H, W)
        # input, so the batch must not be flattened before the forward pass.
        data = data.to(device=device)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())

        # Clear any stale gradients first so the step uses this batch's gradients only.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean training loss over all batches of this epoch.
    mean_loss = sum(losses)/len(losses)

    # The scheduler multiplies the LR by `factor` once mean_loss has failed to improve
    # for more than `patience` consecutive epochs.
    scheduler.step(mean_loss)
    print(f'Cost at epoch {epoch} is {mean_loss}')
