<font size=+4><b>TP CNN with MNIST - Using PyTorch</b></font>

In [26]:
import torch
# all nn libraries nn.layer, convs and loss functions
import torch.nn as nn
import time
# visualisation
import torchvision
from torchvision import transforms
from torchview import draw_graph

In [16]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

## 1. Preparing dataset

##### 1.1. Transformation of data

In [17]:
data_transform = {
    "train": transforms.Compose([transforms.ToTensor(),
                                 transforms.Normalize((0.5,), (0.5,))]),
    "test": transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5,), (0.5,))])}

In [37]:
train_dataset = torchvision.datasets.MNIST(root='../data',train=True,
                                        download=True, transform=data_transform["train"])
test_dataset = torchvision.datasets.MNIST(root='../data',train=False,
                                       download=True,transform=data_transform["test"])

In [44]:
trainDataset, valDataset = torch.utils.data.random_split(train_dataset, [0.8, 0.2])

In [51]:
len(valDataset)

12000

In [49]:
train_loader = torch.utils.data.DataLoader(
    trainDataset,
    batch_size=100, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    valDataset,
    batch_size=100, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1000, shuffle=True)

In [50]:
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)

## 2. Build GoogleNet - Inception_V1

#### ConvolutionBlock

In [20]:
class Convolution2D(nn.Module):
    """
    Creates a convolution 2D layers by BatchNorm and ReLU without bias
    
    Args:
        in_channels (int): input channel of convolution layer
        out_channels (int): output channel of convolution layer
        **kwargs
    """
    
    def __init__(self, in_channels, out_channels, **kwargs) -> None:
        super(Convolution2D, self).__init__()
        
        # Conv2D
        self.conv2d = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **kwargs)
        
        # Batchnorm
        self.batchnorm2d = nn.BatchNorm2d(out_channels)

        # ReLU layer
        self.relu = nn.ReLU(inplace= True)
    def forward(self, x):
        return self.relu(self.batchnorm2d(self.conv2d(x)))


def testConvBlock():
    x = torch.randn(64,1,28,28)
    model = Convolution2D(1,3,kernel_size = 3, stride = 1, padding = 1)
    print(model(x).shape)
    del model

testConvBlock()

torch.Size([64, 3, 28, 28])


#### Inception Block

In [21]:
class Inception(nn.Module):
    '''

    Building block of inception-v1 architecture. creates following 4 branches and concatenate them
    -  branch1: 1x1 conv
    -  branch2: 1x1 conv followed by 3x3 conv
    -  branch3: 1x1 conv followed by 5x5 conv
    -  branch4: Maxpool2d followed by 1x1 conv

    Args:
       in_channels (int) : # of input channels
       out_1x1 (int) : number of output channels for branch 1
       red_3x3 (int) : reduced 3x3 referring to output channels of 1x1 conv just before 3x3 in branch2
       out_3x3 (int) : number of output channels for branch 2
       red_5x5 (int) : reduced 5x5 referring to output channels of 1x1 conv just before 5x5 in branch3
       out_5x5 (int) : number of output channels for branch 3
       out_1x1_pooling (int) : number of output channels for branch 4

    '''
    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1_pooling) -> None:
        super(Inception, self).__init__()
        
        # Branch 1  
        self.branch1 = Convolution2D(in_channels=in_channels, out_channels=out_1x1, kernel_size = 1, stride = 1, padding = 0)                               #conv 1x1
        
        # Branch 2
        self.branch2 = nn.Sequential(Convolution2D(in_channels=in_channels, out_channels=red_3x3, kernel_size = 1, stride = 1, padding = 0),                #conv 1x1
                                     Convolution2D(in_channels=red_3x3, out_channels=out_3x3, kernel_size = 3, stride = 1, padding = 1))                    #conv 3x3
        
        # Branch 3
        self.branch3 = nn.Sequential(Convolution2D(in_channels=in_channels, out_channels=red_5x5, kernel_size = 1, stride = 1, padding = 0),                #conv 1x1
                                     Convolution2D(in_channels=red_5x5, out_channels=out_5x5, kernel_size = 5, stride = 1, padding = 2))                    #conv 5x5
        
        # Branch 4
        self.branch4 = nn.Sequential(nn.MaxPool2d(kernel_size = 3, stride = 1, padding = 1),                                                                #maxpool 3x3
                                     Convolution2D(in_channels=in_channels, out_channels=out_1x1_pooling, kernel_size = 1, stride = 1, padding = 0))        #conv 1x1
        
    def forward(self, x):
        return torch.cat([self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], dim=1)

In [22]:
def testInceptionBlock():
    x = torch.randn((32,1,28,28))
    model = Inception(1,64,96,128,16,32,32)
    print(model(x).shape)
    return model

model = testInceptionBlock()

architecture = 'InceptionBlock'
model_graph = draw_graph(model, input_size=(1,1,28,28), graph_dir ='TB' , roll=True, expand_nested=True, graph_name=f'self_{architecture}',save_graph=True,filename=f'self_{architecture}')
# model_graph.visual_graph

torch.Size([32, 256, 28, 28])





- Building the early softmax output in order to avoid the vanishing gradient

#### EarlyOutput

In [31]:
class EarlyOutput(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(EarlyOutput, self).__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = Convolution2D(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        
        self.dropout = nn.Dropout(p=0.5)
        self.relu = nn.ReLU(inplace= True)
    def forward(self, x):
        
        x = self.averagePool(x)       
        x = self.conv(x)       
        x = torch.flatten(x, 1)
        # x = torch.nn.functional.dropout(x, 0.5, training=self.training)             
        # x = torch.nn.functional.relu(self.fc1(x), inplace=True)
        # x = torch.nn.functional.dropout(x, 0.5, training=self.training)      
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)  
        x = self.fc2(x)
        
        return x

#### Inception_V1

In [None]:
class Inception_V1(nn.Module):
    '''
    Building the inceptionv1 architecture. Using testInceptionv1 to evaluate the dimensions of output after each layer and deciding the padding number.

    Args:
        in_channels (int) : input channels. 3 for RGB image
        num_classes : number of classes of training dataset

    '''
    
    def __init__(self, in_channels, num_class, init_weights = False) -> None:
        super(Inception_V1, self).__init__()
        
        self.conv1 = Convolution2D(in_channels=in_channels, out_channels=64, kernel_size = 7, stride = 2, padding = 3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride = 2, padding=1)
        
        self.conv2 = nn.Sequential(Convolution2D(in_channels=64, out_channels=64, kernel_size = 1, stride = 1, padding = 0),
                                   Convolution2D(in_channels=64, out_channels=192, kernel_size = 3, stride = 1, padding = 1))
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride = 2, padding=1)
        
        # in_channels , out_1x1 , red_3x3 , out_3x3 , red_5x5 , out_5x5 , out_1x1_pooling
        self.inception_3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception_3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride = 2, padding=1)
        
        self.inception_4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception_4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception_4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception_4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception_4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride = 2, padding=1)
        
        self.inception_5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception_5b = Inception(832, 384, 192, 384, 48, 128, 128)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        
        self.dropout = nn.Dropout(p=0.4)
        self.fc1 = nn.Linear(1024, num_class)
        
        self.early1 = EarlyOutput(512, num_class)         # After inception 4a
        self.early2 = EarlyOutput(528, num_class)         # After inception 4d
        
        if init_weights:
            self.weight_initialize()
            
    def forward(self, x):
        x = self.conv1(x)
        print('conv1',x.shape)
        x = self.maxpool1(x)
        print('maxpool1',x.shape)

        x = self.conv2(x)
        print('conv2',x.shape)
        x = self.maxpool2(x)
        print('maxpool2',x.shape)

        x = self.inception_3a(x)
        print('3a',x.shape)
        x = self.inception_3b(x)
        print('3b',x.shape)
        x = self.maxpool3(x)
        print('3bmax',x.shape)

        x = self.inception_4a(x)
        print('4a',x.shape)
        
        if self.training:
            early1 = self.early1(x)
        
        x = self.inception_4b(x)
        print('4b',x.shape)
        x = self.inception_4c(x)
        print('4c',x.shape)
        x = self.inception_4d(x)
        print('4d',x.shape)
        
        if self.training:
            early2 = self.early2(x)
        
        x = self.inception_4e(x)
        print('4e',x.shape)
        x = self.maxpool4(x)
        print('maxpool',x.shape)

        x = self.inception_5a(x)
        print('5a',x.shape)
        x = self.inception_5b(x)
        print('5b',x.shape)

        x = self.avgpool(x)
        print('AvgPool',x.shape)
        
        x = torch.flatten(x, start_dim=1)
        x = self.dropout(x)
        x = self.fc1(x)

        if self.training:
            return x, early2, early1

        return x
    
    def weight_initialize(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [32]:
# def testInceptionv1():
x = torch.randn((32,3,224,224))
model = Inception_V1(3,1000)
# print(model(x).shape)
# return model
# model = testInceptionv1()



architecture = 'googlenet'
model_graph = draw_graph(model, input_size=(1,3,224,224), roll=True, expand_nested=True, graph_name=f'self_{architecture}',save_graph=True,filename=f'self_{architecture}')
# model_graph.visual_graph


conv1 torch.Size([1, 64, 112, 112])
maxpool1 torch.Size([1, 64, 56, 56])
conv2 torch.Size([1, 192, 56, 56])
maxpool2 torch.Size([1, 192, 28, 28])
3a torch.Size([1, 256, 28, 28])
3b torch.Size([1, 480, 28, 28])
3bmax torch.Size([1, 480, 14, 14])
4a torch.Size([1, 512, 14, 14])
4b torch.Size([1, 512, 14, 14])
4c torch.Size([1, 512, 14, 14])
4d torch.Size([1, 528, 14, 14])
4e torch.Size([1, 832, 14, 14])
maxpool torch.Size([1, 832, 7, 7])
5a torch.Size([1, 832, 7, 7])
5b torch.Size([1, 1024, 7, 7])
AvgPool torch.Size([1, 1024, 1, 1])





#### Training part

In [None]:
train_loss = []
train_acc = []
net = Inception_V1(in_channels=1, num_class=10, init_weights=True)
net.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr= 0.005)

epochs = 50
start_time = time.time()

for epoch in range(epochs):
    net.train()
    training_loss = 0.0
    corrected_data = 0
    
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        final_outputs, early_outputs2, early_outputs1 = net(images.to(device))
        _, y_pred = torch.max(final_outputs, dim=1)
        loss_final = loss_function(final_outputs, labels.to(device))   
        loss_early2 = loss_function(early_outputs2, labels.to(device))   
        loss_early1 = loss_function(early_outputs1, labels.to(device))
        total_loss = loss_final + 0.3 * loss_early2 + 0.3 * loss_early1
        total_loss.backward()
        optimizer.step()   
        
        training_loss += total_loss.item()
        corrected_data += (y_pred == labels.to(device)).sum().item()
        
        rate = (step + 1) / len(train_loader)
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
        print()
        accurate_train = corrected_data / len(train_dataset)
        train_loss.append(training_loss / len(train_loader))
        train_acc.append(accurate_train)
        
    net.eval()
    training_loss = 0.0
    corrected_data = 0
    

data:  [tensor([[[[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]]],


        [[[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]]],


        [[[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]]],


        ...,


        [[[-1., -1., -1.,  ..., -1., -1., -1.],
        

KeyboardInterrupt: 

In [35]:
# Example of target with class indices
# loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
print(input)
print(target)
# output = loss(input, target)
# output.backward()
# Example of target with class probabilities
# input = torch.randn(3, 5, requires_grad=True)
# target = torch.randn(3, 5).softmax(dim=1)
# output = loss(input, target)
# output.backward()

tensor([[ 1.2323, -0.5034, -0.6367,  0.3553, -0.4628],
        [ 0.7688,  0.7587,  1.1700,  2.0688, -1.1804],
        [-1.7970,  0.4634,  0.4803,  0.4655, -1.0734]], requires_grad=True)
tensor([0, 4, 0])
