# Libraries

In [8]:
import torch
from torch import nn, optim
from google.colab import drive

!pip install torchinfo
from torchinfo import summary

from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import time

drive.mount('/content/drive')
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Inception Model

In [21]:
from torch.nn.modules.dropout import Dropout2d
class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.convblock = nn.Sequential(nn.Conv2d(in_channels, out_channels, bias=False, **kwargs),
                                       nn.BatchNorm2d(out_channels),
                                       nn.ReLU())

    def forward(self, x):
        output = self.convblock(x)
        return output

class InceptionNet(nn.Module):
    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool):
        super().__init__()
        self.path1 = nn.Sequential(BasicConv2d(in_channels, ch1x1, kernel_size=1))
        self.path2 = nn.Sequential(BasicConv2d(in_channels, ch3x3red, kernel_size=1),
                                   BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1))
        self.path3 = nn.Sequential(BasicConv2d(in_channels, ch5x5red, kernel_size=1),
                                   BasicConv2d(ch5x5red, ch5x5, kernel_size=3, padding=1))
        self.path4 = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
                                   BasicConv2d(in_channels, pool, kernel_size=1))

    def forward(self, x):
        output1 = self.path1(x)
        output2 = self.path2(x)
        output3 = self.path3(x)
        output4 = self.path4(x)
        output = [output1, output2, output3, output4]
        return torch.concat(output, dim=1) # dim=1, num*ch*row*col

class InceptionAux(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.path = nn.Sequential(nn.AvgPool2d(kernel_size=5, stride=3),
                                  BasicConv2d(in_channels, 128, kernel_size=1),
                                  nn.Flatten(start_dim=1),
                                  nn.Linear(2048, 1028, bias=False),
                                  nn.ReLU(),
                                  nn.Dropout2d(p=0.7),
                                  nn.Linear(1028, num_classes))

    def forward(self, x):
        output = self.path(x)
        return output

class InceptionNet_V1(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.max1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = BasicConv2d(64, 192, kernel_size=3, stride=1)

        self.max2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.incept3a = InceptionNet(192, 64, 96, 128, 16, 32, 32)
        self.incept3b = InceptionNet(256, 128, 128, 192, 32, 96, 64)

        self.max3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.incept4a = InceptionNet(480, 192, 96, 208, 16, 48, 64)
        self.incept4b = InceptionNet(512, 160, 112, 224, 24, 64, 64)
        self.incept4c = InceptionNet(512, 128, 128, 256, 24, 64, 64)
        self.incept4d = InceptionNet(512, 112, 144, 288, 32, 64, 64)
        self.incept4e = InceptionNet(528, 256, 160, 320, 32, 128, 128)

        self.max4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.incept5a = InceptionNet(832, 256, 160, 320, 32, 128, 128)
        self.incept5b = InceptionNet(832, 384, 192, 384, 48, 128, 128)
        self.avg = nn.AvgPool2d(kernel_size=7, stride=1)
        self.linear = nn.Linear(1024, num_classes)

        if self.training:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)
        else:
            self.aux1 = None
            self.axu2 = None


    def forward(self, x):

        x = self.conv1(x)    # 64x112x112
        x = self.max1(x)     # 64x56x56
        x = self.conv2(x)    # 192x56x56

        x = self.max2(x)     # 192x28x28
        x = self.incept3a(x) # 256x28x28
        x = self.incept3b(x) # 480x28x28

        x = self.max3(x)     # 480x14x14
        x = self.incept4a(x) # 512x14x14

        if self.aux1 is not None and self.training:
            aux1 = self.aux1(x)
        x = self.incept4b(x) # 512x14x14
        print('forward working')
        x = self.incept4c(x) # 512x14x14
        x = self.incept4d(x) # 528x14x14

        if self.aux2 is not None and self.training:
            aux2 = self.aux2(x)

        x = self.incept4e(x) # 832x14x14

        x = self.max4(x)     # 832x7x7
        x = self.incept5a(x) # 832x7x7
        x = self.incept5b(x) # 1024x7x7

        x = self.avg(x)      # 1024x1x1
        x = torch.flatten(x, start_dim=1)
        x = torch.dropout(x, p=0.7, train=True)
        x = self.linear(x) # 1000x1x1

        if self.training:
            return aux1, aux2, x
        else:
            return x

In [18]:
a = InceptionNet_V1(1000)
a.train()
summary(a, input_size=(10,3,224,224))

working
forward working


Layer (type:depth-idx)                        Output Shape              Param #
InceptionNet_V1                               [10, 1000]                6,402,320
├─BasicConv2d: 1-1                            [10, 64, 112, 112]        --
│    └─Sequential: 2-1                        [10, 64, 112, 112]        --
│    │    └─Conv2d: 3-1                       [10, 64, 112, 112]        9,408
│    │    └─BatchNorm2d: 3-2                  [10, 64, 112, 112]        128
│    │    └─ReLU: 3-3                         [10, 64, 112, 112]        --
├─MaxPool2d: 1-2                              [10, 64, 56, 56]          --
├─BasicConv2d: 1-3                            [10, 192, 54, 54]         --
│    └─Sequential: 2-2                        [10, 192, 54, 54]         --
│    │    └─Conv2d: 3-4                       [10, 192, 54, 54]         110,592
│    │    └─BatchNorm2d: 3-5                  [10, 192, 54, 54]         384
│    │    └─ReLU: 3-6                         [10, 192, 54, 54]         --
├─M

In [13]:
x = torch.rand([10,3,224,224], dtype=torch.float)
aux1, aux2, output = a(x)



forward working


In [14]:
print(aux1.shape)
print(aux2.shape)
print(output.shape)

torch.Size([10, 1000])
torch.Size([10, 1000])
torch.Size([10, 1000])


# hyper parameters

In [24]:
root = '/content/drive/MyDrive/Colab Notebooks/data'
BATCH_SIZE = 64
LR = 2e-3
LR_STEP = 8
GAMMA = 4e-2
EPOCH = 5
criterion = nn.CrossEntropyLoss()
MOMENTUM = 0.9
num_classes = 1000
dataset = ''

In [25]:
# # 이미지넷을 사용하기에는 데이터가 너무 커서 틀만 만들고 마무리

# transform = transforms.ToTensor()
# train_DS = datasets.ImageNet(root=root, split='train', download=True, transform=transform)
# test_DS = datasets.ImageNet(root=root, split='val', download=True, transform=transform)
# train_DL = torch.utils.data.DataLoader(train_DS, batch_size=BATCH_SIZE, shuffle=True)
# test_DL = torch.utils.data.DataLoader(test_DS, batch_size=BATCH_SIZE, shuffle=True)

# Train and Test

In [None]:
def Train(model, train_DL, criterion, optimizer,
          EPOCH, BATCH_SIZE, TRAIN_RATIO, **kwargs):

    if 'LR_STEP' in kwargs:
        scheduler = StepLR(optimizer, stop_size=kwargs['LR_STEP'], gamma=kwargs['LR_GAMMA'])
    else:
        # scheduler 변수가 참조될 때, 선언이 안되었을 경우에 대비해서 만듬
        scheduler = None

    loss_history = {'train': [], 'val': []}
    acc_history = {'train': [], 'val': []}
    best_loss = 9999

    for ep in range(EPOCH):
        epoch_start = time.time()
        current_lr = optimizer.param_groups[0]['lr']
        print(f'Epoch: {ep+1}, current_LR = {current_lr}')

        model.train()
        train_loss, train_acc, _ = loss_epoch(model, train_DL, criterion, optimizer)
        loss_history['train'] += [train_loss]
        acc_history['train'] += [train_acc]

        for x_batch, y_batch in train_DL:
            x_batch = x_batch.to(DEVICE)
            y_batch = y_batch.to(DEVICE)
            # inference
            aux1, aux2, y_hat = model(x_batch)
            # loss
            loss = criterion(y_hat, y_batch) + 0.3*criterion(aux1, y_batch) + 0.3*criterion(aux2, y_batch)
            # update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # loss accumulation
            loss_b = loss.item() * x_batch.shape[0]
            rloss += loss_b

        # print loss
        loss_e = rloss/NoT
        loss_history += [loss_e]
        print(f"Epoch: {ep+1}, train loss: {round(loss_e,3)}")
        print("-"*20)

    return loss_history

def loss_epoch(model, DL, criterion, optimizer=None):
    N = len(DL.dataset)
    eloss = 0; correct = 0

    for x_batch, y_batch in tqdm(DL, leave=False):
        x_batch = x_batch.to(DEVICE); y_batch = y_batch.to(DEVICE)
        # inference
        aux1, aux2, y_hat = model(x_batch)
        # loss
        loss = loss = criterion(y_hat, y_batch) + 0.3*criterion(aux1, y_batch) + 0.3*criterion(aux2, y_batch)
        # update
        if model.training and optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # loss accumulateion
        bloss = loss.item * x_batch.shape[0]
        sloss += bloss
        # correct
        correct += torch.sum(y_batch == y_hat.argmax(dim=1).item())

    eloss = sloss/N
    accuracy = correct/N * 100

    return eloss, accuracy, correct

In [22]:
model = InceptionNet_V1(num_classes)

In [23]:
optimizer = optim.SGD(model.parameters(), lr=LR, momentum = MOMENTUM)

In [30]:
x = torch.tensor([1.], requires_grad=True)
y = x**2
z = 3*y

y.backward()
print(x.grad)

tensor([2.])
