#code for compare base model, residual model, residual+bottenlock model

#import module

In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import tensorflow as tf
import torchvision
import torchvision.transforms as transforms

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# three blocks for compare

In [2]:
class Residual_Block(nn.Module):
    def __init__(self, in_dim, mid_dim, out_dim):
        super(Residual_Block,self).__init__()
        # Residual Block
        self.residual_block = nn.Sequential(
                nn.Conv2d(in_dim, mid_dim, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(mid_dim, out_dim, kernel_size=3, padding=1),
            )            
        self.relu = nn.ReLU()
                  
    def forward(self, x):
        out = self. residual_block(x)  # F(x)
        out = out + x  # F(x) + x
        out = self.relu(out)
        return out

In [3]:
class Residual_Block_with_bottenlock(nn.Module):
    def __init__(self, in_dim, mid_dim, out_dim):
        super(Residual_Block_with_bottenlock,self).__init__()
        # Residual Block
        self.residual_block = nn.Sequential(
                nn.Conv2d(in_dim, mid_dim, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(mid_dim,mid_dim, kernel_size=3, padding=1),
                #nn.ReLu(),
                nn.Conv2d(mid_dim, out_dim, kernel_size=1),
            )            
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self. residual_block(x)  # F(x)
        out = out + x  # F(x) + x
        out = self.relu(out)
        return out

In [4]:
class Base_Block(nn.Module):
    def __init__(self, in_dim, mid_dim, out_dim):
        super(Base_Block,self).__init__()
        # Residual Block
        self.residual_block = nn.Sequential(
                nn.Conv2d(in_dim, mid_dim, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(mid_dim, out_dim, kernel_size=3, padding=1),
            )            
        self.relu = nn.ReLU()
                  
    def forward(self, x):
        out = self. residual_block(x)  # F(x)
        out = out  # F(x)
        out = self.relu(out)
        return out

#build model

In [17]:
class Base_Model(nn.Module):
  def __init__(self,selected_block='base'):
    super(Base_Model,self).__init__()
    self.selected_block = selected_block
    if(self.selected_block=="base"):
      self.layer1 = Base_Block(1,4,4) #28x28 ->28x28
      self.layer2 = Base_Block(4,4,4) #14x14 -> 14x14
      self.layer3 = Base_Block(4,4,4) #7x7 -> 7x7
      self.layer4 = Base_Block(4,4,4) #3x3 -> 3x3
    elif(self.selected_block=='residual'):
      self.layer1 = Residual_Block(1,4,4)
      self.layer2 = Residual_Block(4,4,4)
      self.layer3 = Residual_Block(4,4,4)
      self.layer4 = Residual_Block(4,4,4)
    else:
      self.layer1 = Residual_Block_with_bottenlock(1,4,4)
      self.layer2 = Residual_Block_with_bottenlock(4,4,4)
      self.layer3 = Residual_Block_with_bottenlock(4,4,4)
      self.layer4 = Residual_Block_with_bottenlock(4,4,4)
    
    self.last_layer = nn.Linear(36,10)
    
  def forward(self, x):
      out = self.layer1(x)
      #print(out.size())
      out = nn.AvgPool2d(2)(out)
      #print(out.size())
      out = self.layer2(out)
      #print(out.size())
      out = nn.AvgPool2d(2)(out)
      #print(out.size())
      out = self.layer3(out)
      #print(out.size())
      out = nn.AvgPool2d(2)(out)
      #print(out.size())
      out = self.layer4(out)
      #print(out.size())
      out = out.view(16,-1) #64 is batch_Size
      out = self.last_layer(out)
      return out

#load data

In [6]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

In [7]:
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16,
                                         shuffle=False, num_workers=2)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



#see result

In [16]:
def eval_model(model,criterion,optimizer):
  num_epochs=5
  for epoch in range(num_epochs):
      # Train the model for one epoch
      model.train()
      train_loss = 0.0
      train_correct = 0
      train_total = 0
      for inputs, labels in trainloader:
          inputs, labels = inputs.to(device), labels.to(device)
          optimizer.zero_grad()
          outputs = model(inputs)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          # Update the training loss and accuracy
          train_loss += loss.item() * inputs.size(0)
          _, predicted = torch.max(outputs.data, 1)
          train_correct += (predicted == labels).sum().item()
          train_total += labels.size(0)

      # Compute the validation accuracy
      model.eval()
      val_correct = 0
      val_total = 0
      with torch.no_grad():
          for inputs, labels in testloader:
              inputs, labels = inputs.to(device), labels.to(device)
              outputs = model(inputs)
              _, predicted = torch.max(outputs.data, 1)
              val_correct += (predicted == labels).sum().item()
              val_total += labels.size(0)
      val_acc = val_correct / val_total

      # Print the training loss and validation accuracy for this epoch
      train_loss = train_loss / len(trainloader.dataset)
      train_acc = train_correct / train_total
      print(f'Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f} Train Acc: {train_acc:.4f} Val Acc: {val_acc:.4f}')

In [None]:
import time
model = Base_Model('base').to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

start = time.time()
eval_model(model,criterion,optimizer)
fin = time.time()
print(f"total time: {fin-start:.2f} second")

Epoch 1/5: Train Loss: 2.3018 Train Acc: 0.1114 Val Acc: 0.1135
Epoch 2/5: Train Loss: 2.3014 Train Acc: 0.1124 Val Acc: 0.1135
Epoch 3/5: Train Loss: 2.3014 Train Acc: 0.1124 Val Acc: 0.1135
Epoch 4/5: Train Loss: 2.3014 Train Acc: 0.1124 Val Acc: 0.1135
Epoch 5/5: Train Loss: 2.3013 Train Acc: 0.1124 Val Acc: 0.1135
total time: 205.11 second


In [None]:
import time
model = Base_Model('residual').to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

start = time.time()
eval_model(model,criterion,optimizer)
fin = time.time()
print(f"total time: {fin-start:.2f} second")

Epoch 1/5: Train Loss: 0.9464 Train Acc: 0.6869 Val Acc: 0.9262
Epoch 2/5: Train Loss: 0.2057 Train Acc: 0.9369 Val Acc: 0.9536
Epoch 3/5: Train Loss: 0.1421 Train Acc: 0.9563 Val Acc: 0.9639
Epoch 4/5: Train Loss: 0.1120 Train Acc: 0.9647 Val Acc: 0.9683
Epoch 5/5: Train Loss: 0.0995 Train Acc: 0.9684 Val Acc: 0.9742
total time: 211.52 second


In [None]:
import time
model = Base_Model('residual_with_bottenlock').to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

start = time.time()
eval_model(model,criterion,optimizer)
fin = time.time()
print(f"total time: {fin-start:.2f} second")

Epoch 1/5: Train Loss: 1.1405 Train Acc: 0.6259 Val Acc: 0.8550
Epoch 2/5: Train Loss: 0.4335 Train Acc: 0.8647 Val Acc: 0.8951
Epoch 3/5: Train Loss: 0.3531 Train Acc: 0.8928 Val Acc: 0.9051
Epoch 4/5: Train Loss: 0.3136 Train Acc: 0.9019 Val Acc: 0.9034
Epoch 5/5: Train Loss: 0.2917 Train Acc: 0.9077 Val Acc: 0.9148
total time: 223.22 second


In [None]:
model = Base_Model('base').to(device)
num_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {num_params}")

Number of parameters: 1446


In [None]:
model = Base_Model('residual').to(device)
num_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {num_params}")

Number of parameters: 1446


In [None]:
model = Base_Model('residual_with_bottenlock').to(device)
num_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {num_params}")

Number of parameters: 1110


residual구조가 없는 base model은 8개라는 적은 층에도 불구하고 학습이 이루어지지 않음.

모델이 작기 때문에 bottenlock 구조와 residual구조의 파라미터 개수 차이가 적어 유의미한 속도 차이가 없는 것으로 보임

#additional research

https://arxiv.org/abs/1605.06431

Residual Networks Behave Like Ensembles of Relatively Shallow Networks

resnet의 residual block들이 앙상블처럼 활동한다는 논문이 있어 직접 실험해봄.
가장 처음 layer1을 제외한, layer2,3,4들을 하나씩 빼보면서 val_score을 구해보았다.

In [32]:
#model.forward를 override하기 위한 새로운 forward생성성
def dropout_forward(self, x, skip_layers=[False, False, False]):
      out = self.layer1(x)
      out = nn.AvgPool2d(2)(out)

      if not skip_layers[0]:
          out = self.layer2(out)
      out = nn.AvgPool2d(2)(out)
      
      if not skip_layers[1]:
          out = self.layer3(out)
      out = nn.AvgPool2d(2)(out)
      
      if not skip_layers[2]:
          out = self.layer4(out)
      
      out = out.view(16,-1)
      out = self.last_layer(out)
      return out

In [21]:
#val_accuracy만 구하는 함수 작성
def dropout_eval(model,skip_layers):
      model.eval()
      val_correct = 0
      val_total = 0
      with torch.no_grad():
          for inputs, labels in testloader:
              inputs, labels = inputs.to(device), labels.to(device)
              outputs = model(inputs,skip_layers)
              _, predicted = torch.max(outputs.data, 1)
              val_correct += (predicted == labels).sum().item()
              val_total += labels.size(0)
      val_acc = val_correct / val_total
      print(f' Val Acc: {val_acc:.4f}')

In [20]:
#base_model을 일단 학습
import time
model = Base_Model('residual').to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

start = time.time()
eval_model(model,criterion,optimizer)
fin = time.time()
print(f"total time: {fin-start:.2f} second")

Epoch 1/5: Train Loss: 0.9350 Train Acc: 0.6996 Val Acc: 0.9141
Epoch 2/5: Train Loss: 0.2130 Train Acc: 0.9359 Val Acc: 0.9350
Epoch 3/5: Train Loss: 0.1573 Train Acc: 0.9533 Val Acc: 0.9618
Epoch 4/5: Train Loss: 0.1321 Train Acc: 0.9604 Val Acc: 0.9635
Epoch 5/5: Train Loss: 0.1138 Train Acc: 0.9659 Val Acc: 0.9673
total time: 223.30 second


In [34]:
#override model.forward
model.forward = dropout_forward.__get__(model,Base_Model)

In [37]:
#정상적으로 override되었는지 확인
dropout_eval(model,skip_layers=[False,False,False])

 Val Acc: 0.9673


In [38]:
#layers를 하나씩 빼봄
dropout_eval(model,skip_layers=[True,False,False])
dropout_eval(model,skip_layers=[False,True,False])
dropout_eval(model,skip_layers=[False,False,True])

 Val Acc: 0.6217
 Val Acc: 0.5639
 Val Acc: 0.9345


In [39]:
#layer를 2개 뺀다면?
dropout_eval(model,skip_layers=[True,True,False])
dropout_eval(model,skip_layers=[False,True,True])
dropout_eval(model,skip_layers=[True,False,True])

 Val Acc: 0.2357
 Val Acc: 0.4983
 Val Acc: 0.5913


In [40]:
#다 빼버린다면..?
dropout_eval(model,skip_layers=[True,True,True])

 Val Acc: 0.2245


층의 개수가 적어 생각보다 유의미한 결과는 내지 못함