<a href="https://colab.research.google.com/github/goldcece/uap-paper_review-study/blob/main/VGG_NET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## VGG-Net(2014)

- 합성곱층의 파라미터 수를 줄이고 훈련시간을 개선하기 위함

- 깊이의 영향만을 최대한 확인하고자, 필터의 크기를 가장 작은 3*3 으로 고정

- 네트워크 계층의 총 개수에 따라 여러 유형의 VGGNet이 있음
  - e.g. VGG16, VGG19

- 모든 합성곱 커널의 크기는 3 * 3
- 최대 풀링 커널의 크기는 2 * 2
- 최대 풀링의 stride = 2 (합성곱의 stride는 1)

- 첫 번째 합성곱층을 통과하면 64개의 224 * 224 특성맵(224 * 224 * 64)이 생성

- VGG16 기준으로, 마지막 16번째 layer(softmax())를 제외한 모든 층에 ReLU 활성화 함수가 적용

### VGG11 구현하기

In [None]:
# 필요한 라이브러리 호출
import copy  # used for copying objects (shallow / deep copy)
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.datasets as Datasets
import torchvision.transforms as transforms

In [None]:
# Examples of using the copy library

# Plain assignment: no copy is made, both names refer to the same list
original = [1, 2, 3]
copy_o = original
print(copy_o)

copy_o[2] = 10 # change the third element of copy_o to 10
print("copy_o: ", copy_o)
print("original: ", original) # original changes too

print('-'*30)

# Shallow copy
original = [[1,2], 3]
copy_o = copy.copy(original) # shallow-copy original into copy_o with copy.copy()
copy_o[0] = 100
print("copy_o: ", copy_o)
print("original: ", original) # original is unchanged: only the copy's top-level slot was rebound

append = copy.copy(original)
append[0].append(4) # append 4 to the first (nested) list [1,2]
print("append: ", append)
print("original: ", original) # original changes too: the nested list is shared with the shallow copy

print('-'*30)

# Deep copy: changes to the copy do not affect the original

original = [[1,2], 3]
copy_o = copy.deepcopy(original)
print(copy_o)
copy_o[0]= 100
print("copy_o: ", copy_o)
print("original: ", original)

append = copy.deepcopy(original)
append[0].append(4) # the nested list was copied as well, so original keeps [1, 2]
print("append: ", append)
print("original: ", original)


[1, 2, 3]
copy_o:  [1, 2, 10]
original:  [1, 2, 10]
------------------------------
copy_o:  [100, 3]
original:  [[1, 2], 3]
append:  [[1, 2, 4], 3]
original:  [[1, 2, 4], 3]
------------------------------
[[1, 2], 3]
copy_o:  [100, 3]
original:  [[1, 2], 3]
append:  [[1, 2, 4], 3]
original:  [[1, 2], 3]


In [None]:
# VGG 모델 정의
class VGG(nn.Module):
  """VGG-style image classifier: convolutional features followed by a 3-layer MLP head.

  Args:
    features: module that maps an image batch to a feature map with 512
      channels (the classifier input size is fixed to 512*7*7).
    output_dim: number of values produced by the final linear layer.
  """

  def __init__(self, features, output_dim):
    super().__init__()
    self.features = features  # convolutional feature extractor passed in by the caller
    self.avgpool = nn.AdaptiveAvgPool2d(7)  # forces a 7x7 spatial size before flattening
    # Registered as `classifier` (lower case) because forward() uses that name;
    # the previous `Classifier` spelling made every forward pass raise AttributeError.
    self.classifier = nn.Sequential(
        nn.Linear(512*7*7, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, output_dim)
    )  # fully connected layers and output layer

  def forward(self, x):
    """Return ``(logits, h)`` where ``h`` is the flattened pooled feature map."""
    x = self.features(x)
    x = self.avgpool(x)
    h = x.view(x.shape[0], -1)  # flatten everything except the batch dimension
    x = self.classifier(h)
    return x, h


In [None]:
# VGG layer configurations.
# An int is the number of output channels of a convolution (and therefore the
# input channels of the next one); 'M' marks a max-pooling layer.
# The number in each name counts convolutional + fully connected layers.

# vgg11: 8 convolutional layers + 3 fully connected layers = 11
vgg11_config = [
    64, 'M',
    128, 'M',
    256, 256, 'M',
    512, 512, 'M',
    512, 512, 'M',
]

# vgg13: 10 convolutional layers + 3 fully connected layers = 13
vgg13_config = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 'M',
    512, 512, 'M',
    512, 512, 'M',
]

# vgg16: 13 convolutional layers + 3 fully connected layers = 16
vgg16_config = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 'M',
    512, 512, 512, 'M',
    512, 512, 512, 'M',
]

# vgg19: 16 convolutional layers + 3 fully connected layers = 19
vgg19_config = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 256, 'M',
    512, 512, 512, 512, 'M',
    512, 512, 512, 512, 'M',
]


In [None]:
# VGG 계층 정의
def get_vgg_layers(config, batch_norm, in_channels=3):
  """Build the convolutional feature extractor described by ``config``.

  Args:
    config: iterable whose items are either an int (output channels of a
      3x3, padding-1 convolution) or the string 'M' (2x2 max pooling).
    batch_norm: if truthy, insert BatchNorm2d between each convolution and
      its ReLU.
    in_channels: number of channels of the input images. Defaults to 3, the
      value that used to be hard-coded; pass 1 for single-channel input.

  Returns:
    nn.Sequential holding the layers in the order given by ``config``.

  Raises:
    AssertionError: if an item of ``config`` is neither 'M' nor an int.
  """
  layers = []

  for c in config:
    # Every entry must be the pooling marker or a channel count
    assert c == 'M' or isinstance(c, int), f"config entries must be 'M' or int, got {c!r}"

    if c == 'M':  # pooling marker -> max pooling
      layers.append(nn.MaxPool2d(kernel_size=2))
      continue

    # Channel count -> convolution, optional batch norm, then ReLU
    layers.append(nn.Conv2d(in_channels, c, kernel_size=3, padding=1))
    if batch_norm:
      layers.append(nn.BatchNorm2d(c))
    layers.append(nn.ReLU(inplace=True))

    in_channels = c  # this layer's output feeds the next convolution

  return nn.Sequential(*layers)  # all layers of the network, in order


# Build the VGG11 feature layers (with batch normalization) and print their structure
vgg11_layers = get_vgg_layers(vgg11_config, batch_norm=True)
print(vgg11_layers)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU(inplace=True)
  (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (13): ReLU(inplace=True)
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, cei

In [None]:
# Full VGG11 network: convolutional features + classifier head.
# The model is trained on MNIST further down, which has 10 digit classes, so the
# head must emit 10 logits; with 2 outputs CrossEntropyLoss rejects labels >= 2.
OUTPUT_DIM = 10
model = VGG(vgg11_layers, OUTPUT_DIM)
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [None]:
# Load torchvision's VGG11 (batch-norm variant) with pretrained weights and print it
import torchvision.models as models

pretrained_model = models.vgg11_bn(pretrained=True) # vgg11_bn: the VGG11 variant that applies batch normalization
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of `weights=` — confirm against the installed version
print(pretrained_model)

Downloading: "https://download.pytorch.org/models/vgg11_bn-6002323d.pth" to /root/.cache/torch/hub/checkpoints/vgg11_bn-6002323d.pth
100%|██████████| 507M/507M [00:06<00:00, 81.3MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [None]:
# Custom VGG-style config: three stages of three convolutions, each stage closed by max pooling
My_Vgg = [
    64, 64, 64, 'M',
    128, 128, 128, 'M',
    256, 256, 256, 'M',
]

In [None]:
# Image preprocessing for MNIST.
# MNIST images are single-channel, while Normalize below uses 3-channel statistics
# and the VGG feature extractor expects 3 input channels. Replicating the gray
# channel three times keeps both valid (Normalize otherwise fails with a shape error).
# NOTE(review): horizontal flips can change what a digit looks like — confirm this augmentation is wanted.
mnist_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((256, 256)),
    transforms.RandomRotation(5),  # rotate by at most 5 degrees
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [None]:
# Load the MNIST dataset

from torchvision.datasets import MNIST

# Download path
download_root = './MNIST_DATASET'

# Fraction of the official training split kept for training; the rest is used for validation
VALID_RATIO = 0.9

full_train_dataset = MNIST(download_root, transform=mnist_transform, train=True, download=True)
test_dataset = MNIST(download_root, transform=mnist_transform, train=False, download=True)

# Hold the validation set out of the training split. Previously the validation and
# test datasets were both the `train=False` split, so model selection on the
# validation loss was effectively done on the test data.
n_train_examples = int(len(full_train_dataset) * VALID_RATIO)
n_valid_examples = len(full_train_dataset) - n_train_examples
train_dataset, valid_dataset = data.random_split(
    full_train_dataset,
    [n_train_examples, n_valid_examples],
    generator=torch.Generator().manual_seed(0),  # fixed seed -> the same split on every run
)
# NOTE(review): validation and test data still go through the random augmentations
# in mnist_transform — consider a deterministic evaluation transform.


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST_DATASET/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 79068484.11it/s]


Extracting ./MNIST_DATASET/MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST_DATASET/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST_DATASET/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 13942874.52it/s]


Extracting ./MNIST_DATASET/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST_DATASET/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST_DATASET/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 21495213.22it/s]


Extracting ./MNIST_DATASET/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST_DATASET/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST_DATASET/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4064546.36it/s]

Extracting ./MNIST_DATASET/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST_DATASET/MNIST/raw






In [None]:
# Print the number of examples in the train, test and validation datasets (in that order)
for split in (train_dataset, test_dataset, valid_dataset):
  print(len(split))

60000
10000
10000


In [None]:
# Loading data with ImageFolder.
# The example below is disabled: it lives inside a string literal and is never executed.
# NOTE(review): it would not run as written if enabled:
#   - `train_path` / `test_path` have no right-hand side (placeholders for dataset paths)
#   - `transform = train_trasnforms` is a misspelling of `train_transforms`
#   - random_split assigns `train_datas, valid_datas`, but the next statements use `valid_data`
#   - `valid_data.dataset.transforms = ...` presumably should set `.transform` — verify against ImageFolder
#   - `test_transforms` applies the same random rotation/flip as `train_transforms`
"""

train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomRotation(5), # 5도 이하로 이미지를 회전시킴
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomRotation(5), # 5도 이하로 이미지를 회전시킴
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_path = # 훈련 데이터셋 경로
test_path = # 테스트 데이터셋 경로


train_data = torchvision.datasets.ImageFolder(
    train_path,
    transform = train_trasnforms
)

test_data = torchvision.datasets.ImageFolder(
    test_path,
    transform = test_transforms
)

print(len(train_data))
print(len(test_data))


# 훈련과 검증 데이터 분할 : 데이터가 데이터로더로 넘어간 이후에는 분리가 불가능 하므로,
## 데이터셋 단계에서 훈련/검증 데이터를 분할해야 한다.
VALID_RATIO = 0.9
n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = int(len(train_data) - n_train_examples)

train_datas, valid_datas = data.random_split(train_data, [n_train_examples, n_valid_examples])


# 검증데이터 전처리 : 학습데이터셋에서 분리된 valid data 를 변수로 복사 > 'test_transforms 로 전처리
valid_data = copy.deepcopy(valid_data)
valid_data.dataset.transforms = test_transforms

"""

In [None]:
# Report how many examples the training, validation and test datasets contain
print(f'Number of training examples:{len(train_dataset)}')
print(f'Number of validation examples:{len(valid_dataset)}')
print(f'Number of testing examples:{len(test_dataset)}')

Number of training examples:60000
Number of validation examples:10000
Number of testing examples:10000


In [None]:
# Wrap each dataset in a DataLoader that yields mini-batches of BATCH_SIZE examples
BATCH_SIZE = 128

# Only the training data is shuffled; validation and test keep their order
train_iterator = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_iterator = data.DataLoader(valid_dataset, batch_size=BATCH_SIZE)
test_iterator = data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:
# Optimizer, loss function and device placement.
# Use the GPU when one is available and fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)

# The optimizer is created after the model has been moved so that it is built
# from the parameters that live on the target device.
# NOTE(review): lr=1e-7 is a very small learning rate — confirm it is intended.
optimizer = optim.Adam(model.parameters(), lr=1e-7)

In [None]:
# 모델 정확도 측정 함수
def calculate_accuracy(y_pred, y):
  """Return the fraction of rows of ``y_pred`` whose arg-max along dim 1 equals ``y``.

  The result is a 0-dim float tensor: number of matches divided by ``y.shape[0]``.
  """
  predicted = y_pred.argmax(1, keepdim=True)
  # view_as reshapes the labels to the shape of the predictions so they compare element-wise
  targets = y.view_as(predicted)
  n_correct = predicted.eq(targets).sum()
  return n_correct.float() / y.shape[0]

In [None]:
# 모델의 학습 함수 정의
def train(model, iterator, optimizer, criterion, device):
  """Train ``model`` for one pass over ``iterator``.

  Each batch is moved to ``device``, the first element of the model's output is
  scored with ``criterion``, and one optimizer step is taken.

  Returns:
    ``(loss, accuracy)``: the per-batch values summed and divided by the
    number of batches.
  """
  model.train()
  loss_total = 0
  acc_total = 0

  for inputs, targets in iterator:
    inputs, targets = inputs.to(device), targets.to(device)

    optimizer.zero_grad()
    logits, _ = model(inputs)  # the model returns (logits, flattened features)
    batch_loss = criterion(logits, targets)
    batch_acc = calculate_accuracy(logits, targets)
    batch_loss.backward()
    optimizer.step()

    loss_total += batch_loss.item()
    acc_total += batch_acc.item()

  n_batches = len(iterator)
  return loss_total / n_batches, acc_total / n_batches

In [None]:
# 모델 성능 측정 함수
def evaluate(model, iterator, criterion, device):
  """Measure loss and accuracy of ``model`` over ``iterator`` without updating it.

  The model is put in eval mode and gradients are disabled.

  Returns:
    ``(loss, accuracy)``: the per-batch values summed and divided by the
    number of batches.
  """
  model.eval()
  running_loss, running_acc = 0, 0

  with torch.no_grad():
    for inputs, targets in iterator:
      inputs = inputs.to(device)
      targets = targets.to(device)
      logits, _ = model(inputs)  # the model returns (logits, flattened features)
      running_loss += criterion(logits, targets).item()
      running_acc += calculate_accuracy(logits, targets).item()

  n_batches = len(iterator)
  return running_loss / n_batches, running_acc / n_batches


In [None]:
# 학습 시간 측정 함수
def epoch_time(start_time, end_time):
  """Split the duration between two timestamps into whole minutes and seconds.

  Args:
    start_time: start timestamp in seconds.
    end_time: end timestamp in seconds.

  Returns:
    ``(minutes, seconds)`` as ints; fractional seconds are truncated.
  """
  elapsed_time = end_time - start_time
  elapsed_mins = int(elapsed_time / 60)
  # Fixed: this line used the misspelled name `elasped_time`, which raised NameError
  elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
  return elapsed_mins, elapsed_secs


In [None]:
# Train the model, saving a checkpoint whenever the validation loss improves
EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(EPOCHS):
  start_time = time.monotonic()
  train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)

  valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)
  if valid_loss < best_valid_loss:
    # Keep only the weights of the best epoch so far
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), './VGG-model.pt')

  end_time = time.monotonic()
  epoch_mins, epoch_secs = epoch_time(start_time, end_time)

  # Fixed format specs: `:3f` (a width, not a precision) -> `:.3f`, `: .2f` -> `:.2f`,
  # and the "Vaid" typo, so both lines are formatted the same way.
  print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
  print(f'\t Train Loss : {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
  print(f'\t Valid Loss : {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')



NameError: ignored

In [None]:
# Measure the saved checkpoint's performance on the test dataset.
# map_location lets a checkpoint written on one device (e.g. GPU) load on the current one.
model.load_state_dict(torch.load('./VGG-model.pt', map_location=device))
test_loss, test_acc = evaluate(model, test_iterator, criterion, device)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')


FileNotFoundError: ignored

In [None]:
# 테스트 데이터셋을 이용한 모델의 예측 확인 함수
def get_predictions(model, iterator) :
  """Run ``model`` over ``iterator`` and collect inputs, labels and class probabilities.

  Args:
    model: module whose forward pass returns a pair whose first element is
      the logits.
    iterator: iterable of ``(x, y)`` batches.

  Returns:
    ``(images, labels, probs)``: tensors concatenated along dim 0 and moved to
    the CPU; ``probs`` is the softmax of the logits over the last dimension.
  """
  model.eval()

  # Run on the device the model lives on instead of relying on a global `device`;
  # a model without parameters falls back to the CPU.
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else torch.device('cpu')

  images = []
  labels = []
  probs = []

  with torch.no_grad():
    for (x, y) in iterator:
      x = x.to(run_device)
      y_pred, _ = model(x)
      y_prob = F.softmax(y_pred, dim=-1)
      images.append(x.cpu())
      labels.append(y.cpu())
      probs.append(y_prob.cpu())

  images = torch.cat(images, dim=0)  # dim=0 concatenates along the batch axis
  labels = torch.cat(labels, dim=0)
  probs = torch.cat(probs, dim=0)
  return images, labels, probs

In [None]:
# Collect the test images the model classified correctly
images, labels, probs = get_predictions(model, test_iterator)
pred_labels = torch.argmax(probs, 1)
corrects = torch.eq(labels, pred_labels)  # True where the prediction equals the label
correct_examples = []

# Fixed: the loop variable used to be `images`, which shadowed the tensor being
# iterated while the body appended the undefined name `image`.
for image, label, prob, correct in zip(images, labels, probs, corrects):
  if correct:
    correct_examples.append((image, label, prob))

# Most confident predictions first: sort by the largest class probability of each example
correct_examples.sort(reverse=True, key=lambda x: torch.max(x[2], dim=0).values)


RuntimeError: ignored

In [None]:
# zip example: iterate over two lists in lockstep, pairing elements by position
a = [1, 2, 3]
b = ['a', 'b', 'c']

for x, y in zip(a, b):
  print(x, y)

1 a
2 b
3 c


In [None]:
"""
일반적인 함수의 형태
def 함수명(매개변수):
  return 반환 결과 값

lambda 함수는 이름 없이 정의와 동시에 사용할 수 있다
 단, 변수에 할당하지 않으면 함수명이 없어 재사용할 수 없다
lambda 매개변수 : 반환될 결과값
"""


In [None]:
# torch.max example on one row of a random 4x4 tensor
x = torch.randn([4, 4])
print(x)

# x[2] is the third row; with dim=0, torch.max returns both the maximum value and its index within that row
max_elements, max_idxs = torch.max(x[2], dim=0)

print(max_elements)
print(max_idxs)

tensor([[ 0.6197, -0.5377, -1.0071, -0.0397],
        [ 1.0189, -2.4975, -0.2701,  1.1383],
        [-0.8330, -0.8409, -0.6756, -1.1224],
        [ 0.6306, -0.1156,  0.6391,  0.9178]])
tensor(-0.6756)
tensor(2)


In [None]:
# 이미지 출력을 위한 전처리
def normalize_image(image):
  """Rescale ``image`` in place using its own minimum and maximum and return it.

  The minimum is subtracted and the result divided by ``max - min + 1e-5``.
  Methods ending in ``_`` overwrite the tensor's existing memory, so the
  caller's tensor is modified and the same object is returned.
  """
  lowest, highest = image.min(), image.max()
  # torch.clamp keeps the values within the given minimum and maximum
  image.clamp_(min=lowest, max=highest)

  value_range = highest - lowest + 1e-5
  image.add_(-lowest)
  image.div_(value_range)
  return image

In [None]:
# 모델이 정확하게 예측한 이미지만 출력하는 함수 정의
def plot_most_correct(correct, classes, n_images, normalize=True):
  """Plot correctly classified images in a square grid with their labels and probabilities.

  The signature now matches how the function is called in this notebook
  (``plot_most_correct(correct_examples, classes, N_IMAGES)``). The previous
  ``(correct_classes, n_images, normalize)`` version read the undefined names
  ``correct`` and ``correct_class`` and could not run.

  Args:
    correct: sequence of ``(image, true_label, probs)`` tuples.
    classes: sequence mapping a label index to its class name.
    n_images: requested number of images; the grid side is
      ``int(sqrt(n_images))``, so at most ``side * side`` images are drawn.
    normalize: if true, rescale each image with ``normalize_image`` before plotting.
  """
  # NOTE(review): `plt` is expected to be matplotlib.pyplot; it is not imported
  # anywhere in the visible notebook — confirm it is imported before this runs.
  rows = int(np.sqrt(n_images))
  cols = int(np.sqrt(n_images))
  fig = plt.figure(figsize=(25,20))

  # Never index past the examples that are actually available
  n_plots = min(rows*cols, len(correct))

  for i in range(n_plots):
    ax = fig.add_subplot(rows, cols, i+1)
    image, true_label, probs = correct[i]
    # Channels-first -> channels-last for imshow; clone so that the in-place
    # normalization below does not modify the caller's tensors.
    image = image.permute(1, 2, 0).clone()
    true_prob = probs[true_label]
    correct_prob, correct_label = torch.max(probs, dim=0)
    true_class = classes[true_label]
    correct_class = classes[correct_label]

    if normalize:
      image = normalize_image(image)

    ax.imshow(image.cpu().numpy())
    ax.set_title(f'true label: {true_class} ({true_prob:.3f})\n' \
                 f'pred label: {correct_class} ({correct_prob:.3f})')
    ax.axis('off')

  fig.subplots_adjust(hspace=0.4)

# Plot the correctly classified test images, labelled with the test dataset's class names
classes = test_dataset.classes
N_IMAGES = 5
plot_most_correct(correct_examples, classes, N_IMAGES)

NameError: ignored