In [None]:
# torchvision.models.alexnet, vgg, resnet 해보기

- 이미지 분류를 위한 신경망 6.1 (LeNet-5,  AlexNet, VGGNet, GoogLeNet, ResNet)

## 1. LeNet-5

1. **LeNet-5**
- 모델의 기본 조건: 32*32 img size / 손글씨
- ✅ 모델 구조
  <img src = "https://miro.medium.com/v2/resize:fit:1400/format:webp/1*zxrGm9YBq__CPE3EUZKDOQ.jpeg" width = 500 height = 250>
  <img src = "https://thebook.io/img/080263/202.jpg" width = 400 height = 200>

- ✅ advanced 모델 구조
  - tanh -> relu
  - average pooling -> max pooling
  - softmax 는 제외
  - fc 전에 파라미터 너무 많아지므로 adaptive avg pooling 추가
  - 두 fc 는 다음과 같이 변경

    - [C1] conv1: 5*5 (`kernel_size` = 5), `out_channels` = 6
    - [S1] sub-sampling(= down sampling)
      - 공간 해상도 (H*W) 줄이기
      - maxpool2d: 28 -> 14 가 되어야 하니까 `kernel_size` = 2, `stride` = 2
  $$ out = \lfloor \frac{in + 2p - k}{s} \rfloor + 1 $$
    - [C2] conv2: 5*5 (`kernel_size` = 5), `out_channels` = 16
    - [S2] maxpool2d: (`kernel_size` = 2, `stride` = 2)
    - [C3] conv3: 5*5 (`kernel_size` = 5), `out_channels` = 120
    - [A1] AdaptiveAvgPool2d: `output_size` = (5, 5)
    - [F1] fc1: `Linear(120*5*5 , 120)` # img size 에 맞게
    - [F2] fc2: `Linear(120, 84)`
    - [F3] fc3: `Linear(84, 2)`
  
- => dog-cat 문제는 이미지 크기 256 로 해서 변경
- ☑️ Result
  - 데이터 수 부족해서 성능 안 좋은 것임 (한 클래스만 찍힘)



1. cuda device
2. data path, transforms.Compose([transforms.]), ImageFolder
- train, test 나누기 (os.listdir, train_test_split)
- train, test 폴더 생성 및 아래 각 클래스 폴더 생성 (label 이름 인식을 위해서)
- 기존 이미지들 이동하기
- test 의 경우 transform 랜덤 변환 X
3. batch size 변수, dataloader
4. 이미지 시각화
	- subplot, imshow, title, axis off, tight_layout, show
5. 모델: LeNet-5
	- 모델 정의 (torchvision.models, timm 모두 없으므로 직접 ✅class 로 모델 만들기✅)
	- 모델 device 에 올리기 및 출력해보기
	- 파라미터 확인하기
6. lr, criterion, optimizer 정의
7. epochs 수 정하기
- epoch for 문
	- train 순서
		- model train 모드로
		- loss, total, correct = 0
		- loader for문
			- img, label device 에 올리기
			- model 에 넣기
			- loss
			- optimizer zero grad, 역전파로 기울기 계산 (loss.backward), 파라미터 갱신 (optimizer.step) (train 에서만)
			- 현재의 배치 구하고 loss 누적
			- pred 구하고 correct 구하기
			- total 구하기
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- test 순서
		- model eval 모드로
		- loss, total, correct = 0
		- no grad()
			- loader for 문
				- 동일
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- loss, acc 출력
	- 모델 갱신되면 저장
8. train, test acc, loss plot
9. 실제, 예측 결과, 이미지 시각화 (마지막 배치에서 일부만) -> 오분류 이미지 확인
10. (feature map 시각화)


In [None]:
# Mount Google Drive at /content/drive (Colab only); the data paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
import os
import shutil

from sklearn.model_selection import train_test_split

data_path_cat = "/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/dogs-vs-cats/Cat"
data_path_dog = "/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/dogs-vs-cats/Dog"

# os.listdir returns names in arbitrary order; sort them so that a fixed seed
# really does produce the same split on every run.
cat_list = sorted(os.listdir(data_path_cat))
dog_list = sorted(os.listdir(data_path_dog))

# Fixed seed: this cell copies files into persistent folders, so an unseeded
# split would put a *different* subset into train/ and test/ on each re-run and
# the same image could end up in both (train/test leakage).
SPLIT_SEED = 42
train_cat, test_cat = train_test_split(cat_list, random_state = SPLIT_SEED)
train_dog, test_dog = train_test_split(dog_list, random_state = SPLIT_SEED)

base_path = "/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/"

# ImageFolder derives labels from sub-folder names, hence <split>/<class>/ layout.
train_cat_dir = os.path.join(base_path, 'train', 'cat')
train_dog_dir = os.path.join(base_path, 'train', 'dog')

test_cat_dir = os.path.join(base_path, 'test', 'cat')
test_dog_dir = os.path.join(base_path, 'test', 'dog')

for d in [train_cat_dir, train_dog_dir, test_cat_dir, test_dog_dir]:
  os.makedirs(d, exist_ok = True)


def path(file_list, path1, path2):
  """Copy every file named in `file_list` from directory `path1` into `path2`.

  The originals are left in place (shutil.copy); existing files with the same
  name in `path2` are overwritten.
  """
  for f in file_list:
    shutil.copy(os.path.join(path1, f), os.path.join(path2, f))


path(train_cat, data_path_cat, train_cat_dir)
path(train_dog, data_path_dog, train_dog_dir)
path(test_cat, data_path_cat, test_cat_dir)
path(test_dog, data_path_dog, test_dog_dir)

# Leftovers from an earlier, differently-split run stay on disk; report them so
# they can be cleaned up instead of silently inflating the test accuracy.
for train_dir, test_dir in [(train_cat_dir, test_cat_dir), (train_dog_dir, test_dog_dir)]:
  overlap = set(os.listdir(train_dir)) & set(os.listdir(test_dir))
  if overlap:
    print(f"WARNING: {len(overlap)} file(s) exist in both {train_dir} and {test_dir}")

train_data_path = os.path.join(base_path, 'train')
test_data_path = os.path.join(base_path, 'test')

In [None]:
from torchvision import transforms

# Training pipeline: fixed 256x256 resize plus a random horizontal flip as augmentation.
train_transform = transforms.Compose([
    transforms.Resize([256, 256]),
    #transforms.RandomResizedCrop(224), # if enabled, the model input becomes 224 (resize the test set to 224 as well)
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

# Test pipeline: same resize, no random transforms.
test_transform = transforms.Compose([
    transforms.Resize([256, 256]),
    transforms.ToTensor()
])


from torchvision.datasets import ImageFolder
# Labels come from the class sub-folders under each split directory.
train_dataset = ImageFolder(train_data_path, transform = train_transform)
test_dataset = ImageFolder(test_data_path, transform = test_transform)

In [None]:
batch_size = 32

from torch.utils.data import DataLoader
# Shuffle only the training data; the test loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

In [None]:
# Inspect the first training sample returned by the dataset (transformed image and its label).
train_dataset[0]

In [None]:
# Show the class-name -> label-index mapping of the training dataset.
train_loader.dataset.class_to_idx

In [None]:
import matplotlib.pyplot as plt
import random

# Show a 4x4 grid of randomly chosen training images with their class names.
plt.figure(figsize = (10, 10))

for idx in range(16):
  # random.randint includes BOTH endpoints, so the upper bound must be len - 1;
  # using len(train_dataset) occasionally raised IndexError.
  i = random.randint(0, len(train_dataset) - 1)
  data = train_dataset[i]
  plt.subplot(4, 4, idx+1)
  # Tensors are (C, H, W); imshow expects (H, W, C).
  plt.imshow(data[0].permute(1, 2, 0))
  if data[1] == 0:
    t = 'cat'
  else:
    t = 'dog'

  plt.title(t)
plt.tight_layout()
plt.show()

In [None]:
import torch.nn as nn

In [None]:
class LeNet5(nn.Module):
  """LeNet-5 style classifier for 3-channel images with two output classes.

  Differences from the classic network: ReLU activations, max pooling, an
  adaptive average pool that fixes the feature map at 5x5 before the
  fully-connected head, and no softmax (the forward pass returns raw logits).
  """

  def __init__(self):
    super().__init__()
    self.relu = nn.ReLU()

    # Size comments assume a 256x256 input: out = floor((in + 2p - k) / s) + 1
    self.conv1 = nn.Conv2d(3, 6, 5)       # 256 -> 252
    self.pool1 = nn.MaxPool2d(2, 2)       # 252 -> 126
    self.conv2 = nn.Conv2d(6, 16, 5)      # 126 -> 122
    self.pool2 = nn.MaxPool2d(2, 2)       # 122 -> 61
    self.conv3 = nn.Conv2d(16, 120, 5)    # 61 -> 57
    self.avg = nn.AdaptiveAvgPool2d((5, 5))
    self.fc1 = nn.Linear(120 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 2)

  def forward(self, x):
    """Map a batch of images (N, 3, H, W) to logits of shape (N, 2)."""
    feat = self.pool1(self.relu(self.conv1(x)))
    feat = self.pool2(self.relu(self.conv2(feat)))
    feat = self.avg(self.relu(self.conv3(feat)))  # (N, 120, 5, 5)

    # Flatten to (N, 3000) for the fully-connected head.
    hidden = feat.view(-1, self.fc1.in_features)
    hidden = self.relu(self.fc1(hidden))
    hidden = self.relu(self.fc2(hidden))
    return self.fc3(hidden)


In [None]:
# Instantiate the network, move it to the selected device and print its layer summary.
model = LeNet5()
model.to(device)
print(model)

In [None]:
# Print every parameter's name together with its current values.
for name, param in model.named_parameters():
  print(name, param.data)

In [None]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

# 1e-3 is Adam's documented default step size. A value of 0.05 is 50x larger
# and tends to make small ReLU networks collapse to predicting a single class.
learning_rate = 1e-3
# CrossEntropyLoss takes raw logits, which is why the model has no softmax layer.
criterion = CrossEntropyLoss()
optimizer = Adam(params = model.parameters(), lr = learning_rate)

In [None]:
# Train for a fixed number of epochs, evaluate on the test set after each one,
# record the per-epoch metrics for plotting, and checkpoint on improved test accuracy.
epochs = 10
train_loss_list = []
train_acc_list = []
test_loss_list = []
test_acc_list = []
test_acc = 0  # best test accuracy (%) seen so far


for epoch in range(epochs):
  # ---- training pass ----
  model.train()
  train_loss = 0
  train_total = 0
  train_correct = 0

  for img, label in train_loader:
    img, label = img.to(device), label.to(device)
    out = model(img)
    loss = criterion(out, label.long())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    bs = img.size(0)

    # loss is the batch mean; weight by batch size so the epoch average is per-sample.
    train_loss += loss.item() * bs

    pred = out.argmax(dim = 1)
    train_correct += (pred == label).sum().item()

    train_total += bs

  avg_train_loss = train_loss / train_total
  avg_train_acc = train_correct / train_total * 100
  train_loss_list.append(avg_train_loss)
  train_acc_list.append(avg_train_acc)

  # ---- evaluation pass (no gradient tracking, no optimizer step) ----
  model.eval()
  test_loss, test_total, test_correct = 0, 0, 0
  with torch.no_grad():
    for img, label in test_loader:
      img, label = img.to(device), label.to(device)
      out = model(img)
      loss = criterion(out, label.long())

      bs = img.size(0)

      test_loss += loss.item() * bs

      pred = out.argmax(dim = 1)
      test_correct += (pred == label).sum().item()

      test_total += bs

  avg_test_loss = test_loss / test_total
  avg_test_acc = test_correct / test_total * 100
  test_loss_list.append(avg_test_loss)
  test_acc_list.append(avg_test_acc)

  print(
          f"[{epoch+1}/{epochs}] "
          f"train_loss: {avg_train_loss:.4f}, train_acc: {avg_train_acc:.2f}% | "
          f"test_loss: {avg_test_loss:.4f}, test_acc: {avg_test_acc:.2f}%"
      )

  # Save the whole model object whenever the test accuracy improves.
  if avg_test_acc > test_acc:
    test_acc = avg_test_acc
    torch.save(model, 'model.pth')

In [None]:
# Shapes of the logits and labels left over from the last evaluated test batch.
out.size(), label.size()

In [None]:
import matplotlib.pyplot as plt

# One panel per recorded per-epoch metric, laid out on a 2x2 grid.
panels = [
    (train_acc_list, 'train acc'),
    (test_acc_list, 'test acc'),
    (train_loss_list, 'train loss'),
    (test_loss_list, 'test loss'),
]

for slot, (history, caption) in enumerate(panels, start = 1):
  plt.subplot(2, 2, slot)
  plt.plot(history)
  plt.title(caption)

plt.show()

In [None]:
# Number of images in the last evaluated test batch.
img.size(0)

In [None]:
# Compare predictions with the true labels for the last test batch.
# The test loader uses shuffle = False, so samples arrive in dataset order and
# the last batch is expected to contain only dogs.

plt.figure(figsize = (10, 10))

# The final batch holds whatever is left over and can be smaller than 20;
# never index past it.
n_show = min(20, img.size(0))

for i in range(n_show):
  plt.subplot(4, 5, i+1)

  # (C, H, W) -> (H, W, C) and back to the CPU for matplotlib.
  plt.imshow(img[i].permute(1, 2, 0).cpu())
  if pred[i].cpu().item() == 0:
    t_pred = 'cat'
  else:
    t_pred = 'dog'

  if label[i].cpu().item() == 0:
    t_real = 'cat'
  else:
    t_real = 'dog'

  plt.title(t_pred + "(real: " + t_real + ")")

plt.tight_layout()
plt.show()

## 2. AlexNet

2. **AlexNet**
- torchvision.models.alexnet 이 이미 있지만 구현해봄
- ✅ 모델 구조
  <img src = "https://miro.medium.com/v2/resize:fit:1400/format:webp/1*K7GvxwsWRbC_Ms2YsGsAMg.jpeg" width = 450 height = 250>
  <img src = "https://resources-public-blog.modulabs.co.kr/blog/prd/content/259481/Untitled-4.png" width = 400 height = 200>

- ✅ advanced 구조
  - dog-cat 문제 위해 f4, f5 추가 및 softmax 층 제거
  - dropout 층은 동일하게 f1, f2 에만 추가
    - [C1] conv1: 11*11 (`kernel_size` = 11), `stride` = 4, `out_channels` = 96
    - [S1] sub-sampling(= down sampling)
      - 공간 해상도 (H*W) 줄이기
      - maxpool2d: `kernel_size` = 3, `stride` = 2
  $$ out = \lfloor \frac{in + 2p - k}{s} \rfloor + 1 $$
    - [C2] conv2: 5*5 (`kernel_size` = 5), `pad` = 2, `out_channels` = 256
    - [S2] maxpool2d: (`kernel_size` = 3), `stride` = 2
    - [C3] conv3: 3*3 (`kernel_size` = 3), `pad` = 1, `stride` = 1, `out_channels` = 384
    - [C4] conv4: 3*3 (`kernel_size` = 3), `pad` = 1, `stride` = 1, `out_channels` = 384
    - [C5] conv5: 3*3 (`kernel_size` = 3), `pad` = 1, `stride` = 1, `out_channels` = 256
    - [S3] maxpool2d: 3*3 (`kernel_size` = 3), `stride` = 2
    - [F1] fc1: `Linear(9216, 4096)` # 9216 인 이유는 227 size 이면 256* 6 * 6
    - [D1] dropout: `Dropout(0.5)`
    - [F2] fc2: `Linear(4096, 4096)`
    - [D2] dropout: `Dropout(0.5)`
    - [F3] fc3: `Linear(4096, 1000)`
    - [F4] fc4: `Linear(1000, 256)`
    - [F5] fc5: `Linear(256, 2)`

- ☑️ Result
  - 데이터 수 부족해서 성능 안 좋은 것임 (한 클래스만 찍힘)

1. cuda device
2. data path, transforms.Compose([transforms.]), ImageFolder
- train, test 나누기 (os.listdir, train_test_split)
- train, test 폴더 생성 및 아래 각 클래스 폴더 생성 (label 이름 인식을 위해서)
- 기존 이미지들 이동하기
- test 의 경우 transform 랜덤 변환 X
- ✅ AlexNet 의 경우 정규화 필수 (ImageTransform)
	- `transforms.Normalize(mean = [], std = [])`
	- `ToTensor()` 뒤에 적용
	- **mean, std 구하기: 반드시 train 셋만 가지고 구해야함**
		- path 이용해서
		- imagenet 관행
			- mean = [0.485, 0.456, 0.406]
			- std = [0.229, 0.224, 0.225]
3. batch size 변수, dataloader
4. 이미지 시각화
	- subplot, imshow, title, axis off, tight_layout, show
	- Normalize 했을 경우 역정규화해서 imshow
5. 모델: AlexNet
	- 모델 정의 (torchvision.models 에 있지만 ✅class 로 모델 만들기✅)
	- 모델 device 에 올리기 및 출력해보기
	- 파라미터 확인하기
6. lr, criterion, optimizer 정의
7. epochs 수 정하기
- epoch for 문
	- train 순서
		- model train 모드로
		- loss, total, correct = 0
		- loader for문
			- img, label device 에 올리기
			- model 에 넣기
			- loss
			- optimizer zero grad, 역전파로 기울기 계산 (loss.backward), 파라미터 갱신 (optimizer.step) (train 에서만)
			- 현재의 배치 구하고 loss 누적
			- pred 구하고 correct 구하기
			- total 구하기
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- test 순서
		- model eval 모드로
		- loss, total, correct = 0
		- no grad()
			- loader for 문
				- 동일
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- loss, acc 출력
	- 모델 갱신되면 저장
8. train, test acc, loss plot
9. 실제, 예측 결과, 이미지 시각화 (마지막 배치에서 일부만) -> 오분류 이미지 확인
10. (feature map 시각화)



In [None]:
# Mount Google Drive at /content/drive (Colab only); the data paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Per-channel mean / std of the TRAINING images only, for transforms.Normalize.
train_data_path = '/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/train/**/*.*'

import os, numpy as np
from PIL import Image
from glob import glob

paths = glob(train_data_path, recursive=True)
# With no matches the divisions below would silently produce NaN statistics
# and Normalize would then turn every image into NaN.
if not paths:
    raise FileNotFoundError(f"no training images matched {train_data_path!r}")

sum_c = np.zeros(3, dtype=np.float64)   # running sum of pixel values per channel
sq_c  = np.zeros(3, dtype=np.float64)   # running sum of squared pixel values per channel
count = 0                               # number of pixels accumulated (per channel)

for p in paths:
    # Context manager closes the file handle as soon as the pixels are decoded.
    with Image.open(p) as im:
        img = im.convert("RGB").resize((224, 224))   # common size so every image weighs the same
    arr = np.asarray(img, dtype=np.float64) / 255.0  # [H,W,3] in [0,1]
    flat = arr.reshape(-1, 3)
    sum_c += flat.sum(axis=0)
    sq_c  += (flat**2).sum(axis=0)
    count += arr.shape[0] * arr.shape[1]

mean = (sum_c / count).tolist()
# Var = E[x^2] - E[x]^2; clamp at 0 so round-off can never feed sqrt a negative value.
std  = np.sqrt(np.maximum(sq_c / count - np.array(mean)**2, 0.0)).tolist()
print("mean =", mean, "\nstd =", std)

In [None]:
train_data_path = '/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/train'
test_data_path = '/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/test'

from torchvision import transforms

# Training pipeline: 227x227 resize, random horizontal flip, then per-channel
# normalisation with the training-set mean / std computed earlier.
train_transform = transforms.Compose([
    transforms.Resize([227, 227]),
    #transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

# Test pipeline: same resize and normalisation, no random transforms.
test_transform = transforms.Compose([
    transforms.Resize([227, 227]),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])


from torchvision.datasets import ImageFolder

# Labels come from the class sub-folders under each split directory.
train_dataset = ImageFolder(root = train_data_path, transform = train_transform)
test_dataset = ImageFolder(root = test_data_path, transform = test_transform)

In [None]:
batch_size = 32

from torch.utils.data import DataLoader
# Shuffle only the training data; the test loader keeps the dataset order.
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

In [None]:
# Inspect the first training sample returned by the dataset (transformed image and its label).
train_dataset[0]

In [None]:
# Number of training samples.
len(train_dataset)

In [None]:
# Show the class-name -> label-index mapping of the training dataset.
train_loader.dataset.class_to_idx

In [None]:
import matplotlib.pyplot as plt
import random

plt.figure(figsize = (10, 10))

# Undo Normalize(mean, std) for display: x = z * std + mean, expressed as a
# second Normalize with mean' = -mean / std and std' = 1 / std.
inv_norm = transforms.Normalize(
    mean = [-m / s for m, s in zip(mean, std)],
    std = [1 / s for s in std],
)

# 4x4 grid of randomly drawn training images, titled with their class name.
for i in range(16):
  idx = random.randint(0, len(train_dataset)-1)
  img, label = train_dataset[idx]
  t = 'cat' if label == 0 else 'dog'

  plt.subplot(4, 4, i+1)
  plt.imshow(inv_norm(img).permute(1, 2, 0))  # (C, H, W) -> (H, W, C)
  plt.title(t)

plt.tight_layout()
plt.show()

In [None]:
class AlexNet(nn.Module):
  """AlexNet-style classifier with a two-class head.

  Five convolution layers and three max-pool layers feed five fully-connected
  layers; dropout (p = 0.5) follows only fc1 and fc2. The forward pass returns
  raw logits (no softmax). The flatten step assumes the 256x6x6 feature map
  that a 227x227 input produces.
  """

  def __init__(self):
    super().__init__()
    self.relu = nn.ReLU()
    self.drop = nn.Dropout(0.5)

    # Size comments assume a 227x227 input: out = floor((in + 2p - k) / s) + 1
    self.conv1 = nn.Conv2d(3, 96, kernel_size = 11, stride = 4)                  # 227 -> 55
    self.pool1 = nn.MaxPool2d(3, 2)                                               # 55 -> 27

    self.conv2 = nn.Conv2d(96, 256, kernel_size = 5, padding = 2)                 # 27 -> 27
    self.pool2 = nn.MaxPool2d(3, 2)                                               # 27 -> 13

    self.conv3 = nn.Conv2d(256, 384, kernel_size = 3, stride = 1, padding = 1)    # 13 -> 13
    self.conv4 = nn.Conv2d(384, 384, kernel_size = 3, stride = 1, padding = 1)    # 13 -> 13
    self.conv5 = nn.Conv2d(384, 256, kernel_size = 3, stride = 1, padding = 1)    # 13 -> 13
    self.pool3 = nn.MaxPool2d(3, 2)                                               # 13 -> 6

    self.fc1 = nn.Linear(256 * 6 * 6, 4096)
    self.fc2 = nn.Linear(4096, 4096)
    self.fc3 = nn.Linear(4096, 1000)
    self.fc4 = nn.Linear(1000, 256)
    self.fc5 = nn.Linear(256, 2)

  def forward(self, x):
    """Map a batch of images (N, 3, 227, 227) to logits of shape (N, 2)."""
    out = self.pool1(self.relu(self.conv1(x)))
    out = self.pool2(self.relu(self.conv2(out)))

    for conv in (self.conv3, self.conv4, self.conv5):
      out = self.relu(conv(out))
    out = self.pool3(out)

    # Flatten to (N, 9216) for the fully-connected head.
    out = out.view(-1, self.fc1.in_features)

    # Dropout only after the first two fully-connected layers.
    for fc in (self.fc1, self.fc2):
      out = self.drop(self.relu(fc(out)))
    for fc in (self.fc3, self.fc4):
      out = self.relu(fc(out))

    return self.fc5(out)

In [None]:
# Instantiate the network, move it to the selected device and print its layer summary.
model = AlexNet()
model.to(device)
print(model)

In [None]:
# Print every parameter's name together with the parameter tensor.
for name, param in model.named_parameters():
  print(name, param)

In [None]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

# 1e-3 is Adam's documented default step size. A value of 0.05 is 50x larger
# and tends to make deep ReLU networks collapse to predicting a single class.
learning_rate = 1e-3
# CrossEntropyLoss takes raw logits, which is why the model has no softmax layer.
criterion = CrossEntropyLoss()
optimizer = Adam(params = model.parameters(), lr = learning_rate)

In [None]:
# Train for a fixed number of epochs, evaluate on the test set after each one,
# record the per-epoch metrics for plotting, and checkpoint on improved test accuracy.
epochs = 10

train_loss_list = []
train_acc_list = []
test_loss_list = []
test_acc_list = []

test_acc = 0  # best test accuracy (%) seen so far

for epoch in range(epochs):
  # ---- training pass ----
  model.train()
  train_loss = 0.0
  train_total = 0
  train_correct = 0.0

  for img, label in train_loader:
    img, label = img.to(device), label.to(device)
    out = model(img) # raw logits of shape (bs, num_classes)

    # label.long(): ground-truth class indices of shape (bs,)
    loss = criterion(out, label.long())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    bs = img.size(0)

    # loss is the batch mean; weight by batch size so the epoch average is per-sample.
    train_loss += loss.item() * bs

    pred = out.argmax(dim = 1) # index of the largest logit = predicted class
    train_correct += (pred == label).sum().item()

    train_total += bs

  avg_train_loss = train_loss / train_total
  avg_train_acc = train_correct / train_total * 100

  train_loss_list.append(avg_train_loss)
  train_acc_list.append(avg_train_acc)


  # ---- evaluation pass (no gradient tracking, no optimizer step) ----
  model.eval()
  test_loss = 0.0
  test_total = 0
  test_correct = 0.0

  with torch.no_grad():
    for img, label in test_loader:
      img, label = img.to(device), label.to(device)
      out = model(img)

      loss = criterion(out, label.long())

      bs = img.size(0)
      test_loss += loss.item() * bs

      pred = out.argmax(dim = 1)

      test_correct += (pred == label).sum().item()

      test_total += bs

  avg_test_loss = test_loss / test_total
  avg_test_acc = test_correct / test_total * 100

  test_loss_list.append(avg_test_loss)
  test_acc_list.append(avg_test_acc)

  print(f"epoch: {epoch} | train loss: {avg_train_loss} train acc: {avg_train_acc} | test_loss: {avg_test_loss} test_acc: {avg_test_acc}")

  # Save the whole model object whenever the test accuracy improves.
  if avg_test_acc > test_acc:
    test_acc = avg_test_acc
    torch.save(model, 'model.pth')

In [None]:
import matplotlib.pyplot as plt

# One panel per recorded per-epoch metric, laid out on a 2x2 grid.
panels = [
    (train_loss_list, 'train loss'),
    (train_acc_list, 'train acc'),
    (test_loss_list, 'test loss'),
    (test_acc_list, 'test acc'),
]

for slot, (history, caption) in enumerate(panels, start = 1):
  plt.subplot(2, 2, slot)
  plt.plot(history)
  plt.title(caption)

plt.tight_layout()
plt.show()

In [None]:
# Compare predictions with the true labels for the last test batch.
# The test loader uses shuffle = False, so samples arrive in dataset order and
# the last batch is expected to contain only dogs.

plt.figure(figsize = (10, 10))

# The final batch holds whatever is left over and can be smaller than 20;
# never index past it.
n_show = min(20, img.size(0))

for i in range(n_show):
  plt.subplot(4, 5, i+1)

  # The batch was normalised by the test transform; undo that with inv_norm
  # (defined in the sample-grid cell of this section) and clamp to [0, 1] so
  # imshow renders the real colours instead of clipping them.
  shown = inv_norm(img[i].cpu()).clamp(0, 1)
  plt.imshow(shown.permute(1, 2, 0))  # (C, H, W) -> (H, W, C)
  if pred[i].cpu().item() == 0:
    t_pred = 'cat'
  else:
    t_pred = 'dog'

  if label[i].cpu().item() == 0:
    t_real = 'cat'
  else:
    t_real = 'dog'

  plt.title(t_pred + "(real: " + t_real + ")")

plt.tight_layout()
plt.show()

## 3. VGGNet

3. **VGGNet**
- torchvision.models.vgg16, vgg19 가 이미 있지만 구현해봄
- ✅ 모델 구조
  - vgg16

    <img src = "https://miro.medium.com/v2/resize:fit:1400/1*p0QokDZh2Dmct_l4aS97nQ.png" width = 550 height = 350>
  - vgg19

    <img src = "https://www.researchgate.net/publication/314237915/figure/tbl1/AS:667100565745668@1536060577444/Details-on-the-VGG19-architecture-For-each-layer-number-of-filters-parameters-and.png" width = 300 height = 350>
  
  <img src = "https://media5.datahacker.rs/2018/11/vgg-ispravljeno-.png" width = 300 height = 600>

- ✅ advanced 구조 (vgg 16)
  - dog-cat 문제 위해 f4, f5 추가 및 softmax 층 제거
  - dropout 층은 동일하게 f1, f2 에만 추가
    - [C1] conv1: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 64, `padding` = 1
    - [C2] conv2: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 64, `padding` = 1
    - [S1] sub-sampling(= down sampling)
      - 공간 해상도 (H*W) 줄이기
      - maxpool2d: `kernel_size` = 2, `stride` = 2
  $$ out = \lfloor \frac{in + 2p - k}{s} \rfloor + 1 $$
    - [C3] conv3: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 128, `padding` = 1
    - [C4] conv4: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 128, `padding` = 1
    - [S2] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [C5] conv5: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [C6] conv6: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [C7] conv7: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [S3] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [C8] conv8: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C9] conv9: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C10] conv10: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [S4] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [C11] conv11: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C12] conv12: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C13] conv13: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [S5] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [F1] fc1: `Linear(25088, 4096)` # 25088 은 224 img size 이므로
    - [D1] dropout: `Dropout(0.5)`
    - [F2] fc2: `Linear(4096, 4096)`
    - [D2] dropout: `Dropout(0.5)`
    - [F3] fc3: `Linear(4096, 1000)`
    - [F4] fc4: `Linear(1000, 256)`
    - [F5] fc5: `Linear(256, 2)`

- ✅ advanced 구조 (vgg 19)
  - dog-cat 문제 위해 f4, f5 추가 및 softmax 층 제거
  - dropout 층은 동일하게 f1, f2 에만 추가
    - [C1] conv1: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 64, `padding` = 1
    - [C2] conv2: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 64, `padding` = 1
    - [S1] sub-sampling(= down sampling)
      - 공간 해상도 (H*W) 줄이기
      - maxpool2d: `kernel_size` = 2, `stride` = 2
  $$ out = \lfloor \frac{in + 2p - k}{s} \rfloor + 1 $$
    - [C3] conv3: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 128, `padding` = 1
    - [C4] conv4: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 128, `padding` = 1
    - [S2] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [C5] conv5: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [C6] conv6: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [C7] conv7: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [C8] conv8: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 256, `padding` = 1
    - [S3] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [C9] conv9: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C10] conv10: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C11] conv11: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C12] conv12: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [S4] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [C13] conv13: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C14] conv14: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C15] conv15: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [C16] conv16: 3*3 (`kernel_size` = 3), `stride` = 1, `out_channels` = 512, `padding` = 1
    - [S5] maxpool2d: (`kernel_size` = 2), `stride` = 2
    - [F1] fc1: `Linear(25088, 4096)` # 25088 은 224 img size 이므로
    - [D1] dropout: `Dropout(0.5)`
    - [F2] fc2: `Linear(4096, 4096)`
    - [D2] dropout: `Dropout(0.5)`
    - [F3] fc3: `Linear(4096, 1000)`
    - [F4] fc4: `Linear(1000, 256)`
    - [F5] fc5: `Linear(256, 2)`

- ☑️ Result
  - 데이터 수 부족해서 성능 안 좋은 것임 (한 클래스만 찍힘)

1. cuda device
2. data path, transforms.Compose([transforms.]), ImageFolder
- train, test 나누기 (os.listdir, train_test_split)
- train, test 폴더 생성 및 아래 각 클래스 폴더 생성 (label 이름 인식을 위해서)
- 기존 이미지들 이동하기
- test 의 경우 transform 랜덤 변환 X
- ✅ vgg16,19 의 경우 정규화 필수 (ImageTransform)
	- `transforms.Normalize(mean = [], std = [])`
	- `ToTensor()` 뒤에 적용
	- **mean, std 구하기: 반드시 train 셋만 가지고 구해야함**
		- path 이용해서
		- imagenet 관행
			- mean = [0.485, 0.456, 0.406]
			- std = [0.229, 0.224, 0.225]
3. batch size 변수, dataloader
4. 이미지 시각화
	- subplot, imshow, title, axis off, tight_layout, show
	- Normalize 했을 경우 역정규화해서 imshow
5. 모델: VGG16, VGG19
	- 모델 정의 (torchvision.models 에 있지만 ✅class 로 모델 만들기✅)
	- 모델 device 에 올리기 및 출력해보기
	- 파라미터 확인하기
6. lr, criterion, optimizer 정의
7. epochs 수 정하기
- epoch for 문
	- train 순서
		- model train 모드로
		- loss, total, correct = 0
		- loader for문
			- img, label device 에 올리기
			- model 에 넣기
			- loss
			- optimizer zero grad, 역전파로 기울기 계산 (loss.backward), 파라미터 갱신 (optimizer.step) (train 에서만)
			- 현재의 배치 구하고 loss 누적
			- pred 구하고 correct 구하기
			- total 구하기
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- test 순서
		- model eval 모드로
		- loss, total, correct = 0
		- no grad()
			- loader for 문
				- 동일
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- loss, acc 출력
	- 모델 갱신되면 저장
8. train, test acc, loss plot
9. 실제, 예측 결과, 이미지 시각화 (마지막 배치에서 일부만) -> 오분류 이미지 확인
10. (feature map 시각화)



In [None]:
# Mount Google Drive at /content/drive (Colab only); the data paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Per-channel mean / std of the TRAINING images only, for transforms.Normalize.
train_data_path = '/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/train/**/*.*'

import os, numpy as np
from PIL import Image
from glob import glob

paths = glob(train_data_path, recursive=True)
# With no matches the divisions below would silently produce NaN statistics
# and Normalize would then turn every image into NaN.
if not paths:
    raise FileNotFoundError(f"no training images matched {train_data_path!r}")

sum_c = np.zeros(3, dtype=np.float64)   # running sum of pixel values per channel
sq_c  = np.zeros(3, dtype=np.float64)   # running sum of squared pixel values per channel
count = 0                               # number of pixels accumulated (per channel)

for p in paths:
    # Context manager closes the file handle as soon as the pixels are decoded.
    with Image.open(p) as im:
        img = im.convert("RGB").resize((224, 224))   # match the model input size
    arr = np.asarray(img, dtype=np.float64) / 255.0  # [H,W,3] in [0,1]
    flat = arr.reshape(-1, 3)
    sum_c += flat.sum(axis=0)
    sq_c  += (flat**2).sum(axis=0)
    count += arr.shape[0] * arr.shape[1]

mean = (sum_c / count).tolist()
# Var = E[x^2] - E[x]^2; clamp at 0 so round-off can never feed sqrt a negative value.
std  = np.sqrt(np.maximum(sq_c / count - np.array(mean)**2, 0.0)).tolist()
print("mean =", mean, "\nstd =", std)

In [None]:
train_data_path = '/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/train'
test_data_path = '/content/drive/MyDrive/딥러닝 파이토치 교과서/6.1_Image Classification/data/test'

from torchvision import transforms

# Resize takes the target size as ONE argument. Written as Resize(224, 224) the
# second 224 is passed as `interpolation`, and a lone int size would only scale
# the shorter edge (non-square output). The network's fc1 layer needs exactly
# 224x224, so pass the size as a [H, W] pair.
train_transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

# Test pipeline: same resize and normalisation, no random transforms.
test_transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

from torchvision.datasets import ImageFolder

# Labels come from the class sub-folders under each split directory.
train_dataset = ImageFolder(root = train_data_path, transform = train_transform)
test_dataset = ImageFolder(root = test_data_path, transform = test_transform)

In [None]:
batch_size = 32

from torch.utils.data import DataLoader

# ImageFolder orders samples by class, so an unshuffled training loader would
# feed batches containing a single class only. Shuffle the training data;
# keep the test loader in dataset order.
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

In [None]:
import matplotlib.pyplot as plt
import random

plt.figure(figsize = (10, 10))

# Undo Normalize(mean, std) for display: x = z * std + mean, expressed as a
# second Normalize with mean' = -mean / std and std' = 1 / std.
inv_norm = transforms.Normalize(
    mean = [-m / s for m, s in zip(mean, std)],
    std = [1 / s for s in std],
)

# 4x5 grid of randomly drawn training images, titled with their class name.
for i in range(20):
  idx = random.randint(0, len(train_dataset)-1)
  img, label = train_dataset[idx]
  t = 'cat' if label == 0 else 'dog'

  plt.subplot(4, 5, i+1)
  plt.imshow(inv_norm(img).permute(1, 2, 0))  # (C, H, W) -> (H, W, C)
  plt.title(t)

plt.tight_layout()
plt.show()

In [None]:
class vgg16(nn.Module):
  """VGG16-style classifier with a two-class head.

  Thirteen 3x3 convolutions (stride 1, padding 1) in five blocks of
  2-2-3-3-3 layers, each block followed by a 2x2 max pool, then five
  fully-connected layers with dropout (p = 0.5) after fc1 and fc2 only.
  The forward pass returns raw logits (no softmax).

  Every convolution is its own module. Layers with identical hyper-parameters
  must NOT be the same nn.Conv2d object: calling one module several times
  ties the weights, which would leave only 8 trainable conv layers.
  """

  def __init__(self):
    super(vgg16, self).__init__()
    self.relu = nn.ReLU()
    self.drop = nn.Dropout(0.5)
    # Parameter-free, so one instance can safely serve all five blocks.
    self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)

    # Conv2d(in_channels, out_channels, kernel_size, stride, padding)
    # block 1
    self.conv1 = nn.Conv2d(3, 64, 3, 1, 1)
    self.conv2 = nn.Conv2d(64, 64, 3, 1, 1)
    # block 2
    self.conv3 = nn.Conv2d(64, 128, 3, 1, 1)
    self.conv4 = nn.Conv2d(128, 128, 3, 1, 1)
    # block 3
    self.conv5 = nn.Conv2d(128, 256, 3, 1, 1)
    self.conv6 = nn.Conv2d(256, 256, 3, 1, 1)
    self.conv7 = nn.Conv2d(256, 256, 3, 1, 1)
    # block 4
    self.conv8 = nn.Conv2d(256, 512, 3, 1, 1)
    self.conv9 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv10 = nn.Conv2d(512, 512, 3, 1, 1)
    # block 5
    self.conv11 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv12 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv13 = nn.Conv2d(512, 512, 3, 1, 1)

    self.fc1 = nn.Linear(25088, 4096) # 25088 = 512*7*7 (224x224 input after five 2x2 pools)
    self.fc2 = nn.Linear(4096, 4096)
    self.fc3 = nn.Linear(4096, 1000)
    self.fc4 = nn.Linear(1000, 256)
    self.fc5 = nn.Linear(256, 2)

  def _conv_block(self, x, convs):
    """Apply conv + ReLU for each layer in `convs`, then halve H and W with the max pool."""
    for conv in convs:
      x = self.relu(conv(x))
    return self.pool(x)

  def forward(self, x):
    """Map a batch of images (N, 3, 224, 224) to logits of shape (N, 2)."""
    out = self._conv_block(x, (self.conv1, self.conv2))                     # 224 -> 112
    out = self._conv_block(out, (self.conv3, self.conv4))                   # 112 -> 56
    out = self._conv_block(out, (self.conv5, self.conv6, self.conv7))       # 56 -> 28
    out = self._conv_block(out, (self.conv8, self.conv9, self.conv10))      # 28 -> 14
    out = self._conv_block(out, (self.conv11, self.conv12, self.conv13))    # 14 -> 7

    # Flatten to (N, 25088) for the fully-connected head.
    out = self.fc1(out.view(-1, 512*7*7))
    out = self.relu(out)
    out = self.drop(out)

    out = self.fc2(out)
    out = self.relu(out)
    out = self.drop(out)

    out = self.fc3(out)
    out = self.relu(out)

    out = self.fc4(out)
    out = self.relu(out)

    out = self.fc5(out)

    return out

In [None]:
class vgg19(nn.Module):
  """VGG19-style classifier with a two-class head.

  Sixteen 3x3 convolutions (stride 1, padding 1) in five blocks of
  2-2-4-4-4 layers, each block followed by a 2x2 max pool, then five
  fully-connected layers with dropout (p = 0.5) after fc1 and fc2 only.
  The forward pass returns raw logits (no softmax).

  Every convolution is its own module. Layers with identical hyper-parameters
  must NOT be the same nn.Conv2d object: calling one module several times
  ties the weights instead of creating independent layers.
  """

  def __init__(self):
    # Must name this class: super(vgg16, self) raises TypeError because a
    # vgg19 instance is not an instance of vgg16.
    super(vgg19, self).__init__()
    self.relu = nn.ReLU()
    self.drop = nn.Dropout(0.5)
    # Parameter-free, so one instance can safely serve all five blocks.
    self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)

    # Conv2d(in_channels, out_channels, kernel_size, stride, padding)
    # block 1
    self.conv1 = nn.Conv2d(3, 64, 3, 1, 1)
    self.conv2 = nn.Conv2d(64, 64, 3, 1, 1)
    # block 2
    self.conv3 = nn.Conv2d(64, 128, 3, 1, 1)
    self.conv4 = nn.Conv2d(128, 128, 3, 1, 1)
    # block 3
    self.conv5 = nn.Conv2d(128, 256, 3, 1, 1)
    self.conv6 = nn.Conv2d(256, 256, 3, 1, 1)
    self.conv7 = nn.Conv2d(256, 256, 3, 1, 1)
    self.conv8 = nn.Conv2d(256, 256, 3, 1, 1)
    # block 4
    self.conv9 = nn.Conv2d(256, 512, 3, 1, 1)
    self.conv10 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv11 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv12 = nn.Conv2d(512, 512, 3, 1, 1)
    # block 5
    self.conv13 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv14 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv15 = nn.Conv2d(512, 512, 3, 1, 1)
    self.conv16 = nn.Conv2d(512, 512, 3, 1, 1)

    self.fc1 = nn.Linear(25088, 4096) # 25088 = 512*7*7 (224x224 input after five 2x2 pools)
    self.fc2 = nn.Linear(4096, 4096)
    self.fc3 = nn.Linear(4096, 1000)
    self.fc4 = nn.Linear(1000, 256)
    self.fc5 = nn.Linear(256, 2)

  def _conv_block(self, x, convs):
    """Apply conv + ReLU for each layer in `convs`, then halve H and W with the max pool."""
    for conv in convs:
      x = self.relu(conv(x))
    return self.pool(x)

  def forward(self, x):
    """Map a batch of images (N, 3, 224, 224) to logits of shape (N, 2)."""
    out = self._conv_block(x, (self.conv1, self.conv2))                                    # 224 -> 112
    out = self._conv_block(out, (self.conv3, self.conv4))                                  # 112 -> 56
    out = self._conv_block(out, (self.conv5, self.conv6, self.conv7, self.conv8))          # 56 -> 28
    out = self._conv_block(out, (self.conv9, self.conv10, self.conv11, self.conv12))       # 28 -> 14
    out = self._conv_block(out, (self.conv13, self.conv14, self.conv15, self.conv16))      # 14 -> 7

    # Flatten to (N, 25088) for the fully-connected head.
    out = self.fc1(out.view(-1, 512*7*7))
    out = self.relu(out)
    out = self.drop(out)

    out = self.fc2(out)
    out = self.relu(out)
    out = self.drop(out)

    out = self.fc3(out)
    out = self.relu(out)

    out = self.fc4(out)
    out = self.relu(out)

    out = self.fc5(out)

    return out

In [None]:
# Instantiate the network, move it to the selected device and print its layer summary.
model = vgg16()
model.to(device)

print(model)

In [None]:
# Print every parameter's name together with the parameter tensor.
for name, param in model.named_parameters():
  print(name, param)

In [None]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

# Adam's default step size is 1e-3. The previous value of 0.05 is far above
# that; on a deep network without batch normalization such a step size tends
# to make training diverge, which is a likely cause of the model predicting a
# single class. Use a conservative step size instead.
learning_rate = 1e-4

# Cross-entropy over the raw class scores returned by the model.
criterion = CrossEntropyLoss()
optimizer = Adam(params = model.parameters(), lr = learning_rate)

In [None]:
epochs = 10
test_acc = 0.0  # best test accuracy seen so far; decides when to checkpoint

# Per-epoch history, consumed by the plotting cell.
train_loss_list = []
train_acc_list = []
test_loss_list = []
test_acc_list = []

for epoch in range(epochs):
  # ---- training pass ----
  model.train()
  train_loss = 0.0
  train_total = 0
  train_correct = 0.0

  for img, label in train_loader:
    img, label = img.to(device), label.to(device)
    out = model(img)

    loss = criterion(out, label)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    bs = img.size(0)

    # criterion returns the batch mean, so weight it by the batch size to
    # obtain a correct per-sample average over the whole epoch.
    train_loss += loss.item() * bs

    pred = out.argmax(dim = 1)

    # Count hits by comparing predicted class indices with the labels.
    # (Comparing the raw scores in `out` with `label` is a shape mismatch and
    # does not measure accuracy.)
    train_correct += (pred == label).sum().item()

    train_total += bs

  avg_train_loss = train_loss / train_total
  avg_train_acc = train_correct / train_total * 100

  train_loss_list.append(avg_train_loss)
  train_acc_list.append(avg_train_acc)

  # ---- evaluation pass ----
  model.eval()

  test_loss = 0.0
  test_total = 0
  test_correct = 0.0

  with torch.no_grad():
    # img, label and pred of the final batch stay defined after the loop and
    # are reused by the visualization cell.
    for img, label in test_loader:
      img, label = img.to(device), label.to(device)

      out = model(img)

      loss = criterion(out, label)

      bs = img.size(0)
      test_loss += loss.item() * bs

      pred = out.argmax(dim = 1)
      test_correct += (pred == label).sum().item()

      test_total += bs

  avg_test_loss = test_loss / test_total
  avg_test_acc = test_correct / test_total * 100

  test_loss_list.append(avg_test_loss)
  test_acc_list.append(avg_test_acc)

  print(f"epoch: {epoch} | train loss: {avg_train_loss}, train acc: {avg_train_acc} | test loss: {avg_test_loss}, test acc: {avg_test_acc}")

  # Keep only the model with the best test accuracy so far.
  if avg_test_acc > test_acc:
    test_acc = avg_test_acc
    torch.save(model, 'model.pth')

In [None]:
# 2x2 grid of per-epoch curves: train on the top row, test on the bottom row.
plt.subplot(2, 2, 1)
plt.plot(train_loss_list)
plt.title('train loss')

# This panel plots accuracy, so it must not be labelled as loss.
plt.subplot(2, 2, 2)
plt.plot(train_acc_list)
plt.title('train acc')

plt.subplot(2, 2, 3)
plt.plot(test_loss_list)
plt.title('test loss')

plt.subplot(2, 2, 4)
plt.plot(test_acc_list)
plt.title('test acc')

plt.tight_layout()
plt.show()

In [None]:
# Compare img, pred and label of the last test batch.
# The test loader uses shuffle = False, so samples arrive in order and the
# last batch is expected to contain only dog images.

plt.figure(figsize = (10, 10))

# The last batch can hold fewer than 20 images; never index past its end.
n_show = min(20, img.size(0))

for i in range(n_show):
  plt.subplot(4, 5, i+1)

  # CHW tensor -> HWC layout expected by imshow
  plt.imshow(img[i].permute(1, 2, 0).cpu())
  if pred[i].cpu().item() == 0:
    t_pred = 'cat'
  else:
    t_pred = 'dog'

  if label[i].cpu().item() == 0:
    t_real = 'cat'
  else:
    t_real = 'dog'

  plt.title(t_pred + "(real: " + t_real + ")")

plt.tight_layout()
plt.show()

## 4. ResNet

4. **ResNet**
- torchvision.models.resnet18 등이 이미 있지만 구현해봄
- ✅ 모델 구조
  - resnet

    <img src = "https://img1.daumcdn.net/thumb/R800x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdna%2FbUhp2u%2Fbtq22b5u0gX%2FAAAAAAAAAAAAAAAAAAAAAOepXy_jO_vYOnZ6DHgHOsCkknS_ZydHU1IK1rs3ruwg%2Fimg.png%3Fcredential%3DyqXZFxpELC7KVnFOS48ylbz2pIh7yKj8%26expires%3D1756652399%26allow_ip%3D%26allow_referer%3D%26signature%3DsNZfxmKm5DHIu4vLsdCnLzEBQsw%253D" width = 450 height = 250>

- ✅ advanced 구조 (resnet 18)
  - 마지막 fc 의 num_classes 변경, 그리고 Layer1 은 Basic Block 1개만 사용 (원래 ResNet-18 은 Layer 마다 Basic Block 2개)
    - [C1] conv1: 7*7 (`kernel_size` = 7), `stride` = 2, `out_channels` = 64, `padding` = 3
    - [B1] BN: BatchNorm2d(64)
    - relu
    - [S1] maxpool2d: (`kernel_size` = 3, `stride` = 2, `padding` = 1)

    - Layer1
      - Basic Block
        - [C2] conv2: 3*3, `stride` = 1, `in_channels` = 64, `out_channels` = 64, `padding` = 1
        - [B2] BN(64)
        - relu
        - [C3] conv3: 3*3, `stride` = 1, `in_channels` = 64, `out_channels` = 64, `padding` = 1
        - [B3] BN(64)
        - skip connection (identity)
        - relu

    - Layer2
      - Basic Block1
        - [C4] conv4: 3*3, `stride` = 2, `in_channels` = 64, `out_channels` = 128, `padding` = 1
        - [B4] BN(128)
        - relu
        - [C5] conv5: 3*3, `stride` = 1, `in_channels` = 128, `out_channels` = 128, `padding` = 1
        - [B5] BN(128)
        - skip connection (downsample: 1*1 conv, stride=2, out=128 + BN)
        - relu
      - Basic Block2
        - [C6] conv6: 3*3, `stride` = 1, `in_channels` = 128, `out_channels` = 128, `padding` = 1
        - [B6] BN(128)
        - relu
        - [C7] conv7: 3*3, `stride` = 1, `in_channels` = 128, `out_channels` = 128, `padding` = 1
        - [B7] BN(128)
        - skip connection (identity)
        - relu

    - Layer3
      - Basic Block1
        - [C8] conv8: 3*3, `stride` = 2, `in_channels` = 128, `out_channels` = 256, `padding` = 1
        - [B8] BN(256)
        - relu
        - [C9] conv9: 3*3, `stride` = 1, `in_channels` = 256, `out_channels` = 256, `padding` = 1
        - [B9] BN(256)
        - skip connection (downsample: 1*1 conv, stride=2, out=256 + BN)
        - relu
      - Basic Block2
        - [C10] conv10: 3*3, `stride` = 1, `in_channels` = 256, `out_channels` = 256, `padding` = 1
        - [B10] BN(256)
        - relu
        - [C11] conv11: 3*3, `stride` = 1, `in_channels` = 256, `out_channels` = 256, `padding` = 1
        - [B11] BN(256)
        - skip connection (identity)
        - relu

    - Layer4
      - Basic Block1
        - [C12] conv12: 3*3, `stride` = 2, `in_channels` = 256, `out_channels` = 512, `padding` = 1
        - [B12] BN(512)
        - relu
        - [C13] conv13: 3*3, `stride` = 1, `in_channels` = 512, `out_channels` = 512, `padding` = 1
        - [B13] BN(512)
        - skip connection (downsample: 1*1 conv, stride=2, out=512 + BN)
        - relu
      - Basic Block2
        - [C14] conv14: 3*3, `stride` = 1, `in_channels` = 512, `out_channels` = 512, `padding` = 1
        - [B14] BN(512)
        - relu
        - [C15] conv15: 3*3, `stride` = 1, `in_channels` = 512, `out_channels` = 512, `padding` = 1
        - [B15] BN(512)
        - skip connection (identity)
        - relu

    - [G1] AdaptiveAvgPool2d((1,1))
    - [F] fc: Linear(512, 2)


- ☑️ Result
  - 데이터 수 부족해서 성능 안 좋은 것임 (한 클래스만 찍힘)

1. cuda device
2. data path, transforms.Compose([transforms.]), ImageFolder
- train, test 나누기 (os.listdir, train_test_split)
- train, test 폴더 생성 및 아래 각 클래스 폴더 생성 (label 이름 인식을 위해서)
- 기존 이미지들 이동하기
- test 의 경우 transform 랜덤 변환 X
- ✅ resnet 의 경우 정규화 필수 (ImageTransform)
	- `transforms.Normalize(mean = [], std = [])`
	- `ToTensor()` 뒤에 적용
	- **mean, std 구하기: 반드시 train 셋만 가지고 구해야함**
		- path 이용해서
		- imagenet 관행
			- mean = [0.485, 0.456, 0.406]
			- std = [0.229, 0.224, 0.225]
3. batch size 변수, dataloader
4. 이미지 시각화
	- subplot, imshow, title, axis off, tight_layout, show
	- Normalize 했을 경우 역정규화해서 imshow
5. 모델: ResNet
	- 모델 정의 (torchvision.models 에 있지만 ✅class 로 모델 만들기✅)
	- 모델 device 에 올리기 및 출력해보기
	- 파라미터 확인하기
6. lr, criterion, optimizer 정의
7. epochs 수 정하기
- epoch for 문
	- train 순서
		- model train 모드로
		- loss, total, correct = 0
		- loader for문
			- img, label device 에 올리기
			- model 에 넣기
			- loss
			- optimizer zero grad, 역전파로 기울기 계산 (backward), 파라미터 갱신 (optimizer step) (train 에서만)
			- 현재의 배치 구하고 loss 누적
			- pred 구하고 correct 구하기
			- total 구하기
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- test 순서
		- model eval 모드로
		- loss, total, correct = 0
		- no grad()
			- loader for 문
				- 동일
		- avg loss, acc 계산
		- plot 그리기 위해 list 에 저장
	- loss, acc 출력
	- 모델 갱신되면 저장
8. train, test acc, loss plot
9. 실제, 예측 결과, 이미지 시각화 (마지막 배치에서 일부만) -> 오분류 이미지 확인
10. (feature map 시각화)



In [None]:
# Mount Google Drive at /content/drive (google.colab is provided by the Colab runtime).
# NOTE(review): presumably the dataset paths below should point into this
# mount — confirm, they are currently empty strings.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch

# torch.device describes where tensors and modules should live.
# torch.cuda.device is a context manager for switching the current GPU: it is
# not accepted by Module.to()/Tensor.to() and rejects "cpu", so it cannot be
# used here.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Per-channel normalization statistics (RGB order) used by transforms.Normalize.
# These are the conventional ImageNet values; if dataset-specific statistics
# are preferred, compute them from the training set only.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [None]:
# TODO: point these at the train/test folders (one sub-folder per class).
train_data_path = ''
test_data_path = ''

from torchvision import transforms

# Resize to the 224x224 input size, convert to a float tensor, then normalize
# each channel with the shared mean/std.
train_transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

# The test pipeline must apply the same normalization as training; otherwise
# the model is evaluated on inputs with a different value range than it was
# trained on. Batches from the test loader are therefore normalized as well —
# undo the normalization before showing them with plt.imshow.
test_transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

from torchvision.datasets import ImageFolder

# ImageFolder derives integer labels from the class sub-folder names.
train_dataset = ImageFolder(train_data_path, train_transform)
test_dataset = ImageFolder(test_data_path, test_transform)

In [None]:
from torch.utils.data import DataLoader

# Number of images per mini-batch, shared by both loaders.
batch_size = 32

# Training batches are reshuffled; test batches keep the dataset order.
train_loader = DataLoader(
    dataset = train_dataset,
    shuffle = True,
    batch_size = batch_size,
)
test_loader = DataLoader(
    dataset = test_dataset,
    shuffle = False,
    batch_size = batch_size,
)

In [None]:
import matplotlib.pyplot as plt
import random

# Preview 16 randomly chosen training images in a 4x4 grid.
plt.figure(figsize = (10, 10))

# Normalize computes (x - mean) / std. Applying it again with
# mean' = -mean/std and std' = 1/std gives (y + mean/std) * std = x,
# i.e. it restores the original pixel values for display.
inv_norm = transforms.Normalize(
    mean = [-m / s for m, s in zip(mean, std)],
    std = [1 / s for s in std],
)

for i in range(16):
  plt.subplot(4, 4, i + 1)

  idx = random.randint(0, len(train_dataset) - 1)
  img, label = train_dataset[idx]

  # CHW tensor -> HWC layout expected by imshow
  plt.imshow(inv_norm(img).permute(1, 2, 0))

  # label 0 is shown as cat, anything else as dog
  # (assumes the class folders sort as cat, dog — TODO confirm)
  t = 'cat' if label == 0 else 'dog'
  plt.title(t)

plt.tight_layout()
plt.show()

In [None]:
class basicblock(nn.Module):
  """Residual block with two 3x3 convolutions: relu(F(x) + shortcut(x)).

  Main path: conv3x3(stride) -> BN -> ReLU -> conv3x3 -> BN.
  Shortcut: the input itself, or a 1x1 conv + BN projection when the input
  has to be reshaped to match the main path.

  Args:
    in_ch: number of input channels.
    out_ch: number of output channels of both 3x3 convolutions.
    stride: stride of the first 3x3 convolution and of the projection.
    down: whether to use the projection shortcut. ``None`` (default) selects
      it automatically when the identity shortcut could not be added to the
      main path, i.e. when ``stride != 1`` or ``in_ch != out_ch``.
  """

  def __init__(self, in_ch, out_ch, stride=1, down = None):
    super(basicblock, self).__init__()

    if down is None:
      down = stride != 1 or in_ch != out_ch
    self.down = down

    self.conv1 = nn.Conv2d(in_ch, out_ch, 3, stride, 1)
    self.b1 = nn.BatchNorm2d(out_ch)
    self.relu = nn.ReLU()

    self.conv2 = nn.Conv2d(out_ch, out_ch, 3, 1, 1)
    self.b2 = nn.BatchNorm2d(out_ch)

    # Only projection blocks get the 1x1 shortcut layers; creating them in
    # identity blocks would register parameters that are never used.
    # The projection follows `stride` so its output always matches the main
    # path (the callers in this file use stride 2 here).
    if self.down:
      self.conv3 = nn.Conv2d(in_ch, out_ch, 1, stride, 0)
      self.b3 = nn.BatchNorm2d(out_ch)

  def forward(self, x):
    """Return relu(main_path(x) + shortcut(x))."""
    out = self.relu(self.b1(self.conv1(x)))
    out_main = self.b2(self.conv2(out))

    if self.down:
      # projection shortcut: reshape x to the main path's channels/resolution
      identity = self.b3(self.conv3(x))
    else:
      identity = x

    return self.relu(out_main + identity)


class resnet18(nn.Module):
  """ResNet-style image classifier assembled from ``basicblock``.

  Stem:   7x7 stride-2 conv -> BN -> ReLU -> 3x3 stride-2 max-pool.
  Stages: layer1 (64 channels, one identity block), then layer2 (128),
          layer3 (256) and layer4 (512), each made of a stride-2 projection
          block followed by an identity block.
  Head:   global average pool -> Linear(512, num_classes).

  Note: layer1 holds a single block here, whereas the reference ResNet-18
  uses two blocks per stage.

  Args:
    num_classes: width of the final linear layer (default 2).
  """

  def __init__(self, num_classes = 2):
    super(resnet18, self).__init__()
    self.relu = nn.ReLU()

    self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 7, stride = 2, padding = 3)
    self.b1 = nn.BatchNorm2d(64)
    self.pool1 = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)

    # Blocks are created directly inside each Sequential so that every block
    # is registered exactly once, under its stage name. Assigning them to
    # reused attributes first would register some blocks a second time under
    # an unrelated name.
    self.layer1 = nn.Sequential(
        basicblock(64, 64, 1, False),
    )
    self.layer2 = nn.Sequential(
        basicblock(64, 128, 2, True),
        basicblock(128, 128, 1, False),
    )
    self.layer3 = nn.Sequential(
        basicblock(128, 256, 2, True),
        basicblock(256, 256, 1, False),
    )
    self.layer4 = nn.Sequential(
        basicblock(256, 512, 2, True),
        basicblock(512, 512, 1, False),
    )

    self.pool2 = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(512, num_classes)

  def skip(self, x, out):
    """Return the element-wise sum of x and out (not used by forward)."""
    return x + out

  def forward(self, x):
    """Return the class scores produced by ``self.fc`` for input batch x."""
    # stem
    out = self.conv1(x)
    out = self.b1(out)
    out = self.relu(out)
    out = self.pool1(out)

    # residual stages
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)

    # (N, 512, 1, 1) after global pooling -> (N, 512) for the linear layer
    out = self.pool2(out)
    out = self.fc(out.view(-1, 512))

    return out


In [None]:
# Build the ResNet model and place its weights on the selected device.
# Module.to() returns the module itself, so creation and transfer can be chained.
model = resnet18().to(device)

# Show the registered layers.
print(model)

In [None]:
# List every learnable tensor by name with its shape and element count.
# Printing the tensors themselves dumps raw weight values, which floods the
# cell output without helping to check the architecture.
total_params = 0
for name, param in model.named_parameters():
  print(f"{name}: shape={tuple(param.shape)}, numel={param.numel()}")
  total_params += param.numel()

print(f"total parameters: {total_params}")

In [None]:
import torch.nn as nn
from torch.optim import Adam

# Step size handed to Adam below.
# NOTE(review): this is 10x Adam's default of 1e-3 — confirm training is stable.
learning_rate = 0.01

# Cross-entropy over the raw class scores returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter registered on the model.
optimizer = Adam(model.parameters(), lr = learning_rate)

In [None]:
epochs = 10

# Per-epoch history, consumed by the plotting cell.
train_loss_list, train_acc_list = [], []
test_loss_list, test_acc_list = [], []

# Best test accuracy so far; a checkpoint is written whenever it improves.
test_acc = 0.0

for epoch in range(epochs):
  # ---- training pass ----
  model.train()
  train_loss, train_total, train_correct = 0.0, 0, 0.0

  for img, label in train_loader:
    img = img.to(device)
    label = label.to(device)

    out = model(img)
    loss = criterion(out, label)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    bs = img.size(0)
    pred = out.argmax(dim = 1)

    train_total += bs
    # weight the batch loss by the batch size so the epoch average is per sample
    train_loss += loss.item() * bs
    train_correct += (pred == label).sum().item()

  avg_train_loss = train_loss / train_total
  avg_train_acc = train_correct / train_total * 100
  train_loss_list.append(avg_train_loss)
  train_acc_list.append(avg_train_acc)

  # ---- evaluation pass ----
  model.eval()
  test_loss, test_total, test_correct = 0.0, 0, 0.0

  with torch.no_grad():
    # img, label and pred of the final batch stay defined after the loop and
    # are reused by the visualization cell.
    for img, label in test_loader:
      img = img.to(device)
      label = label.to(device)

      out = model(img)
      loss = criterion(out, label)

      bs = img.size(0)
      pred = out.argmax(dim = 1)

      test_total += bs
      test_loss += loss.item() * bs
      test_correct += (pred == label).sum().item()

  avg_test_loss = test_loss / test_total
  avg_test_acc = test_correct / test_total * 100
  test_loss_list.append(avg_test_loss)
  test_acc_list.append(avg_test_acc)

  print(f"epoch:{epoch} | train loss: {avg_train_loss} train acc: {avg_train_acc} | test loss: {avg_test_loss} test acc: {avg_test_acc}")

  # Keep only the model with the best test accuracy so far.
  if avg_test_acc > test_acc:
    test_acc = avg_test_acc
    torch.save(model, 'model.pth')

In [None]:
import matplotlib.pyplot as plt

# 2x2 grid of per-epoch curves: train on the top row, test on the bottom row.
curves = (
    (train_loss_list, 'train loss'),
    (train_acc_list, 'train acc'),
    (test_loss_list, 'test loss'),
    (test_acc_list, 'test acc'),
)

for panel, (history, title) in enumerate(curves, start = 1):
  plt.subplot(2, 2, panel)
  plt.plot(history)
  plt.title(title)

plt.tight_layout()
plt.show()

In [None]:
# Compare img, pred and label of the last test batch.
# The test loader uses shuffle = False, so samples arrive in order and the
# last batch is expected to contain only dog images.

plt.figure(figsize = (10, 10))

# Undo the normalization for display only if the test pipeline applied it,
# so the images look right either way.
test_is_normalized = any(
    isinstance(step, transforms.Normalize) for step in test_transform.transforms
)

# The last batch can hold fewer than 20 images; never index past its end.
n_show = min(20, img.size(0))

for i in range(n_show):
  plt.subplot(4, 5, i+1)

  shown = img[i].cpu()
  if test_is_normalized:
    shown = inv_norm(shown)
  # CHW -> HWC for imshow; clamp removes tiny float overshoots outside [0, 1]
  plt.imshow(shown.permute(1, 2, 0).clamp(0, 1))

  if pred[i].cpu().item() == 0:
    t_pred = 'cat'
  else:
    t_pred = 'dog'

  if label[i].cpu().item() == 0:
    t_real = 'cat'
  else:
    t_real = 'dog'

  plt.title(t_pred + "(real: " + t_real + ")")

plt.tight_layout()
plt.show()