<a href="https://colab.research.google.com/github/enjoyPG/pytorch_basic/blob/main/0_Study%20/1_Tencho/4_CNN_Tencho.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## CNN
#### 1. CNN은 작은 필터를 이용해 이미지로부터 특징을 뽑아내는 알고리즘
#### 2. CNN이 필요한 이유는 4K UHD사진은 약 840만개의 픽셀을 갖는데 각 픽셀별로 가중치를 가진다면 엄청난 시간, 공간 이슈가 발생 추가적으로 정확도 문제도 발생 // 이를 극복하기 위해 합성곱은 작은 필터를 이용해 이미지로부터 특징을 추출해냄
#### 3. 기존 MLP방식은 시간, 공간 이슈가 있지만 모든 픽셀에 대해 완벽한 가중치를 가질 수 있다. 다만 특정 이미지 위치가 바뀌면 완전히 무용지물이 된다. 반대로 CNN은 커널을 이리저리 움직이면서 특징을 추출하므로 위치와 무관하게 특징을 잡아 낼 수 있다. 그리고 이미지 크기와 무관하게 커널의 크기는 변함이 없기 때문에 학습할 가중치도 줄어들고 특징의 위치가 바뀌는 문제도 극복이 가능하다

##### 커널: 이미지로투터 특징을 추출하기 위한 가중치를 행렬로 나타낸 것
##### 필터: 커널의 집합
##### 특징맵(Feature Map): 필터를 거쳐서 만들어진 결과

<img src="https://i0.wp.com/developersbreach.com/wp-content/uploads/2020/08/cnn_banner.png?fit=1200%2C564&ssl=1">

<img src="https://miro.medium.com/v2/resize:fit:1400/1*rlkTTRUNCnfXGD0RGNF5Eg.jpeg">
       

###1. 데이터 전처리(CIFAR)

In [18]:
"""
import matplotlib.pyplot as plt

from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import ToTensor

#CIFAR-10 데이터셋 불러오기
training_data = CIFAR10(
    root="./",
    train=True,
    download=True,
    transform=ToTensor()
)
test_data = CIFAR10(
    root="./",
    train=False,
    download=True,
    transform=ToTensor()
)
for i in range(9):
  plt.subplot(3,3,i+1)
  plt.imshow(training_data.data[i])
plt.show()
"""


'\nimport matplotlib.pyplot as plt\n\nfrom torchvision.datasets.cifar import CIFAR10\nfrom torchvision.transforms import ToTensor\n\n#CIFAR-10 데이터셋 불러오기\ntraining_data = CIFAR10(\n    root="./",\n    train=True,\n    download=True,\n    transform=ToTensor()\n)\ntest_data = CIFAR10(\n    root="./",\n    train=False,\n    download=True,\n    transform=ToTensor()\n)\nfor i in range(9):\n  plt.subplot(3,3,i+1)\n  plt.imshow(training_data.data[i])\nplt.show()\n'

###1-2 데이터 증강(Data Augmentation)
####transforms를 사용해 데이터 증강시키기

###1-3 이미지 정규화
####평균이 0 표준편차가 1인 정규분포로 변경해줌

In [19]:
"""
import matplotlib.pyplot as plt
import torchvision.transforms as T

from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import Compose
from torchvision.transforms import RandomHorizontalFlip, RandomCrop, Normalize

transforms = Compose([
    T.ToPILImage(),
    RandomCrop((32,32),padding=4),
    RandomHorizontalFlip(p=0.5),

    #이미지정규화
    T.ToTensor(),
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261)),
    T.ToPILImage()
])

training_data = CIFAR10(
    root="./",
    train=True,
    download=True,
    transform=transforms
)

test_data = CIFAR10(
    root="./",
    train=False,
    download=True,
    transform=transforms
)

for i in range(9):
  plt.subplot(3,3,i+1)
  plt.imshow(transforms(training_data.data[i]))
plt.show()
"""

'\nimport matplotlib.pyplot as plt\nimport torchvision.transforms as T\n\nfrom torchvision.datasets.cifar import CIFAR10\nfrom torchvision.transforms import Compose\nfrom torchvision.transforms import RandomHorizontalFlip, RandomCrop, Normalize\n\ntransforms = Compose([\n    T.ToPILImage(),\n    RandomCrop((32,32),padding=4),\n    RandomHorizontalFlip(p=0.5),\n\n    #이미지정규화\n    T.ToTensor(),\n    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261)),\n    T.ToPILImage()\n])\n\ntraining_data = CIFAR10(\n    root="./",\n    train=True,\n    download=True,\n    transform=transforms\n)\n\ntest_data = CIFAR10(\n    root="./",\n    train=False,\n    download=True,\n    transform=transforms\n)\n\nfor i in range(9):\n  plt.subplot(3,3,i+1)\n  plt.imshow(transforms(training_data.data[i]))\nplt.show()\n'

###2. CNN(VGG알고리즘)으로 이미지 분류하기
<img src="https://ai-studio-static-online.cdn.bcebos.com/358f4af76ea0494093cec963202edeea1ff417d7db9a470f9503c99de8ebf23a">

In [20]:
import torch
import torch.nn as nn

class BasicBlock(nn.Module): #기본 블록 정의
  def __init__(self, in_channels, out_channels, hidden_dim):
    super(BasicBlock, self).__init__() #nn.Module 클래스의 요소 상속

    #합성곱층 정의
    self.conv1 = nn.Conv2d(in_channels, hidden_dim, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(hidden_dim, out_channels, kernel_size=3, padding=1)
    self.relu = nn.ReLU()

    self.pool = nn.MaxPool2d(kernel_size=2, stride=2) #Maxpooling

  def forward(self, x):
    x=self.conv1(x)
    x=self.relu(x)
    x=self.conv2(x)
    x=self.relu(x)
    x=self.pool(x)

    return x


In [21]:
class CNN(nn.Module):
  def __init__(self, num_classes):
    super(CNN,self).__init__()

    #합성곱 기본 블록정의
    self.block1 = BasicBlock(in_channels=3, out_channels=32, hidden_dim=16)
    self.block2 = BasicBlock(in_channels=32, out_channels=128, hidden_dim=64)
    self.block3 = BasicBlock(in_channels=128, out_channels=256, hidden_dim=128)

    #분류기 정의
    self.fc1 = nn.Linear(in_features=4096, out_features=2048)
    self.fc2 = nn.Linear(in_features=2048, out_features=256)
    self.fc3 = nn.Linear(in_features=256, out_features=num_classes)

    self.relu = nn.ReLU()

  def forward(self, x):
    x = self.block1(x)
    x = self.block2(x)
    x = self.block3(x) #출력 (-1, 256, 4, 4)
    x = torch.flatten(x, start_dim=1)


    x=self.fc1(x)
    x=self.relu(x)
    x=self.fc2(x)
    x=self.relu(x)
    x=self.fc3(x)

    return x

In [22]:
# 모델학습하기

from torch.utils.data.dataloader import DataLoader
from torch.optim.adam import Adam

from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import Compose
from torchvision.transforms import RandomHorizontalFlip, RandomCrop, Normalize, ToTensor

transforms = Compose([
    RandomCrop((32,32,), padding=4),
    RandomHorizontalFlip(p=0.5),
    ToTensor(),
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

training_data = CIFAR10(
    root="./",
    train=True,
    download=True,
    transform=transforms
)

test_data = CIFAR10(
    root="./",
    train=False,
    download=True,
    transform=transforms
)
#데이터 로더 정의(학습시 데이터 로더가 데이터를 불러와 줌)
train_loader=DataLoader(training_data, batch_size=32, shuffle=True)
test_loader=DataLoader(test_data, batch_size=32, shuffle=False)

device = "cuda" if torch.cuda.is_available() else "cpu"

model = CNN(num_classes=10)

model.to(device)

Files already downloaded and verified
Files already downloaded and verified


CNN(
  (block1): BasicBlock(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): BasicBlock(
    (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): BasicBlock(
    (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=4096, out_features=2048, bias=True)
  (fc2): Linear(in_features=2048, out_features=256,

In [23]:
lr = 1e-3

optim = Adam(model.parameters(), lr=lr)

for epoch in range(100):
  for data, label in train_loader:
    optim.zero_grad()
    preds = model(data.to(device))

    loss = nn.CrossEntropyLoss()(preds, label.to(device))
    loss.backward()
    optim.step()

  if epoch==0 or epoch%10 == 9:
    print(f"epoch{epoch+1} loss:{loss.item()}")

torch.save(model.state_dict(), "CIFAR.pth")

epoch1 loss:1.1286242008209229
epoch10 loss:0.3660776615142822
epoch20 loss:0.5884189009666443
epoch30 loss:0.6462422609329224
epoch40 loss:0.4121656119823456
epoch50 loss:0.3190353512763977
epoch60 loss:0.0385807603597641
epoch70 loss:0.6034855842590332
epoch80 loss:0.023218614980578423
epoch90 loss:0.05036120116710663
epoch100 loss:0.05615365505218506


In [26]:
model.load_state_dict(torch.load("CIFAR.pth", map_location=device))
num_corr = 0

with torch.no_grad():
  for data, label in test_loader:

    output = model(data.to(device))
    preds = output.data.max(1)[1]
    corr = preds.eq(label.to(device).data).sum().item()
    num_corr += corr

  print(f"Accuracy:{num_corr/len(test_data)}")

Accuracy:0.8383
