<a href="https://colab.research.google.com/github/imhyunho99/2023-1--Deaplearning_Framework/blob/main/7_3_%EB%93%9C%EB%A1%AD%EC%95%84%EC%9B%83(Dropout).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 드롭아웃 Dropout

- nn.Dropout(p=0.5, inplace=False)
- Dropout으로 중요한 노드가 꺼진다 해도 오히려 좋을 수 있다. 왜냐하면 그만큼 주변 다른 노드들에 중요도가 분산되므로 프로그램이 좋은 방향으로 발전 할 수 있다.

![대체 텍스트](https://cdn-images-1.medium.com/max/2400/1*iWQzxhVlvadk6VAJjsgXgg.png)

In [1]:
# 런타임 유형을 GPU로 바꾸시길 추천드립니다.
!pip install torch torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## 1. Settings
### 1) Import required libraries

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

### 2) Set hyperparameters

In [3]:
batch_size = 256
learning_rate = 0.0002
num_epoch = 10

## 2. Data

### 1) Download Data

In [4]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 323823594.08it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 24407756.16it/s]


Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 188649519.82it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6510775.38it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw






### 2) Check Dataset

In [5]:
print(mnist_train.__getitem__(0)[0].size(), mnist_train.__len__())
mnist_test.__getitem__(0)[0].size(), mnist_test.__len__()

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

### 3) Set DataLoader

In [6]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)

## 3. Model & Optimizer

### 1) CNN Model

In [12]:
# 드롭아웃을 중간중간에 넣어줌으로써 모델이 오버피팅하는 경우 이를 어느정도 극복할 수 있습니다.
# 정형화에서 눈치채신분도 계시겠지만 오버피팅하지 않는 상태에서 정형화나 드롭아웃을 넣으면 오히려 학습이 잘 안됩니다.

class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,3,padding=1),  # 28
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.Conv2d(16,32,3,padding=1), # 28
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.MaxPool2d(2,2),            # 14
            nn.Conv2d(32,64,3,padding=1), # 14
            nn.ReLU(),
            nn.Dropout2d(0.2), #==> 요 코드=dropout
            nn.MaxPool2d(2,2)             # 7
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(100,10)
        )       
        
    def forward(self,x):
        out = self.layer(x)
        out = out.view(batch_size,-1)
        out = self.fc_layer(out)
        return out

### 2) Loss func & Optimizer

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

cuda:0


## 4. Train 

In [9]:
for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model.forward(x)
        loss = loss_func(output,y_)
        loss.backward()
        optimizer.step()
        
    if i % 10 == 0: #이렇게 10번 돌리면 그냥 처음에만 나오고 쓸모없는게 됨ㅋ
        print(loss)         

tensor(2.2999, device='cuda:0', grad_fn=<NllLossBackward0>)


In [10]:
#param_list = list(model.parameters())
#print(param_list)

## 5. Test

In [11]:
correct = 0
total = 0

# 배치정규화나 드롭아웃은 학습할때와 테스트 할때 다르게 동작하기 때문에 모델을 evaluation 모드로 바꿔서 테스트해야합니다.
model.eval()
with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model.forward(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))

Accuracy of Test Data: 10.897436141967773


In [15]:
for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model.forward(x)
        loss = loss_func(output,y_)
        loss.backward()
        optimizer.step()
    print(loss)         

tensor(2.2953, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2968, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2971, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2954, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2917, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2926, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2902, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2883, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2922, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(2.2850, device='cuda:0', grad_fn=<NllLossBackward0>)


In [14]:
correct = 0
total = 0

# 배치정규화나 드롭아웃은 학습할때와 테스트 할때 다르게 동작하기 때문에 모델을 evaluation 모드로 바꿔서 테스트해야합니다. test할때는 drpoout, regular을 모두 사용해야 하기 떄문에 평가모드 사용

model.eval()
with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model.forward(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))
  #Dropout 넣으니까 5% 오르긴 하는데...

Accuracy of Test Data: 15.154247283935547


**정규화와 드롭아웃의 장점**

모델에는 변화를 주지 않으면서 결과를 더 좋게 만들 수 있다.