# 1. 라이브러리 호출

In [58]:
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms # 이미지 변환(전처리) 기능을 제공
from torch.autograd import Variable
from torch import optim # 경사하강법을 이용하여 가중치를 구하기 위한 옵티마이저
import os # 파일 경로에 대한 함수들을 제공
import cv2
from PIL import Image
from tqdm import tqdm_notebook as tqdm # 진행 상황 표현
import random
import torch.nn as nn
import torch.nn.functional as F
from matplotlib import pyplot as plt

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 2. 이미지 데이터셋 전처리

In [59]:
class ImageTransform():    
    def __init__(self, resize, mean, std):
        self.data_transform = {
            'train': transforms.Compose([
                transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]), ## 1
            'val': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(resize),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])
        }
        
    def __call__(self, img, phase): ## 2
        return self.data_transform[phase](img)

# 3. 이미지 데이터셋을 불러온 후 훈련, 검증, 테스트로 분리

In [60]:
cat_directory = './dogs-vs-cats/Cat/'
dog_directory = './dogs-vs-cats/Dog/'

## 1
cat_images_filepaths = sorted([os.path.join(cat_directory, f) 
                               for f in os.listdir(cat_directory)])   
dog_images_filepaths = sorted([os.path.join(dog_directory, f) 
                               for f in os.listdir(dog_directory)])
                               
## 2                               
images_filepaths = [*cat_images_filepaths, *dog_images_filepaths] 

## 3
correct_images_filepaths = [i for i in images_filepaths if cv2.imread(i) is not None]    

random.seed(42) ## 4
random.shuffle(correct_images_filepaths)

# 일부 데이터만 사용
train_images_filepaths = correct_images_filepaths[:400]    
val_images_filepaths = correct_images_filepaths[400:-10]  
test_images_filepaths = correct_images_filepaths[-10:]    

print(len(train_images_filepaths), len(val_images_filepaths), len(test_images_filepaths))

400 92 10


# 4. test 데이터셋 이미지 확인

In [61]:
import matplotlib.pyplot as plt
import cv2
import os

def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    rows = len(images_filepaths) // cols  
    fig, ax = plt.subplots(rows, cols, figsize=(12, 6))
    ax = ax.ravel()

    for i, image_filepath in enumerate(images_filepaths): # 경로를 정규화 함
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) ## 1
        
        true_label = os.path.normpath(image_filepath).split(os.sep)[-2] ## 2
        predicted_label = predicted_labels[i] if predicted_labels else true_label ## 3
        color = "green" if true_label == predicted_label else "red"

        ax[i].imshow(image)
        ax[i].set_title(predicted_label, color=color)
        ax[i].axis("off")

    plt.tight_layout()
    plt.show()

# 5. 이미지 데이터셋 클래스 정의

In [62]:
class DogvsCatDataset(Dataset):
    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform # DogvsCatDataset 클래스를 호출할 때 transform에 대한 매개변수를 받아 옵니다.
        self.phase = phase # train 적용

    def __len__(self): # images_filepaths 데이터셋의 전체 길이 반환
        return len(self.file_list)
    
    def __getitem__(self,idx): # 데이터셋에서 데이터를 가져오는 부분으로 결과는 텐서 형태 
        img_path = self.file_list[idx]
        img = Image.open(img_path) # img_path 위치에서 이미지 데이터들을 가져옴
        img_transformed = self.transform(img, self.phase) # 이미지에 'train' 전처리 적용
        label = img_path.split('/')[-1].split('.')[0] ## 1

        if label == 'dog':
            label = 1
        elif label == 'cat':
            label = 0
        return img_transformed, label

# 6. 변수 값 정의

In [63]:
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

# 7. 이미지 데이터셋 정의

In [64]:
train_dataset = DogvsCatDataset(train_images_filepaths, 
                                transform=ImageTransform(size,mean, std), 
                                phase='train') # train 이미지에 train_transforms를 적용
val_dataset = DogvsCatDataset(val_images_filepaths, 
                              transform=ImageTransform(size,mean, std), 
                              phase='val') # val 이미지에 train_transforms를 적용

index = 0
print(train_dataset.__getitem__(index)[0].size()) # 훈련 데이터 train_dataset.__getitem__[0][0]의 크기(size) 출력
print(train_dataset.__getitem__(index)[1]) # 훈련 데이터의 레이블 출력

torch.Size([3, 224, 224])
0


# 8. 데이터로더 정의

In [65]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) ## 1
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# train_dataloader와 val_dataloader를 합쳐서 표현
dataloader_dict = {'train': train_dataloader, 'val': val_dataloader}

batch_iterator = iter(train_dataloader)
inputs, label = next(batch_iterator)
print(inputs.size())
print(label)

torch.Size([32, 3, 224, 224])
tensor([1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 0, 0, 1, 0, 1, 0, 1])


# 9. 모델의 네트워크 클래스

In [66]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        
        # 입력 : (3,244,244) 출력 : (16,220,220)
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=0) 
        self.relu1 = nn.ReLU() 
        self.maxpool1 = nn.MaxPool2d(kernel_size=2) # 220/2 -> (16,110,110)
        
        # 입력 : (16,110,110) 출력 : (32,106,106)
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0) 
        self.relu2 = nn.ReLU() # activation
        self.maxpool2 = nn.MaxPool2d(kernel_size=2) # 110/2 -> (32,53,53)   
        
        self.fc1 = nn.Linear(32*53*53, 512) 
        self.relu5 = nn.ReLU()         
        self.fc2 = nn.Linear(512, 2) 
        self.output = nn.Softmax(dim=1)        
    
    def forward(self, x):
        out = self.cnn1(x) 
        out = self.relu1(out)
        out = self.maxpool1(out)
        out = self.cnn2(out) 
        out = self.relu2(out) 
        out = self.maxpool2(out) 
        out = out.view(out.size(0), -1) # 완결연결층에 데이터를 전달하기 위해 데이터 형태를 1차원으로 바꿉니다.
        out = self.fc1(out) 
        out = self.fc2(out)                    
        out = self.output(out)
        return out

# 10. 모델 객체 생성

In [67]:
model = LeNet()
print(model)

LeNet(
  (cnn1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cnn2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=89888, out_features=512, bias=True)
  (relu5): ReLU()
  (fc2): Linear(in_features=512, out_features=2, bias=True)
  (output): Softmax(dim=1)
)


# 11. torchsummary 라이브러리를 이용한 모델의 네트워크 구조 확인

In [68]:
#!pip install torchsummary
from torchsummary import summary
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 220, 220]           1,216
              ReLU-2         [-1, 16, 220, 220]               0
         MaxPool2d-3         [-1, 16, 110, 110]               0
            Conv2d-4         [-1, 32, 106, 106]          12,832
              ReLU-5         [-1, 32, 106, 106]               0
         MaxPool2d-6           [-1, 32, 53, 53]               0
            Linear-7                  [-1, 512]      46,023,168
            Linear-8                    [-1, 2]           1,026
           Softmax-9                    [-1, 2]               0
Total params: 46,038,242
Trainable params: 46,038,242
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 19.47
Params size (MB): 175.62
Estimated Total Size (MB): 195.67
--------------------------------

# 12. 옵티마이저와 손실 함수 정의

In [69]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# 13. 모델의 파라미터와 손실 함수를 CPU에 할당

In [70]:
model = model.to(device)
criterion = criterion.to(device)

# 14. 모델 학습 함수 정의

In [72]:
def train_model(model, dataloader_dict, criterion, optimizer, num_epoch):    
    since = time.time()
    best_acc = 0.0
    
    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-'*20)
        
        for phase in ['train', 'val']:           
            if phase == 'train':
                model.train() # 모델을 학습시키겠다는 의미
            else:
                model.eval()
                
            epoch_loss = 0.0
            epoch_corrects = 0
            
            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
                    
            epoch_loss = epoch_loss / len(dataloader_dict[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dict[phase].dataset)
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()
                
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    return model

# 15. 모델 학습

In [73]:
import time

num_epoch = 10
model = train_model(model, dataloader_dict, criterion, optimizer, num_epoch)

Epoch 1/10
--------------------


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for inputs, labels in tqdm(dataloader_dict[phase]):


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6940 Acc: 0.5250


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.7253 Acc: 0.4674
Epoch 2/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6936 Acc: 0.5175


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.7053 Acc: 0.5000
Epoch 3/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6811 Acc: 0.5500


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.7309 Acc: 0.4348
Epoch 4/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6819 Acc: 0.5800


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.7057 Acc: 0.5326
Epoch 5/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6738 Acc: 0.5875


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.6990 Acc: 0.5217
Epoch 6/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6744 Acc: 0.5525


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.7218 Acc: 0.4674
Epoch 7/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6758 Acc: 0.5650


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.6848 Acc: 0.5109
Epoch 8/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6627 Acc: 0.6125


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.6821 Acc: 0.5543
Epoch 9/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6672 Acc: 0.6325


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.6834 Acc: 0.5652
Epoch 10/10
--------------------


  0%|          | 0/13 [00:00<?, ?it/s]

train Loss: 0.6567 Acc: 0.6150


  0%|          | 0/3 [00:00<?, ?it/s]

val Loss: 0.6806 Acc: 0.5761
Training complete in 2m 16s
Best val Acc: 0.576087


# 16. 모델 테스트를 위한 함수 정의

In [75]:
import pandas as pd
id_list = []
pred_list = []
_id=0

# 역전파 중 텐서들에 대한 변화도를 계산할 필요가 없음을 나타내는 것으로, 훈련 데이터셋의 모델 학습과 가장 큰 차이점입니다.
with torch.no_grad(): 
    for test_path in tqdm(test_images_filepaths): # test 데이터셋 이용
        img = Image.open(test_path)
        _id =test_path.split('/')[-1].split('.')[1]
        transform = ImageTransform(size, mean, std)
        img = transform(img, phase='val') # test 데이터셋 전처리
        img = img.unsqueeze(0) ## 1
        img = img.to(device)

        model.eval()
        outputs = model(img)
        preds = F.softmax(outputs, dim=1)[:, 1].tolist() ## 2       
        id_list.append(_id)
        pred_list.append(preds[0])
       
res = pd.DataFrame({
    'id': id_list,
    'label': pred_list
}) # test 예측 결과인 Id와 레이블을 데이터 프레임에 저장

res.sort_values(by='id', inplace=True)
res.reset_index(drop=True, inplace=True)

res.to_csv('LeNet.csv', index=False) # 데이터프레임을 csv 파일로 저장

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for test_path in tqdm(test_images_filepaths): # test 데이터셋 이용


  0%|          | 0/10 [00:00<?, ?it/s]

# 17. 테스트 데이터셋의 예측 결과 호출

In [76]:
res.head(10)

Unnamed: 0,id,label
0,109,0.424191
1,145,0.458853
2,15,0.636709
3,162,0.498484
4,167,0.531948
5,200,0.378861
6,210,0.569178
7,211,0.552809
8,213,0.397481
9,224,0.573105
