# LeNet-5 구현
## 구현 설계
- 기본 : pandas, scikit-learn, torch, numpy, matplotlib
- 라이브러리 선정
- 데이터를 불러와야 됨
- 간단한 CNN 모델을 작성
- 학습 및 평가

In [1]:
import time
from pathlib import Path

import random
import pandas as pandas
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from PIL import Image
from tqdm import tqdm

plt.rcParams["font.family"] = "Malgun Gothic"
plt.rcParams["axes.unicode_minus"] = False

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

random.seed(42)

cuda


In [2]:
# 이미지 전처리
# imagenet std mean 수치 사용하면 일반적으로는 괜찮다

class ImageTransform:
    def __init__(self, resize, mean, std):
        self.data_transform = {
            "train": transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.RandomHorizontalFlip(p=0.5), # 학습에만
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            "val": transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            "test": transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])
        }

    def __call__(self, img, phase):
        return self.data_transform[phase](img)

In [3]:
class DogVsCatDataset(Dataset):
    def __init__(self, file_list, transform=None, phase="train"):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, index):
        img_path = self.file_list[index]
        img = Image.open(img_path).convert("RGB") # 흑백도 RGB로 바꿀거야
        img_transform = self.transform(img, self.phase)
        if "dog" in img_path.lower():
            label = 1
        else:
            label = 0
        return img_transform, label

In [4]:
cat_directory = Path("data/catanddog/train/Cat")
dog_directory = Path("data/catanddog/train/Dog")
cat_images_filepaths = sorted([str(p) for p in cat_directory.glob("*.jpg")])
dog_images_filepaths = sorted([str(p) for p in dog_directory.glob("*.jpg")])
images_filepath = cat_images_filepaths + dog_images_filepaths
random.shuffle(images_filepath)

In [5]:
cat_directory

WindowsPath('data/catanddog/train/Cat')

In [6]:
# cat_directory.glob("*.jpg") -> cat_directory의 .jpg를 포함한 모든것을 잡아라glob
[str(p) for p in cat_directory.glob("*.jpg")] # \\ -> 이스케이프문자+\

['data\\catanddog\\train\\Cat\\0.jpg',
 'data\\catanddog\\train\\Cat\\1.jpg',
 'data\\catanddog\\train\\Cat\\10.jpg',
 'data\\catanddog\\train\\Cat\\11.jpg',
 'data\\catanddog\\train\\Cat\\12.jpg',
 'data\\catanddog\\train\\Cat\\13.jpg',
 'data\\catanddog\\train\\Cat\\14.jpg',
 'data\\catanddog\\train\\Cat\\15.jpg',
 'data\\catanddog\\train\\Cat\\16.jpg',
 'data\\catanddog\\train\\Cat\\17.jpg',
 'data\\catanddog\\train\\Cat\\18.jpg',
 'data\\catanddog\\train\\Cat\\19.jpg',
 'data\\catanddog\\train\\Cat\\2.jpg',
 'data\\catanddog\\train\\Cat\\20.jpg',
 'data\\catanddog\\train\\Cat\\21.jpg',
 'data\\catanddog\\train\\Cat\\22.jpg',
 'data\\catanddog\\train\\Cat\\23.jpg',
 'data\\catanddog\\train\\Cat\\24.jpg',
 'data\\catanddog\\train\\Cat\\25.jpg',
 'data\\catanddog\\train\\Cat\\26.jpg',
 'data\\catanddog\\train\\Cat\\27.jpg',
 'data\\catanddog\\train\\Cat\\28.jpg',
 'data\\catanddog\\train\\Cat\\29.jpg',
 'data\\catanddog\\train\\Cat\\3.jpg',
 'data\\catanddog\\train\\Cat\\30.jpg',
 'da

In [7]:
total_images = len(images_filepath)
train_size = int(total_images * 0.7)
val_size = int(total_images * 0.2)
test_size = total_images - (train_size + val_size)

In [8]:
train_images_filepaths = images_filepath[:train_size]
val_images_filepaths = images_filepath[train_size: train_size + val_size]
test_images_filepaths = images_filepath[train_size + val_size: train_size + val_size + test_size]

In [9]:
size = 224,
mean= (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

In [10]:
train_dataset = DogVsCatDataset(train_images_filepaths, transform=ImageTransform(size, mean, std), phase="train")
val_dataset = DogVsCatDataset(val_images_filepaths, transform=ImageTransform(size, mean, std), phase="val")
test_dataset = DogVsCatDataset(test_images_filepaths, transform=ImageTransform(size, mean, std), phase="test")

In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0) # shuffle은 train만
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

In [12]:
dataloader_dic = {
    "train": train_dataloader,
    "val": val_dataloader,
    "test": test_dataloader
}

In [13]:
# 특성맵 사이즈 6@28*28(C) -> 6@14*14(Sampling -> Pooling) -> 16@10*10(C) -> 16@5*5 (특성맵 => OOPool)
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        # 224 -> 220
        self.c1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0) 
        # 220 -> 110
        self.s2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # 110 -> 106
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        # 
        self.s4 = nn.AvgPool2d(kernel_size=2, stride=2) # 16*53
        #
        self.f5 = nn.Linear(53*53*16, 120)
        self.f6 = nn.Linear(120, 64)
        self.output = nn.Linear(64, 2)

    def forward(self, x):
        x = torch.tanh(self.c1(x)) # 활성화 함수
        x = self.s2(x) # 활성화 함수 안한다!!!
        x = torch.tanh(self.c3(x))
        x = self.s4(x)
        
        # flatten : CNN -> DNN으로 바꿀 때 flatten 해주기!!!!!
        x = x.view(x.size(0), -1)

        x = torch.tanh(self.f5(x))
        x = torch.tanh(self.f6(x))
        x = self.output(x)

        return x

In [None]:
#model = LeNet5()
#print(model)

LeNet5(
  (c1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (s2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (c3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (s4): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (f5): Linear(in_features=44944, out_features=120, bias=True)
  (f6): Linear(in_features=120, out_features=64, bias=True)
  (output): Linear(in_features=64, out_features=2, bias=True)
)


In [None]:
def train_model(model, dataloader_dic, criterion, optimizer, num_epoch):
    since = time.time()
    best_acc = 0.0

    for epoch in range(num_epoch):
        print(f"Epoch: {epoch + 1} / {num_epoch}")
        print("-" * 20)
        
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else:
                model.eval()            
            epoch_loss = 0.0
            epoch_corrects = 0
            
            for inputs, labels in tqdm(dataloader_dic[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()                    
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
            
            epoch_loss = epoch_loss / len(dataloader_dic[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dic[phase].dataset)            
            print(f"loss: {epoch_loss}, acc: {epoch_acc}")

            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                bst_model_wts = model.state_dict() # 최적의 매개변수를 저장
    time_elapsed = time.time() - since
    print(f"best acc: {best_acc}, end: {time_elapsed % 60}s")
    return model

In [None]:
# %pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [None]:
model = LeNet5()
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
num_epoch = 10
model = train_model(model, dataloader_dic, criterion, optimizer, num_epoch)

Epoch: 1 / 10
--------------------


  0%|          | 0/12 [00:00<?, ?it/s]


TypeError: 'tuple' object cannot be interpreted as an integer