In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split, KFold
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision.datasets import ImageFolder

import matplotlib.pyplot as plt
import numpy as np
import time
import os
from PIL import Image

%matplotlib inline

In [2]:
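# Disabled one-off data check: walks every class folder and prints the path of
# any image whose channel count is not 3. Kept commented out for reference.
# import glob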

# data_path = '../../data/caffe_drinks/'
# class_names = os.listdir(data_path)

# for name in class_names:
#     image_list = glob.glob(data_path + name + '/*')
    
#     for image in image_list:
#         img = Image.open(image)
#         img_channel = len(img.split())
        
#         if img_channel != 3:
#             print(image)
    
#     print(name, 'Done')

In [3]:
# Training hyper-parameters and cross-validation settings.
EPOCH = 50
TRAIN_BATCH_SIZE = 32
TEST_BATCH_SIZE = 4
TEST_SIZE = 0.20  # NOTE(review): not referenced by any cell visible here
LEARNING_RATE = 0.001
MOMENTUM = 0.9
FOLD_N = 2
CLASS_NUM = 9

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('We are Using :', device)

# Seed NumPy as well as PyTorch: scikit-learn's KFold(shuffle=True) draws from
# NumPy's global RNG when it is given no random_state, so seeding torch alone
# leaves the fold membership different on every run.
np.random.seed(42)
torch.manual_seed(42)

We are Using : cuda


<torch._C.Generator at 0x204d5f84330>

In [6]:
class WrapperDataset:
    """Indexable view over ``dataset`` that applies transforms on access.

    Every item of the wrapped dataset is unpacked as an ``(image, label)``
    pair. ``transform`` is applied to the image and ``target_transform`` to
    the label; either may be ``None``, in which case that element is passed
    through unchanged.
    """

    def __init__(self, dataset, transform=None, target_transform=None):
        self.dataset = dataset
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair at ``index`` after transforms."""
        sample, target = self.dataset[index]

        # Image transform runs first, then the label transform.
        sample = sample if self.transform is None else self.transform(sample)
        target = target if self.target_transform is None else self.target_transform(target)

        return sample, target

In [4]:
def train(model, loss_func, optimizer, device, dataloader):
    """Run one optimisation pass of ``model`` over ``dataloader``.

    Args:
        model: network to train; switched to training mode before the loop.
        loss_func: callable invoked as ``loss_func(outputs, labels)``.
        optimizer: optimizer whose gradients are zeroed and stepped per batch.
        device: device the image and label batches are moved to.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(total_loss, accuracy)``. ``total_loss`` is the sum of the per-batch
        loss values (not divided by the number of batches), accumulated on the
        CPU. ``accuracy`` is the percentage of samples whose arg-max
        prediction equals the label.
    """
    model.train()

    total_loss = 0
    seen = 0
    hits = 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.detach().cpu()

        # Predicted class is the index of the largest output along dim 1.
        preds = outputs.argmax(1)
        seen += preds.size(0)
        hits += (preds == labels.view_as(preds)).sum().item()

    accuracy = (hits / seen) * 100

    return total_loss, accuracy

def test(model, loss_func, device, dataloader):
    total_loss, accuracy = 0, 0
    total_samples, correct_samples = 0, 0
    
    model.eval()
    
    with torch.no_grad():
        for i, (images, labels) in enumerate(dataloader):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)

            loss = loss_func(outputs, labels)
            total_loss += loss.detach().cpu()

            preds = outputs.argmax(1)
            total_samples += preds.size()[0]
            correct_samples += preds.eq(labels.view_as(preds)).cpu().sum().item()

        accuracy = (correct_samples / total_samples) * 100  
    
    return total_loss, accuracy


def draw_matrix(model, device, dataloader, class_num):
    """Build the confusion matrix of ``model`` over ``dataloader``.

    Args:
        model: network to evaluate; switched to evaluation mode first.
        device: device the image and label batches are moved to.
        dataloader: iterable yielding ``(images, labels)`` batches.
        class_num: number of classes; sets the side length of the matrix.

    Returns:
        A ``(class_num, class_num)`` float tensor on the CPU. Entry
        ``[t, p]`` counts the samples whose label is ``t`` and whose arg-max
        prediction is ``p`` (rows are true labels, columns are predictions).
    """
    model.eval()

    # Size the matrix from the argument rather than the notebook-level
    # CLASS_NUM global, so the function honours what the caller passes in.
    confusion_matrix = torch.zeros(class_num, class_num)

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            preds = model(images).argmax(1)

            # Bring each batch to the host once as plain ints instead of
            # indexing the CPU matrix with one device tensor per sample.
            true_labels = labels.view(-1).long().tolist()
            pred_labels = preds.view(-1).long().tolist()

            for row, col in zip(true_labels, pred_labels):
                confusion_matrix[row, col] += 1

    return confusion_matrix

In [5]:
# Root directory handed to ImageFolder.
data_path = '../../data/caffe_drinks/'
# No transform is attached here; WrapperDataset applies the train or test
# pipeline per loader so both can share this one underlying dataset.
dataset = ImageFolder(root=data_path)

# Fix random_state (same seed value as torch.manual_seed in the config cell)
# so the shuffled fold membership is identical on every run.
kfold = KFold(n_splits=FOLD_N, shuffle=True, random_state=42)

# Training pipeline: resize, random flip / affine augmentation, then
# tensor conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(30),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
# Per-fold training curves keyed by fold index. The *_per_epoch lists below
# are re-created for every fold, so without this only the last fold's
# history would survive the loop.
fold_histories = {}

for fold, (train_idx, test_idx) in enumerate(kfold.split(dataset)):
    print('--------------------------------')
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Both loaders read the same underlying dataset; the samplers restrict
    # each one to its own index set for this fold.
    train_subsampler = SubsetRandomSampler(train_idx)
    test_subsampler = SubsetRandomSampler(test_idx)

    trainloader = DataLoader(WrapperDataset(dataset, transform=train_transform),
                             batch_size=TRAIN_BATCH_SIZE,
                             sampler=train_subsampler)
    testloader = DataLoader(WrapperDataset(dataset, transform=test_transform),
                            batch_size=TEST_BATCH_SIZE,
                            sampler=test_subsampler)

    # Fresh pretrained backbone per fold: freeze every existing parameter and
    # replace the final layer with a new CLASS_NUM-way linear head.
    # NOTE(review): newer torchvision releases prefer the `weights=` argument
    # over `pretrained=True` - confirm against the installed version.
    model = torchvision.models.resnet152(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.fc = nn.Linear(2048, CLASS_NUM)
    model = model.to(device)

    # Only the new head is trainable, so hand just those parameters to SGD
    # instead of the whole (mostly frozen) network.
    optimizer = optim.SGD(model.fc.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
    loss_func = nn.CrossEntropyLoss()

    train_loss_per_epoch = []
    train_acc_per_epoch = []
    test_loss_per_epoch = []
    test_acc_per_epoch = []

    for epoch in range(EPOCH):
        train_loss, train_acc = train(model, loss_func, optimizer, device, trainloader)
        test_loss, test_acc = test(model, loss_func, device, testloader)

        train_loss_per_epoch.append(train_loss)
        train_acc_per_epoch.append(train_acc)
        test_loss_per_epoch.append(test_loss)
        test_acc_per_epoch.append(test_acc)

        # The confusion matrix is not built here: doing so costs a second
        # full pass over the test loader every epoch and the result was
        # overwritten without being read. Call draw_matrix after training.

        print('Epoch %d / %d ---- train_loss: %0.4f ---- train_acc: %0.4f ---- test_loss: %0.4f ---- test_acc: %0.4f'
             %(epoch+1, EPOCH, train_loss, train_acc, test_loss, test_acc))

    fold_histories[fold] = {
        'train_loss': train_loss_per_epoch,
        'train_acc': train_acc_per_epoch,
        'test_loss': test_loss_per_epoch,
        'test_acc': test_acc_per_epoch,
    }

--------------------------------
FOLD 0
--------------------------------
Epoch 1 / 50 ---- train_loss: 115.6541 ---- train_acc: 49.9075 ---- test_loss: 651.1437 ---- test_acc: 72.3404
Epoch 2 / 50 ---- train_loss: 68.8921 ---- train_acc: 77.6596 ---- test_loss: 448.8478 ---- test_acc: 82.3312
Epoch 3 / 50 ---- train_loss: 54.0026 ---- train_acc: 81.0361 ---- test_loss: 373.1538 ---- test_acc: 84.4126
Epoch 4 / 50 ---- train_loss: 45.3266 ---- train_acc: 84.3201 ---- test_loss: 326.1725 ---- test_acc: 85.0139
Epoch 5 / 50 ---- train_loss: 41.0691 ---- train_acc: 84.1813 ---- test_loss: 307.1818 ---- test_acc: 85.0139
Epoch 6 / 50 ---- train_loss: 38.1304 ---- train_acc: 85.6614 ---- test_loss: 287.5931 ---- test_acc: 85.5227
Epoch 7 / 50 ---- train_loss: 34.1738 ---- train_acc: 87.0028 ---- test_loss: 264.3050 ---- test_acc: 86.4477
Epoch 8 / 50 ---- train_loss: 32.5792 ---- train_acc: 86.7253 ---- test_loss: 254.1192 ---- test_acc: 86.9103
Epoch 9 / 50 ---- train_loss: 31.4610 ---- tra

Epoch 24 / 50 ---- train_loss: 23.2366 ---- train_acc: 89.5005 ---- test_loss: 197.5818 ---- test_acc: 88.6216
Epoch 25 / 50 ---- train_loss: 23.9941 ---- train_acc: 89.0379 ---- test_loss: 191.0231 ---- test_acc: 88.8529
Epoch 26 / 50 ---- train_loss: 23.2379 ---- train_acc: 89.4542 ---- test_loss: 189.0785 ---- test_acc: 89.0842
Epoch 27 / 50 ---- train_loss: 23.6970 ---- train_acc: 88.3904 ---- test_loss: 191.4864 ---- test_acc: 88.8992
Epoch 28 / 50 ---- train_loss: 22.2607 ---- train_acc: 89.5005 ---- test_loss: 190.0513 ---- test_acc: 88.4366
Epoch 29 / 50 ---- train_loss: 22.5634 ---- train_acc: 90.1943 ---- test_loss: 195.7009 ---- test_acc: 87.9741
Epoch 30 / 50 ---- train_loss: 22.3754 ---- train_acc: 89.4542 ---- test_loss: 184.8720 ---- test_acc: 88.7142
Epoch 31 / 50 ---- train_loss: 21.4749 ---- train_acc: 90.3793 ---- test_loss: 184.8850 ---- test_acc: 88.7604
Epoch 32 / 50 ---- train_loss: 21.2986 ---- train_acc: 90.7493 ---- test_loss: 185.0263 ---- test_acc: 88.6216
E

In [12]:
# Invert ImageFolder's class-name -> index mapping so indices map to names.
label_to_name = {v:k for k,v in dataset.class_to_idx.items()}
print('Label to Name : ', label_to_name)

# Uses `model` and `testloader` as they were left by the training loop,
# i.e. the model and held-out split of the last fold that ran.
confusion_matrix = draw_matrix(model, device, testloader, CLASS_NUM)
# Rows of the matrix are true labels, so diagonal / row-sum is the fraction
# of each class's samples that were predicted correctly. A class with no
# samples in this split yields 0/0 = nan.
result = confusion_matrix.diag()/confusion_matrix.sum(1)

print('--------Accuracy of Each Class--------')
# Look each row up by its class index instead of relying on the dict's
# iteration order lining up with the matrix rows.
for label, name in sorted(label_to_name.items()):
    print('%s : %0.2f%%' %(name, result[label] * 100))

Label to Name :  {0: 'americano', 1: 'bubbletea_blacksugar', 2: 'cappuccino', 3: 'caramel_macchiato', 4: 'frappuccino_javachip', 5: 'latte_Strawberry', 6: 'latte_goguma', 7: 'latte_greentea', 8: 'mango_juice'}
--------Accuracy of Each Class--------
americano : 96.36%
bubbletea_blacksugar : 82.59%
cappuccino : 81.71%
caramel_macchiato : 84.15%
frappuccino_javachip : 91.34%
latte_Strawberry : 95.74%
latte_goguma : 79.65%
latte_greentea : 89.67%
mango_juice : 95.04%
