In [1]:
import os
import shutil
 
original_dataset_dir = './dataset/'   
classes_list = os.listdir(original_dataset_dir) 
 
base_dir = './splitted/' 
os.mkdir(base_dir)
 
train_dir = os.path.join(base_dir, 'train') 
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'val')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

for cls in classes_list:     
    os.mkdir(os.path.join(train_dir, cls))
    os.mkdir(os.path.join(validation_dir, cls))
    os.mkdir(os.path.join(test_dir, cls))


FileExistsError: [WinError 183] 파일이 이미 있으므로 만들 수 없습니다: './splitted/'

In [None]:
import math
 
for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)
    fnames = os.listdir(path)
 
    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)
    
    train_fnames = fnames[:train_size]
    print("Train size(",cls,"): ", len(train_fnames))
    for fname in train_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(train_dir, cls), fname)
        shutil.copyfile(src, dst)
        
    validation_fnames = fnames[train_size:(validation_size + train_size)]
    print("Validation size(",cls,"): ", len(validation_fnames))
    for fname in validation_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(validation_dir, cls), fname)
        shutil.copyfile(src, dst)
        
    test_fnames = fnames[(train_size+validation_size):(validation_size + train_size +test_size)]

    print("Test size(",cls,"): ", len(test_fnames))
    for fname in test_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(test_dir, cls), fname)
        shutil.copyfile(src, dst)

Train size( Apple___Apple_scab ):  378
Validation size( Apple___Apple_scab ):  126
Test size( Apple___Apple_scab ):  126
Train size( Apple___Black_rot ):  372
Validation size( Apple___Black_rot ):  124
Test size( Apple___Black_rot ):  124
Train size( Apple___Cedar_apple_rust ):  165
Validation size( Apple___Cedar_apple_rust ):  55
Test size( Apple___Cedar_apple_rust ):  55
Train size( Apple___healthy ):  987
Validation size( Apple___healthy ):  329
Test size( Apple___healthy ):  329
Train size( Cherry___healthy ):  512
Validation size( Cherry___healthy ):  170
Test size( Cherry___healthy ):  170
Train size( Cherry___Powdery_mildew ):  631
Validation size( Cherry___Powdery_mildew ):  210
Test size( Cherry___Powdery_mildew ):  210
Train size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  307
Validation size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  102
Test size( Corn___Cercospora_leaf_spot Gray_leaf_spot ):  102
Train size( Corn___Common_rust ):  715
Validation size( Corn___Commo

In [6]:
# baseline model 

import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')

BATCH_SIZE = 256
EPOCH = 30

In [7]:
transform_base = transforms.Compose([transforms.Resize((64,64)),
                                    transforms.ToTensor()])

train_dataset = ImageFolder(root='./splitted/', transform=transform_base)
val_dataset = ImageFolder(root='./splitted/val/', transform=transform_base)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

In [8]:
# 모델 설계

class Net(nn.Module):
    def __init__(self) -> None:
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512, 33)

    def forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = x.view(-1, 4096)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        x = F.log_softmax(x, dim=1)
        return x

In [9]:
model_base = Net().to(DEVICE)
optimizer = optim.Adam(model_base.parameters(), lr=0.001)


In [10]:
# 학습 함수
def train(model, train_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()


In [11]:
# 모델 평가 함수
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy
    


In [12]:
# 모델 학습 실행

import time
import copy 

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs=30):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    
    for epoch in range(1, num_epochs+1):
        since = time.time()
        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('--------------epoch {} -------------'.format(epoch))
        print('train loss : {:.4f}, Accuracy : {:.2f}%'.format(train_loss,  train_acc))
        print('val loss : {:.4f}, Accuracy : {:.2f}%'.format(val_loss,  val_acc))
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed%60))
    model.load_state_dict(best_model_wts)
    return model

base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH)

torch.save(base, 'baseline.pt')


--------------epoch 1 -------------
train loss : 1.0879, Accuracy : 60.02%
val loss : 5.1166, Accuracy : 4.78%
Completed in 0m 57s
--------------epoch 2 -------------
train loss : 1.0326, Accuracy : 60.02%
val loss : 5.6365, Accuracy : 4.78%
Completed in 0m 48s
--------------epoch 3 -------------
train loss : 0.9937, Accuracy : 60.02%
val loss : 6.4137, Accuracy : 4.78%
Completed in 0m 49s
--------------epoch 4 -------------
train loss : 1.0033, Accuracy : 60.02%
val loss : 6.1748, Accuracy : 4.78%
Completed in 0m 51s
--------------epoch 5 -------------
train loss : 0.9909, Accuracy : 60.02%
val loss : 6.5117, Accuracy : 4.78%
Completed in 0m 50s
--------------epoch 6 -------------
train loss : 0.9708, Accuracy : 60.02%
val loss : 7.1682, Accuracy : 4.78%
Completed in 0m 48s
--------------epoch 7 -------------
train loss : 0.9712, Accuracy : 60.02%
val loss : 7.2628, Accuracy : 4.78%
Completed in 0m 49s
--------------epoch 8 -------------
train loss : 0.9684, Accuracy : 60.02%
val loss

In [15]:
# transfer learning

data_transforms = {
    'train':transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456, 0.406], [0.229,0.224,0.225])
    ]),
    'val':transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456, 0.406], [0.229,0.224,0.225])
    ])

}

data_dir = './splitted'
image_datasets = {x : ImageFolder(root=os.path.join(data_dir, x), transform=data_transforms[x]) for x in ['train','val']}

dataloaders = {x:torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train','val']}

dataset_sizes = {x:len(image_datasets[x])for x in ['train', 'val']}

class_names = image_datasets['train'].classes


In [17]:
# pre train model load

from torchvision import models

resnet = models.resnet50(pretrained=True)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 33)
resnet = resnet.to(DEVICE)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, resnet.parameters()), lr=0.001)

from torch.optim import lr_scheduler

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


In [18]:
cnt = 0
for child in resnet.children():
    cnt +=1 
    if cnt <6:
        for param in child.parameters():
            param.requires_grad = False

In [19]:
def train_resnet(model, criterion, optimizer, scheduler, num_epochs=25):

    best_model_wts = copy.deepcopy(model.state_dict())  
    best_acc = 0.0  
    
    for epoch in range(num_epochs):
        print('-------------- epoch {} ----------------'.format(epoch+1)) 
        since = time.time()                                     
        for phase in ['train', 'val']: 
            if phase == 'train': 
                model.train() 
            else:
                model.eval()     
 
            running_loss = 0.0  
            running_corrects = 0  
 
            
            for inputs, labels in dataloaders[phase]: 
                inputs = inputs.to(DEVICE)  
                labels = labels.to(DEVICE)  
                
                optimizer.zero_grad() 
                
                with torch.set_grad_enabled(phase == 'train'):  
                    outputs = model(inputs)  
                    _, preds = torch.max(outputs, 1) 
                    loss = criterion(outputs, labels)  
    
                    if phase == 'train':   
                        loss.backward()
                        optimizer.step()
 
                running_loss += loss.item() * inputs.size(0)  
                running_corrects += torch.sum(preds == labels.data)  
            if phase == 'train':  
                scheduler.step()
 
            epoch_loss = running_loss/dataset_sizes[phase]  
            epoch_acc = running_corrects.double()/dataset_sizes[phase]  
 
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc)) 
 
          
            if phase == 'val' and epoch_acc > best_acc: 
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
 
        time_elapsed = time.time() - since  
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
 
    model.load_state_dict(best_model_wts) 

    return model

In [20]:
model_resnet50 = train_resnet(resnet, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=EPOCH)
torch.save(model_resnet50, 'resnet50.pt')

-------------- epoch 1 ----------------
train Loss: 0.5098 Acc: 0.8454
val Loss: 0.6032 Acc: 0.8678
Completed in 1m 21s
-------------- epoch 2 ----------------
train Loss: 0.1402 Acc: 0.9526
val Loss: 0.3137 Acc: 0.9109
Completed in 1m 21s
-------------- epoch 3 ----------------
train Loss: 0.1156 Acc: 0.9635
val Loss: 0.1370 Acc: 0.9580
Completed in 1m 21s
-------------- epoch 4 ----------------
train Loss: 0.1026 Acc: 0.9656
val Loss: 0.0877 Acc: 0.9692
Completed in 1m 21s
-------------- epoch 5 ----------------
train Loss: 0.0689 Acc: 0.9760
val Loss: 0.1210 Acc: 0.9645
Completed in 1m 23s
-------------- epoch 6 ----------------
train Loss: 0.0701 Acc: 0.9773
val Loss: 0.0978 Acc: 0.9676
Completed in 1m 23s
-------------- epoch 7 ----------------
train Loss: 0.0889 Acc: 0.9712
val Loss: 0.1043 Acc: 0.9707
Completed in 1m 21s
-------------- epoch 8 ----------------
train Loss: 0.0563 Acc: 0.9827
val Loss: 0.0610 Acc: 0.9815
Completed in 1m 21s
-------------- epoch 9 ----------------


In [23]:
transform_base = transforms.Compose([transforms.Resize([64,64]),
                                    transforms.ToTensor()])
test_base = ImageFolder(root='./splitted/test', transform=transform_base)
test_loader_base = torch.utils.data.DataLoader(test_base, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)


In [25]:
transform_resNet = transforms.Compose([
        transforms.Resize([64,64]),  
        transforms.RandomCrop(52),  
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 
    ])
    
test_resNet = ImageFolder(root='./splitted/test', transform=transform_resNet) 
test_loader_resNet = torch.utils.data.DataLoader(test_resNet, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

baseline=torch.load('baseline.pt') 
baseline.eval()  
test_loss, test_accuracy = evaluate(baseline, test_loader_base)

print('baseline test acc:  ', test_accuracy)

resnet50=torch.load('./resnet50.pt') 
resnet50.eval()  
test_loss, test_accuracy = evaluate(resnet50, test_loader_resNet)

print('ResNet test acc:  ', test_accuracy)

baseline test acc:   4.780262143407865


FileNotFoundError: [Errno 2] No such file or directory: './resnet50.pt'