## 작물잎 질병 분류

### 데이터 분할을 위한 폴더 생성

In [44]:
import os
import shutil

original_dataset_dir = './data/dataset'
classes_list = os.listdir(original_dataset_dir)

base_dir = "./data/splitted"
os.mkdir(base_dir)

train_dir = os.path.join(base_dir, "train")
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir,"val")
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir,"test")
os.mkdir(test_dir)

for clss in classes_list:
    os.mkdir(os.path.join(train_dir, clss))
    os.mkdir(os.path.join(validation_dir, clss))
    os.mkdir(os.path.join(test_dir, clss))

### 데이터 분할과 클래스별 데이터 수 확인

In [45]:
import math

for clss in classes_list:
    path = os.path.join(original_dataset_dir,clss)
    fnames = os.listdir(path)
    
    train_size = math.floor(len(fnames) *0.6)
    validation_size = math.floor(len(fnames)*0.2)
    test_size = math.floor(len(fnames)*0.2)
    
    train_fnames = fnames[:train_size]
    print("Train size(",clss,"):", len(train_fnames))
    
    for fname in train_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(train_dir,clss), fname)
        shutil.copyfile(src, dst)
    
    validation_fnames = fnames[train_size:(validation_size + train_size)]
    print(f"Valiadation size ({clss}):", len(validation_fnames))
    
    for fname in validation_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(validation_dir,clss), fname)
        shutil.copyfile(src, dst)
        
    test_fnames = fnames[(validation_size + train_size):(validation_size + train_size + test_size)]
    print(f"Test size ({clss}):", len(test_fnames))
    
    for fname in test_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(test_dir,clss), fname)
        shutil.copyfile(src, dst)

Train size( Corn___Northern_Leaf_Blight ): 591
Valiadation size (Corn___Northern_Leaf_Blight): 197
Test size (Corn___Northern_Leaf_Blight): 197
Train size( Peach___healthy ): 216
Valiadation size (Peach___healthy): 72
Test size (Peach___healthy): 72
Train size( Tomato___Tomato_mosaic_virus ): 223
Valiadation size (Tomato___Tomato_mosaic_virus): 74
Test size (Tomato___Tomato_mosaic_virus): 74
Train size( Grape___Black_rot ): 708
Valiadation size (Grape___Black_rot): 236
Test size (Grape___Black_rot): 236
Train size( Potato___Early_blight ): 600
Valiadation size (Potato___Early_blight): 200
Test size (Potato___Early_blight): 200
Train size( Tomato___Bacterial_spot ): 1276
Valiadation size (Tomato___Bacterial_spot): 425
Test size (Tomato___Bacterial_spot): 425
Train size( Potato___Late_blight ): 600
Valiadation size (Potato___Late_blight): 200
Test size (Potato___Late_blight): 200
Train size( Tomato___Tomato_Yellow_Leaf_Curl_Virus ): 3214
Valiadation size (Tomato___Tomato_Yellow_Leaf_Curl

### 베이스라인 모델 학습을 위한 준비

In [47]:
import torch

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

BATCH_SIZE = 256
EPOCH = 30

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

transform_base = transforms.Compose([transforms.Resize((64,64)), transforms.ToTensor()])

train_dataset = ImageFolder(root="./data/splitted/train", transform=transform_base)
validation_dataset = ImageFolder(root="./data/splitted/val", transform=transform_base)

from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(validation_dataset, batch_size = BATCH_SIZE, shuffle=True, num_workers=4)

### 베이스 라인 모델 설계

In [48]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    
    def __init__(self):
        
        super(Net, self).__init__()
        
        self.conv1 = nn.Conv2d(3,32,3,padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        
        self.fc1 = nn.Linear(4096,512)
        self.fc2 = nn.Linear(512, 33)
        
    def forward(self, x):
        
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)
        
        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)
        
        x = x.view(-1, 4096)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.25, training=self.training)
        x = self.fc2(x)
        
        return F.log_softmax(x, dim=1)
    
model_base = Net().to(DEVICE)
optimizer = optim.Adam(model_base.parameters(), lr=0.001)        

### 모델 학습을 위한 함수

In [49]:
def train(model, train_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

### 모델 평가를 위한 함수

In [50]:
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100*correct/len(test_loader.dataset)
    return test_loss, test_accuracy

### 모델 학습 실행하기

In [51]:
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs = 30):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    
    for epoch in range(1,num_epochs+1):
        since = time.time()
        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)
        
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            
        time_elapsed = time.time() - since
        print(f'--------------------------------- epoch {epoch} ---------------------------------')
        print("train Loss: {:.4f}, Accuracy: {:.2f}%".format(train_loss, train_acc))
        print("validation Loss: {:.4f}, Accuracy: {:.2f}%".format(val_loss, val_acc))
        print("Completed in {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))
    model.load_state_dict(best_model_wts)
    return model

        
base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH)

torch.save(base, "./model/plant_baseline.pt")

### Transfer Learning

In [58]:
data_transforms = {
    "train" : transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406]
                            ,[0.299, 0.224, 0.225])
    ]),
    
    "val" : transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406]
                            ,[0.299, 0.224, 0.225])
    ])
}

data_dir = "./data/splitted"

image_datasets = {x: ImageFolder(root=os.path.join(data_dir, x), # 폴더 내의 이미지 데이터를 불러오는 함수, 폴더 이름이 target이 된다.
                                transform=data_transforms[x]) for x in ["train", "val"]}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ["train", "val"]}

dataset_sizes = {x: len(image_datasets[x]) for x in ["train", "val"]}

class_names = image_datasets["train"].classes

### Pre-Trained model import

In [102]:
from torchvision import models

resnet = models.resnet50(pretrained=True)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 33) # header convert
resnet = resnet.to(DEVICE)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, resnet.parameters()), lr = 0.001) # 원하는 layer만 학습시킬 수 있다.

from torch.optim import lr_scheduler

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

### Pre-Trained Model layer Freeze

In [103]:
ct =0
for child in resnet.children():
    ct+=1
    if ct < 6:
        for param in child.parameters():
            param.requires_grad = False

### 모델 학습과 검증을 위한 함수

In [104]:
def train_resnet(model, criterion, optimizer, scheduler, num_epochs=25):
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f"-------------------------------- epoch {epoch+1} -------------------------------------")
        since = time.time()
        
        for phase in ["train","val"]:
            if phase == "train":
                model.train()
            else :
                model.eval()
            
            running_loss = 0.0
            running_corrects = 0
            
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase=="train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1) # 값, index // output 중 가장 큰 값을 반환 
                    loss = criterion(outputs, labels)
                    
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                        
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == "train":
                scheduler.step()
                l_r = [x["lr"] for x in optimizer_ft.param_groups]
                print("learning rate: ",l_r)
                
            epoch_loss = running_loss/dataset_sizes[phase]
            epoch_acc = running_corrects.double()/dataset_sizes[phase]
            
            print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))
            
            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        
        time_elapsed = time.time() - since
        print("Completed in {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))
    print("Best val Acc: {:4f}".format(best_acc))
        
    model.load_state_dict(best_model_wts)
    return model

In [105]:
a = [[0.1, 0.2, 0.3,0.4]]
b = torch.Tensor(a)
torch.max(b,1)

torch.return_types.max(
values=tensor([0.4000]),
indices=tensor([3]))

### 모델 학습 실행

In [106]:
model_resnet50 = train_resnet(resnet, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=EPOCH)

torch.save(model_resnet50, "./model/plant_resnet50.pt")

-------------------------------- epoch 1 -------------------------------------
learning rate:  [0.001]
train Loss: 0.5926 Acc: 0.8221
val Loss: 0.4090 Acc: 0.8832
Completed in 0m 19s
-------------------------------- epoch 2 -------------------------------------
learning rate:  [0.001]
train Loss: 0.2263 Acc: 0.9283
val Loss: 0.2709 Acc: 0.9116
Completed in 0m 19s
-------------------------------- epoch 3 -------------------------------------
learning rate:  [0.001]
train Loss: 0.1766 Acc: 0.9434
val Loss: 0.2265 Acc: 0.9313
Completed in 0m 19s
-------------------------------- epoch 4 -------------------------------------
learning rate:  [0.001]
train Loss: 0.1414 Acc: 0.9538
val Loss: 0.1847 Acc: 0.9435
Completed in 0m 19s
-------------------------------- epoch 5 -------------------------------------
learning rate:  [0.001]
train Loss: 0.1212 Acc: 0.9601
val Loss: 0.1455 Acc: 0.9542
Completed in 0m 19s
-------------------------------- epoch 6 -------------------------------------
learni

### 베이스라인 모델 평가를 위한 전처리

In [110]:
transform_base = transforms.Compose([transforms.Resize([64,64]), transforms.ToTensor()])
test_base = ImageFolder(root="./data/splitted/test", transform = transform_base)
test_loader_base = torch.utils.data.DataLoader(test_base, batch_size = BATCH_SIZE, shuffle = True, num_workers=4)

### Transfer Learning모델 평가를 위한 전처리

In [125]:
transform_resNet = transforms.Compose([transforms.Resize([64,64])
                                      ,transforms.RandomCrop(52)
                                      ,transforms.ToTensor()
                                      ,transforms.Normalize([0.485,0.456,0.406], [0.299,0.224,0.225])]) #RGB 평균과 표준편차

test_resNet = ImageFolder(root='./data/splitted/test', transform = transform_resNet)
test_loader_resNet = torch.utils.data.DataLoader(test_resNet, batch_size = BATCH_SIZE, shuffle=True, num_workers=4)

### 베이스라인 모델 성능 평가하기

In [113]:
baseline = torch.load('./model/plant_baseline.pt')
baseline.eval()
test_loss, test_accuracy = evaluate(baseline,test_loader_base)

print("baseline test acc: ", test_accuracy)

baseline test acc:  93.99173864063087


In [126]:
resnet50 = torch.load('./model/plant_resnet50.pt')
resnet50.eval()
test_loss, test_accuracy = evaluate(resnet50, test_loader_resNet)
print("ResNet test acc:", test_accuracy)

ResNet test acc: 98.82338215045688
