Load libraries

In [None]:
from os import listdir
from os.path import join, splitext, basename
import glob
import torch.utils.data as data
import torchvision.transforms as trans
from torch.utils.data import DataLoader
from torch.utils.data.sampler import Sampler
from PIL import Image
from imgaug import augmenters as iaa
from matplotlib.pyplot import figure, imshow, axis
import imgaug as ia
import numpy as np
import PIL
import torch
from PIL import Image
import matplotlib.pyplot as plt
import statistics
import random
import natsort
import copy
import collections
import torchvision.models as models
import torchvision
import torch.nn as nn
import torch.optim as optim
import os 
from PIL import Image
from torch.utils.data import random_split
from collections import OrderedDict

Check device

In [None]:
# Determine whether this system supports CUDA and pick the device that the
# rest of the notebook moves models and tensors to.
print("==> Check devices..")
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Current device: ", device)

# Also print the current GPU id and the number of usable GPUs.
# torch.cuda.current_device() raises on a machine without CUDA, so it is only
# queried when a GPU is actually present.
if torch.cuda.is_available():
    print("Our selected device: ", torch.cuda.current_device())
print(torch.cuda.device_count(), " GPUs is available")

Class-weighted random sampler

In [None]:
def WRSampler(dataset, wts):
    """Build a WeightedRandomSampler that spreads each class weight over its samples.

    Args:
        dataset: object exposing ``class_num_dict`` (class name -> sample
            count) and ``label2name`` (label index -> class name).
        wts: per-class weights, indexed by label.

    Returns:
        A ``WeightedRandomSampler`` drawing ``len(weights)`` samples with
        replacement, where every sample of class ``c`` has weight
        ``wts[c] / count(c)``.
    """
    counts = dataset.class_num_dict
    per_sample_wts = []

    # The weights are emitted class by class in label order.
    # NOTE(review): this presumably relies on the dataset listing its samples
    # grouped by label in the same order -- confirm against the dataset.
    for label in range(len(counts)):
        n_items = counts[dataset.label2name[label]]
        per_sample_wts.extend(wts[label] / n_items for _ in range(n_items))

    return torch.utils.data.sampler.WeightedRandomSampler(
        per_sample_wts, len(per_sample_wts), replacement=True
    )

Dataset

In [None]:
class Plantdisease_Dataset(data.Dataset):
    """Image-folder dataset: one sub-directory of ``image_dir`` per class.

    Labels are assigned by enumerating the class directories in sorted order,
    so two directory trees with the same class folder names (e.g. train and
    test) always get the same name <-> label mapping.

    Attributes (plain instance attributes, read directly by callers):
        image_filenames: list of image paths, grouped by class in label order.
        label: list of integer labels, parallel to ``image_filenames``.
        name2label: class folder name -> integer label.
        label2name: integer label -> class folder name.
        class_num_dict: class folder name -> number of images in that class.
        input_transform: callable applied to each RGB PIL image, or None.
    """

    def __init__(self, image_dir, input_transform, is_train=False):
        """Index every file found under each class folder of ``image_dir``.

        Args:
            image_dir: root directory containing one folder per class.
            input_transform: callable applied to each loaded image, or None.
            is_train: accepted for interface compatibility; currently unused.
        """
        super(Plantdisease_Dataset, self).__init__()

        self.image_filenames = []
        self.name2label = {}
        self.label2name = {}
        self.label = []
        self.class_num_dict = {}
        self.input_transform = input_transform

        # glob returns entries in arbitrary (filesystem) order; sort so the
        # label of a class does not depend on the OS or on the directory.
        # Non-directory entries are skipped so a stray file in the root does
        # not become an empty class and shift the label indices.
        class_dirs = sorted(
            path for path in glob.glob(join(image_dir, '*'))
            if os.path.isdir(path)
        )

        for lab, class_dir in enumerate(class_dirs):
            # basename instead of split('/') so Windows paths work too.
            class_name = basename(class_dir)
            image_list = sorted(glob.glob(join(class_dir, '*')))

            self.name2label[class_name] = lab
            self.label2name[lab] = class_name
            self.class_num_dict[class_name] = len(image_list)
            self.image_filenames.extend(image_list)
            self.label.extend([lab] * len(image_list))

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``.

        Raises:
            IndexError: if ``index`` is out of range.
        """
        # Let an out-of-range index raise IndexError directly instead of
        # printing and then failing on an undefined variable.
        input_file = self.image_filenames[index]

        # The context manager closes the underlying file once the pixel data
        # has been converted into an independent RGB image.
        with Image.open(input_file) as raw:
            img = raw.convert('RGB')

        if self.input_transform is not None:
            img = self.input_transform(img)

        return img, self.label[index]

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_filenames)

    def images(self):
        """Return the list of indexed image paths."""
        return self.image_filenames

    # NOTE: the former ``class_num_dict()`` and ``name2label()`` methods were
    # removed. They were unreachable on instances because ``__init__`` assigns
    # instance attributes with the same names, which always take precedence.

    def classes(self):
        """Return the class folder names in label order."""
        return list(self.class_num_dict.keys())

Training

In [None]:
def _run_epoch(model, criterion, loader, run_device, optimizer=None):
    """Run one pass over ``loader``; train if ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_batch_loss, accuracy)``; both are 0 when the loader is empty.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    batch_num = 0
    img_num = 0

    # Autograd is only needed while training; disabling it during validation
    # avoids building graphs that are never used.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # The predicted class is the index of the largest output.
            _, pred = outputs.max(1)
            correct += pred.eq(labels).sum().item()
            loss_sum += loss.item()
            batch_num += 1
            img_num += len(labels)

    return loss_sum / max(batch_num, 1), correct / max(img_num, 1)


def train(model, criterion, optimizer, max_epoch, train_loader, validation_loader, config):
    """Train ``model`` with early stopping on the validation error.

    After every epoch the validation error (1 - accuracy) is compared with the
    best value so far. A snapshot of the weights is kept for the best epoch;
    training stops once ``config.early_stop`` consecutive epochs bring no
    improvement, or after ``max_epoch`` epochs.

    When training ends, the best weights are loaded back into ``model`` and
    saved to ``<config.model_ouput_dir>/<config.model_name>_<best_epoch>.pth``.

    Args:
        model: network to train (updated in place).
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer bound to ``model``'s parameters.
        max_epoch: maximum number of epochs.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        validation_loader: iterable of ``(inputs, labels)`` validation batches.
        config: object providing ``early_stop``, ``model_ouput_dir`` and
            ``model_name``; ``config.best_epoch`` is written by this function.

    Returns:
        dict with per-epoch lists ``train_loss``, ``validation_loss``,
        ``training_accuracy`` and ``validation_accuracy``.
    """
    # Run on whatever device the model lives on; fall back to the notebook's
    # global ``device`` only for a parameter-less model.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device(device)

    history = {
        'train_loss': [],
        'validation_loss': [],
        'training_accuracy': [],
        'validation_accuracy': [],
    }
    min_val_error = None
    early_stop_timer = 0
    best_state = None

    for epoch in range(max_epoch):
        train_loss, train_acc = _run_epoch(model, criterion, train_loader, run_device, optimizer)
        val_loss, val_acc = _run_epoch(model, criterion, validation_loader, run_device)
        val_error = 1 - val_acc

        print('[%d, %5d] train_loss: %.3f' % (epoch, max_epoch, train_loss))
        print('[%d, %5d] validation_loss: %.3f' % (epoch, max_epoch, val_loss))
        print('%d epoch, training accuracy: %.4f' % (epoch, train_acc))
        print('%d epoch, validation accuracy: %.4f' % (epoch, val_acc))

        history['train_loss'].append(train_loss)
        history['validation_loss'].append(val_loss)
        history['training_accuracy'].append(train_acc)
        history['validation_accuracy'].append(val_acc)

        if min_val_error is None or val_error < min_val_error:
            min_val_error = val_error
            config.best_epoch = epoch
            early_stop_timer = 0
            # Deep-copy the weights: keeping a reference to ``model`` would
            # silently track every later update instead of the best epoch.
            best_state = copy.deepcopy(model.state_dict())
            print('Current best.')
        else:
            early_stop_timer += 1
            if early_stop_timer >= config.early_stop:
                print('Early Stop.\n Best epoch is', str(config.best_epoch))
                break

        print('-----------------------------------------')

    if best_state is not None:
        # Hand the best weights back to the caller and persist them, whether
        # training stopped early or ran through all epochs.
        model.load_state_dict(best_state)
        if config.model_ouput_dir:
            os.makedirs(config.model_ouput_dir, exist_ok=True)
        torch.save(best_state, os.path.join(config.model_ouput_dir,
                                            config.model_name + '_' +
                                            str(config.best_epoch) + '.pth'))

    print('Finished Training')
    return history

Parameter config

In [None]:
class Config():
    """Hyper-parameters, dataset locations and output naming for the notebook."""

    def __init__(self):
        # --- data -----------------------------------------------------------
        self.folder_names2code = {}
        self.each_class_item_num = {}
        self.image_size = 256          # images are resized to image_size x image_size
        self.class_num = 15
        self.train_dataset_path = r'train'
        self.validation_dataset_path = r'validation'
        self.test_dataset_path = r'test'

        # Disabled: per-class file counts of the test set.
        # class_folder_name = listdir(self.test_dataset_path)
        # self.class_folder_num = {}
        # for cf in class_folder_name:
        #     self.class_folder_num[cf] = len(listdir(self.test_dataset_path + '/' + cf))

        # --- optimisation ---------------------------------------------------
        self.max_epoch = 1000
        self.early_stop = 3            # compared against the early-stop counter in train()
        self.train_batchsize = 16
        self.eva_val_batchsize = 16
        self.lr = 0.0001
        self.temperature = 1
        self.alpha = 0.5
        self.wts = [500] * 15          # one sampling weight per class
        self.criterion = nn.CrossEntropyLoss()  # loss function

        # --- model / checkpoint naming ----------------------------------------
        self.net = 'resnet18'  # 0: resnet18
        self.pretrain = False
        self.model_ouput_dir = './models/'
        self.model_name = 'densenet121_Aug'
        self.best_epoch = 0

Image augmentation

In [None]:
class ImgAugTransform():
    """Callable transform applying an imgaug augmentation pipeline to one image.

    The pipeline resizes to ``config.image_size`` squared, then randomly
    applies Gaussian blur, horizontal flip, rotation, pixel/coarse dropout and
    a hue/saturation shift.
    """

    def __init__(self, config=None):
        """Build the augmentation pipeline.

        Args:
            config: object providing ``image_size``. When omitted a fresh
                ``Config()`` is created per instance; a ``Config()`` default in
                the signature would be built once, at class-definition time.
        """
        if config is None:
            config = Config()

        # Newer imgaug releases name the resize augmenter ``Resize`` and
        # deprecate ``Scale``; fall back to ``Scale`` on older releases.
        resize = getattr(iaa, 'Resize', None) or iaa.Scale

        self.aug = iaa.Sequential([
            resize((config.image_size, config.image_size)),
            iaa.Sometimes(0.25, iaa.GaussianBlur(sigma=(0, 3.0))),
            iaa.Fliplr(0.5),
            iaa.Affine(rotate=(-20, 20), mode='symmetric'),
            # For a quarter of the images apply one of the two dropout variants.
            iaa.Sometimes(0.25,
                          iaa.OneOf([iaa.Dropout(p=(0, 0.1)),
                                     iaa.CoarseDropout(0.1, size_percent=0.5)])),
            # Shift hue and saturation.
            iaa.AddToHueAndSaturation(value=(-10, 10), per_channel=True),
        ])

    def __call__(self, img):
        """Return the augmented image as a contiguous numpy array."""
        img = np.array(img)
        # NOTE(review): some imgaug versions appear to return flipped views
        # with negative strides, which torch-based transforms such as ToTensor
        # reject; a contiguous copy is safe either way.
        return np.ascontiguousarray(self.aug.augment_image(img))

Main function (start training)

In [None]:
if __name__ == '__main__':
    # Build the datasets, loaders, model and optimizer, then start training.
    config = Config()
    transform_train = trans.Compose([
        ImgAugTransform(config),
        trans.ToTensor(),
        trans.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    transform_validation = trans.Compose([
        trans.Resize((config.image_size, config.image_size)),
        trans.ToTensor(),
        trans.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    transform_test = trans.Compose([
        trans.Resize((config.image_size, config.image_size)),
        trans.ToTensor(),
        trans.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # NOTE(review): the training set uses the un-augmented validation
    # transform, so ``transform_train`` is never applied even though the model
    # name ends in "_Aug" -- confirm whether augmentation was meant to be on.
    train_dataset = Plantdisease_Dataset(image_dir=config.train_dataset_path, input_transform=transform_validation)
    val_dataset = Plantdisease_Dataset(image_dir=config.validation_dataset_path, input_transform=transform_validation)
    test_dataset = Plantdisease_Dataset(image_dir=config.test_dataset_path, input_transform=transform_test)

    # sampler = WRSampler(train_dataset, config.wts)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=config.train_batchsize, shuffle=True)
    validation_dataloader = DataLoader(dataset=val_dataset, batch_size=config.eva_val_batchsize, shuffle=False)
    test_dataloader = DataLoader(dataset=test_dataset, batch_size=config.eva_val_batchsize, shuffle=False)

    # net = torch.load('models/164.pth')
    net = models.densenet121(pretrained=config.pretrain)
    # torchvision's DenseNet exposes its head as ``classifier`` (not ``fc``).
    # Assigning ``net.fc`` only registers an unused module and leaves the
    # original 1000-way output in place, so replace ``classifier`` instead.
    net.classifier = nn.Sequential(
        nn.Linear(net.classifier.in_features, 512),
        nn.LeakyReLU(),
        nn.Linear(512, 128),
        nn.LeakyReLU(),
        nn.Linear(128, config.class_num),
    )
    net = net.to(device)

    learning_rate = config.lr
    max_epoch = config.max_epoch
    criterion = config.criterion
    # Define the optimizer.
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.999))

    # ``train`` is the training function defined in this file; the previous
    # name ``train_micro`` does not exist and raised NameError.
    train(net, criterion, optimizer, max_epoch, train_dataloader, validation_dataloader, config)

Evaluation

In [None]:
def evaluation(model, evaluation_dataset, evaluation_loader, config):
    """Report top-1, top-3 and per-class accuracy of ``model`` on a dataset.

    Args:
        model: network to evaluate; switched to eval mode.
        evaluation_dataset: sized object exposing ``class_num_dict``
            (class name -> sample count) and ``name2label``
            (class name -> label index).
        evaluation_loader: iterable of ``(inputs, labels)`` batches drawn from
            ``evaluation_dataset``.
        config: object providing ``class_num``.

    Returns:
        dict with ``correct_top1``, ``correct_top3``, ``top1_accuracy`` and
        ``top3_accuracy`` (percent), ``per_class_accuracy`` (class name ->
        percent, classes without samples omitted) and
        ``mean_class_accuracy`` (percent).
    """
    # Run on whatever device the model lives on; fall back to the notebook's
    # global ``device`` only for a parameter-less model.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device(device)

    class_num = config.class_num
    correct_test = 0
    correct_top3 = 0
    cls = torch.zeros(class_num, dtype=torch.long)  # correct predictions per label

    model.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, labels in evaluation_loader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            outputs = model(inputs)
            _, pred = outputs.max(1)
            hit = pred.eq(labels)
            correct_test += hit.sum().item()

            # topk(3) fails when the model has fewer than 3 outputs.
            k = min(3, outputs.size(1))
            _, top3 = outputs.topk(k)
            correct_top3 += top3.eq(labels.view(-1, 1)).sum().item()

            # One histogram per batch instead of one device sync per class.
            cls += torch.bincount(labels[hit], minlength=class_num)[:class_num].cpu()

    total = len(evaluation_dataset)
    top1_acc = correct_test / total * 100 if total else 0.0
    top3_acc = correct_top3 / total * 100 if total else 0.0
    print('Test set: Top 1 Accuracy: %d/%d (%.2f%%), Top 3 Accuracy: %d/%d (%.2f%%)'
          % (correct_test, total, top1_acc, correct_top3, total, top3_acc))

    class_num_dict = evaluation_dataset.class_num_dict
    name2label = evaluation_dataset.name2label

    per_class = {}
    for key in class_num_dict.keys():
        n_items = class_num_dict[key]
        n_correct = int(cls[name2label[key]])
        if n_items == 0:
            # A class without samples has no defined accuracy; leave it out of
            # the average instead of dividing by zero.
            print('%-20s : %d/%d    %10s' % (key, n_correct, n_items, 'n/a'))
            continue
        per_class[key] = n_correct / n_items * 100
        print('%-20s : %d/%d    %10f%%' % (key, n_correct, n_items, per_class[key]))

    mean_class_acc = sum(per_class.values()) / len(per_class) if per_class else 0.0
    print('Average per case accuracy: %10f%%' % mean_class_acc)
    print('-----------------------------------------')

    return {
        'correct_top1': correct_test,
        'correct_top3': correct_top3,
        'top1_accuracy': top1_acc,
        'top3_accuracy': top3_acc,
        'per_class_accuracy': per_class,
        'mean_class_accuracy': mean_class_acc,
    }

In [None]:
# Rebuild the test pipeline and reload a trained ResNet-101 checkpoint.
config = Config()

# Deterministic preprocessing: resize, convert to tensor, normalize.
transform_test = trans.Compose([
    trans.Resize((config.image_size,) * 2),
    trans.ToTensor(),
    trans.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

test_dataset = Plantdisease_Dataset(config.test_dataset_path, transform_test)
test_dataloader = DataLoader(test_dataset, batch_size=config.eva_val_batchsize, shuffle=False)

print('\n'.join([
    '-----------------------------------------',
    'Reload',
    '-----------------------------------------',
]))

# Alternative: load a fully pickled model instead of a state dict, e.g.
# net = torch.load('./models/Macro.pth') or torch.load('./models/Enas_macro_10.pth')
net = models.resnet101(False)
# The replacement head must have the same layout as the one the checkpoint
# was trained with, otherwise load_state_dict fails on mismatched keys.
net.fc = nn.Sequential(
    nn.Linear(2048, 512),
    nn.LeakyReLU(),
    nn.Linear(512, 128),
    nn.LeakyReLU(),
    nn.Linear(128, config.class_num),
)
# Load the weights onto the CPU first, then move the whole model to `device`.
state_dict = torch.load(r'./models/resnet101_raw_12.pth', map_location='cpu')
net.load_state_dict(state_dict)
net = net.to(device)

In [None]:
# Evaluate the reloaded network on the test split.
evaluation(
    model=net,
    evaluation_dataset=test_dataset,
    evaluation_loader=test_dataloader,
    config=config,
)

Compute parameter count and MACs

In [None]:
from thop import profile

# Profile the network with a single dummy image. The tensor is created on the
# same device as the model (a hard-coded .cuda() fails on CPU-only machines
# and whenever the model lives elsewhere), and it is not named ``input`` so
# the builtin is left intact.
dummy_input = torch.randn(1, 3, config.image_size, config.image_size,
                          device=next(net.parameters()).device)
macs, params = profile(net, inputs=(dummy_input,))

# Both figures are reported in millions.
print('{} {} {}'.format('Total params :', '%f' % (params / 1000000), 'M'))
print('{} {} {}'.format('Total macs :', '%f' % (macs / 1000000), 'M'))