# Adjusting Size for Classification of Citrus Leaves
See `citrus_leaves.ipynb` for main file.

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.io import read_image # use PyTorch to read images
import torchvision.transforms as transforms
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

In [2]:
IMG_DIR = "./data_citrus_leaves/"
TRAIN_PROPORTION = 0.8
BATCH_SIZE = 32
NUM_EPOCHS = 40
IMAGE_SIZE = (256, 256)
LR = 0.001
RESNET_MEAN = [0.485, 0.456, 0.406]
RESNET_STD = [0.229, 0.224, 0.225]

In [45]:
# simple encoder to convert `string` to `int`
encoder = {
    "healthy" : 0,
    "black_spot" : 1,
    "canker" : 2,
    "greening" : 3,
    "healthy" : 4,
    "melanose" : 5
}

decoder = {
    0 : "healthy",
    1 : "black_spot",
    2 : "canker",
    3 : "greening",
    4 : "healthy",
    5 : "melanose"
}

# class label is name of sub directory, images are in sub directory
class CitrusLeavesDataset(Dataset):
    def __init__(self, img_dir, transform = None, target_transform = None, length = None):
        # define main directory of images and transformations
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

        # store paths and labels in DataFrame
        data = { "image_path" : [], "label" : [] }
        for sub_dir in os.listdir(img_dir):
            for file in os.listdir(os.path.join(img_dir, sub_dir)):
                data['image_path'].append(os.path.join(img_dir, sub_dir, file))
                data['label'].append(sub_dir)

        # store in annotations
        self.annotations = pd.DataFrame(data).sample(frac = 1).reset_index(drop = True)

        # if length is specified, only use that many samples
        if length is not None:
            if length > len(self.annotations) or length < 1:
                raise ValueError("Length must be between 1 and " + str(len(self.annotations)))
            self.annotations = self.annotations[:length]

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        # get image path and label
        image = read_image(self.annotations.iloc[idx, 0]).float()
        label = encoder[self.annotations.iloc[idx, 1]]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

In [63]:
def create_model(device = None, learning_rate = LR):
    # load model
    model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained = True)
    feature_extractor = nn.Sequential(*list(model.children())[:-1])

    # create model
    model = nn.Sequential(
        feature_extractor,
        nn.Flatten(),
        nn.Linear(2048, 6)
    )
    
    if device is not None:
        model = model.to(device)

    # define loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

    return model, criterion, optimizer

def train_model(model, loader, optimizer, criterion, device = None, epochs = NUM_EPOCHS, stats = False):
    # train model
    model.train()
    losses = []
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(loader):
            if device is not None:
                images = images.to(device)
                labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

            if stats:
                if i % 10 == 0:
                    print("[{}, {}] Loss: {}".format(epoch, i, loss.item()))

    return losses

def test_model(model, loader, device = None):
    # test model
    model.eval()
    predict = torch.tensor([], dtype = torch.long)
    actual = torch.tensor([], dtype = torch.long)

    if device is not None:
        predict = predict.to(device)
        actual = actual.to(device)

    with torch.no_grad():
        for images, labels in loader:
            if device is not None:
                images = images.to(device)
                labels = labels.to(device)

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            predict = torch.cat((predict, predicted))
            actual = torch.cat((actual, labels))

    return predict, actual

In [52]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [64]:
# iterate through sizes
d = {}
for length in [ 100, 200, 300, 400, 500, 600 ]:
    d[length] = {}

    # load data
    data = CitrusLeavesDataset(
        img_dir = IMG_DIR,
        transform = transforms.Compose([
            transforms.Resize(IMAGE_SIZE),
            transforms.Normalize(mean = RESNET_MEAN, std = RESNET_STD)
        ]),
        length = length
    )

    train_size = int(TRAIN_PROPORTION * len(data))
    train, test = random_split(data, [ train_size, len(data) - train_size ])

    train_loader = DataLoader(train, batch_size = BATCH_SIZE, shuffle = True)
    test_loader = DataLoader(test, batch_size = BATCH_SIZE, shuffle = True)

    # create model
    model, criterion, optimizer = create_model(device = device)

    # train model
    losses = train_model(model, train_loader, optimizer, criterion, device = device)
    d[length]['losses'] = losses

    # test model
    predict, actual = test_model(model, test_loader, device = device)
    d[length]['predict'] = predict
    d[length]['actual'] = actual

    print("Length {}: {}".format(
        length,
        metrics.accuracy_score(actual.cpu().numpy(), predict.cpu().numpy())
    ))

Using cache found in C:\Users\fongc/.cache\torch\hub\pytorch_vision_v0.6.0


Length 100: 0.7


Using cache found in C:\Users\fongc/.cache\torch\hub\pytorch_vision_v0.6.0


Length 200: 0.75


Using cache found in C:\Users\fongc/.cache\torch\hub\pytorch_vision_v0.6.0


Length 300: 0.85


Using cache found in C:\Users\fongc/.cache\torch\hub\pytorch_vision_v0.6.0
