# Classification of Citrus Leaves with ResNet50

## Create Dataloader

In [40]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [28]:
# Expected layout: img_dir/<class_name>/<image file>; the name of each subdirectory is used as the class label
class CitrusLeavesDataset(Dataset):
    """Image dataset whose class labels are the names of the sub directories.

    Expected layout::

        img_dir/
            <class_name_a>/<image files>
            <class_name_b>/<image files>

    Top-level entries that are not directories, and any entry whose name
    starts with ``.`` (e.g. ``.DS_Store``, ``.ipynb_checkpoints``), are
    ignored. Classes and files are indexed in sorted order so that a given
    index always refers to the same sample, independent of the order in which
    the operating system lists directory entries.

    Args:
        img_dir: Root directory containing one sub directory per class.
        transform: Optional callable applied to the loaded image.
        target_transform: Optional callable applied to the label.

    Attributes:
        annotations: DataFrame with the columns ``image_path`` and ``label``
            (label is the class name as a string), one row per image.
        classes: Sorted list of class names found in ``img_dir``.
        class_to_idx: Mapping from class name to its position in ``classes``;
            handy for building a ``target_transform`` that yields integer
            targets.

    Raises:
        FileNotFoundError: If ``img_dir`` does not exist.
    """

    def __init__(self, img_dir, transform=None, target_transform=None):
        # define main directory of images and transformations
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

        # only real, non-hidden sub directories count as classes
        self.classes = sorted(
            entry
            for entry in os.listdir(img_dir)
            if not entry.startswith(".")
            and os.path.isdir(os.path.join(img_dir, entry))
        )
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

        # collect paths and labels, skipping hidden files and nested directories
        image_paths = []
        labels = []
        for class_name in self.classes:
            class_dir = os.path.join(img_dir, class_name)
            for file_name in sorted(os.listdir(class_dir)):
                file_path = os.path.join(class_dir, file_name)
                if file_name.startswith(".") or not os.path.isfile(file_path):
                    continue
                image_paths.append(file_path)
                labels.append(class_name)

        # store in annotations
        self.annotations = pd.DataFrame(
            {"image_path": image_paths, "label": labels}
        )

    def __len__(self):
        """Return the number of images found under ``img_dir``."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            A tuple ``(image, label)``: the image as decoded by
            ``read_image`` (after ``transform``, if given) and the class name
            string (after ``target_transform``, if given).
        """
        # get image path and label
        image_path = self.annotations.iloc[idx, 0]
        label = self.annotations.iloc[idx, 1]
        image = read_image(image_path)

        # compare against None so that a falsy-but-valid callable is still applied
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

In [43]:
# fraction of the dataset assigned to the training split; the remainder becomes the test split
TRAIN_PROPORTION = 0.8

In [41]:
# load data: one (image_path, label) entry per image found under the root directory
citrus_leaves_dataset = CitrusLeavesDataset(img_dir = "./data_citrus_leaves/")

# split sizes: the train share is rounded down and the test set takes the
# remainder, so both sizes always add up to the dataset length
train_size = int(TRAIN_PROPORTION * len(citrus_leaves_dataset))
test_size = len(citrus_leaves_dataset) - train_size

# randomly split into train and test subsets (no DataLoader is created here);
# the seeded generator keeps the split identical across runs for a given
# dataset ordering, so test samples never leak into training between sessions
SPLIT_SEED = 42
train_dataset, test_dataset = torch.utils.data.random_split(
    citrus_leaves_dataset,
    [train_size, test_size],
    generator = torch.Generator().manual_seed(SPLIT_SEED),
)

In [42]:
# display the training subset object returned by random_split
train_dataset

<torch.utils.data.dataset.Subset at 0x1efb313e680>

In [34]:
# shape of the first image tensor in the full dataset
# (`dataset` was a stale name that is not defined anywhere in this notebook)
citrus_leaves_dataset[0][0].shape

torch.Size([3, 1417, 1520])