In [12]:
import os
import torch
import logging
import pandas as pd
import uuid

from pathlib import Path
from PIL import Image
from torchvision.transforms import v2, InterpolationMode
from torch.utils.data import Dataset, DataLoader


In [13]:
# Set up root logging at INFO level, then obtain the logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [14]:
# Every location below hangs off a "dataset" folder two levels above the
# current working directory.
dataset_path = os.path.join(os.getcwd(), "..", "..", "dataset")

# Entries that sit directly inside the dataset folder.
sample_dataset, raw_subset_dataset, subset_dataset = (
    os.path.join(dataset_path, entry)
    for entry in ("sample.csv", 'corn-leaf-disease-subset-raw', 'corn-leaf-disease-subset')
)

# train / valid / test folders of the raw subset.
raw_train_dir, raw_valid_dir, raw_test_dir = (
    os.path.join(raw_subset_dataset, split) for split in ('train', 'valid', 'test')
)

# train / valid / test folders of the (non-raw) subset.
subset_train_dir, subset_valid_dir, subset_test_dir = (
    os.path.join(subset_dataset, split) for split in ('train', 'valid', 'test')
)

# Show the main paths as the cell output.
dataset_path, sample_dataset, raw_subset_dataset, subset_dataset

('d:\\Kuliah\\master\\deep-learning-uas\\src\\1\\..\\..\\dataset',
 'd:\\Kuliah\\master\\deep-learning-uas\\src\\1\\..\\..\\dataset\\sample.csv',
 'd:\\Kuliah\\master\\deep-learning-uas\\src\\1\\..\\..\\dataset\\corn-leaf-disease-subset-raw',
 'd:\\Kuliah\\master\\deep-learning-uas\\src\\1\\..\\..\\dataset\\corn-leaf-disease-subset')

In [15]:
# Read the sample CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=sample_dataset)

# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,index,col1,col2
0,1,one,two
1,2,three,four
2,3,five,six


In [16]:
class CornLeafDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``root_dir`` per class.

    Class names are the sub-directory names in sorted order, and each class is
    assigned its position in that order as an integer label. Within a class,
    image files are indexed in sorted path order so that a given index refers
    to the same file on every run and on every filesystem.

    Args:
        root_dir: Directory whose immediate sub-directories are the classes.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``; ``None`` returns the PIL image unchanged.

    Attributes:
        class_names: Sorted list of class (sub-directory) names.
        class_to_idx: Mapping from class name to integer label.
        idx_to_class: Inverse mapping from integer label to class name.
        image_paths: ``Path`` of every image found, aligned with ``labels``.
        labels: Integer label of every image, aligned with ``image_paths``.

    Raises:
        FileNotFoundError: from ``Path.iterdir`` if ``root_dir`` does not exist.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)
        self.transform = transform

        # Suffixes (compared lower-cased) that count as images.
        exts = ('.jpg', '.jpeg', '.png')

        # Map class names to integer labels and back.
        self.class_names = sorted(d.name for d in self.root_dir.iterdir() if d.is_dir())
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.class_names)}
        self.idx_to_class = {idx: cls_name for cls_name, idx in self.class_to_idx.items()}

        # Collect image paths and their labels class by class.
        self.image_paths = []
        self.labels = []
        for cls_name in self.class_names:
            label = self.class_to_idx[cls_name]
            cls_folder = self.root_dir / cls_name
            # glob() yields entries in arbitrary, filesystem-dependent order;
            # sorting makes the index -> file mapping reproducible.
            for f in sorted(cls_folder.glob('*')):
                # Skip sub-directories that merely happen to carry an image suffix.
                if f.is_file() and f.suffix.lower() in exts:
                    self.image_paths.append(f)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of images found under ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, label)``.

        The image is passed through ``self.transform`` when one was given.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager releases the file handle deterministically;
        # convert() returns an independent image that stays valid afterwards.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        # Compare against None explicitly so a transform object that happens
        # to be falsy (e.g. an empty container) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [17]:
# PIL image -> image tensor, converted to float32 with value scaling enabled.
# Two further steps are kept here, disabled, for reference:
#   v2.Resize(size=(224,224), interpolation=InterpolationMode.BILINEAR)
#   v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
tensor_transforms = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])

# Image tensor -> PIL image, used before writing images to disk.
tensor_PIL_transforms = v2.Compose([v2.ToPILImage()])

In [18]:
# Datasets: one per raw split, all sharing the same tensor conversion.
train_dataset = CornLeafDataset(raw_train_dir, transform=tensor_transforms)
valid_dataset = CornLeafDataset(raw_valid_dir, transform=tensor_transforms)
# The test split must read from the test directory; it previously pointed at
# raw_valid_dir, which made the "test" set a duplicate of the validation set.
test_dataset  = CornLeafDataset(raw_test_dir, transform=tensor_transforms)

In [19]:
# Dataloaders: batches of 32 for every split; only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
valid_loader = DataLoader(dataset=valid_dataset, shuffle=False, batch_size=32)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=32)

In [20]:
# Augmentation: convert to an image tensor, always take a random resized crop
# to 224x224, then apply each of rotation, colour jitter and Gaussian blur
# independently with probability 0.5.
augmentation_pipeline = v2.Compose([
    v2.ToImage(),
    v2.RandomResizedCrop(
        size=(224, 224),
        scale=(0.8, 1.0),
        interpolation=InterpolationMode.BILINEAR,
    ),
    # Each optional step gets its own RandomApply wrapper, in this order.
    *(
        v2.RandomApply([optional_step], p=0.5)
        for optional_step in (
            v2.RandomRotation(degrees=(-15, 15)),
            v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
            v2.GaussianBlur(kernel_size=3),
        )
    ),
])

In [21]:
# Number of augmented copies written to disk for every source training image.
image_augment_n = 5

# Write `image_augment_n` randomly augmented JPEG copies of every training
# image into <subset_train_dir>/<class name>/.
# NOTE: file names are fresh UUIDs, so re-running this cell adds another set of
# files next to the existing ones instead of overwriting them.
for image_batch, label_batch in train_loader:
    for image_tensor, label_tensor in zip(image_batch, label_batch):
        # Class name and target folder are identical for every copy of this
        # image, so resolve them once rather than once per augmentation.
        label = train_dataset.idx_to_class[label_tensor.item()]
        save_dir = os.path.join(subset_train_dir, label)

        # makedirs also creates missing parent folders (os.mkdir raised
        # FileNotFoundError when the output root did not exist yet) and
        # exist_ok makes the call safe to repeat.
        os.makedirs(save_dir, exist_ok=True)

        for _ in range(image_augment_n):
            image = tensor_PIL_transforms(augmentation_pipeline(image_tensor))
            image_path = os.path.join(save_dir, f'{uuid.uuid4()}.jpg')
            image.save(image_path)

In [22]:
# Number of augmented copies written to disk for every source validation image.
image_augment_n = 5

# Write `image_augment_n` randomly augmented JPEG copies of every validation
# image into <subset_valid_dir>/<class name>/.
# NOTE: file names are fresh UUIDs, so re-running this cell adds another set of
# files next to the existing ones instead of overwriting them.
# NOTE(review): this applies random augmentation to the validation split as
# well — confirm that is intended.
for image_batch, label_batch in valid_loader:
    for image_tensor, label_tensor in zip(image_batch, label_batch):
        # Decode the label with the validation dataset's own mapping: the
        # indices in this batch were assigned by valid_dataset, and would map
        # to the wrong names via train_dataset if the class folders differ.
        label = valid_dataset.idx_to_class[label_tensor.item()]
        save_dir = os.path.join(subset_valid_dir, label)

        # makedirs also creates missing parent folders (os.mkdir raised
        # FileNotFoundError when the output root did not exist yet) and
        # exist_ok makes the call safe to repeat.
        os.makedirs(save_dir, exist_ok=True)

        for _ in range(image_augment_n):
            image = tensor_PIL_transforms(augmentation_pipeline(image_tensor))
            image_path = os.path.join(save_dir, f'{uuid.uuid4()}.jpg')
            image.save(image_path)