In [None]:
import numpy as np
import pandas as pd
import os
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import Dataset
from torch import nn
from tqdm.auto import tqdm
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Locations of the two dataset splits, given relative to the notebook's
# working directory.
train_dir, test_dir = './Fruit360/Training', './Fruit360/Test'

In [None]:
class CustomDataset(Dataset):
    """Folder-per-class image dataset yielding ``(image, class_name)`` pairs.

    ``root_dir`` is expected to contain one sub-directory per class. Every
    regular file found directly inside a class directory is treated as one
    image belonging to that class.

    Args:
        root_dir: Path of the directory that holds the class sub-directories.
        transform: Optional callable applied to each loaded PIL image before
            it is returned.

    Attributes:
        root_dir: The ``root_dir`` argument, stored unchanged.
        transform: The ``transform`` argument, stored unchanged.
        class_names: Sorted names of the class sub-directories.
        class_to_idx: Maps each class name to its position in ``class_names``
            (handy when integer targets are needed).
        image_paths: Paths of all images, grouped by class in ``class_names``
            order and sorted by file name within each class.
        total_images: Number of entries in ``image_paths``.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Only sub-directories are classes. Stray files in the root (e.g.
        # .DS_Store, a README) would otherwise be listed as classes and make
        # os.listdir(class_dir) raise.
        self.class_names = sorted(
            entry
            for entry in os.listdir(self.root_dir)
            if os.path.isdir(os.path.join(self.root_dir, entry))
        )
        self.class_to_idx = {name: idx for idx, name in enumerate(self.class_names)}

        self.image_paths = []
        for class_name in self.class_names:
            class_dir = os.path.join(self.root_dir, class_name)
            # os.listdir order is arbitrary; sorting makes the index -> image
            # mapping reproducible. Nested directories are skipped because
            # they cannot be opened as images.
            self.image_paths.extend(
                os.path.join(class_dir, image_name)
                for image_name in sorted(os.listdir(class_dir))
                if os.path.isfile(os.path.join(class_dir, image_name))
            )

        # Derived from the list so the two can never disagree.
        self.total_images = len(self.image_paths)

    def __getitem__(self, index):
        """Load the image at ``index`` and return ``(image, class_name)``.

        The image is converted to RGB so every sample has the same number of
        channels. ``class_name`` is the name of the directory that contains
        the image file.

        Raises:
            IndexError: If ``index`` is outside the range of ``image_paths``.
        """
        image_path = self.image_paths[index]

        # Image.open is lazy and keeps the file open; convert() forces the
        # pixel data to be read, after which the handle can be closed.
        with Image.open(image_path) as image_file:
            image = image_file.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        class_name = os.path.basename(os.path.dirname(image_path))
        return image, class_name

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

In [None]:
# Number of samples per batch, shared by both loaders.
BATCH_SIZE = 32

# Every image is turned into a tensor as it is loaded.
transform = transforms.ToTensor()

# Build the training split first, then the test split, with the same transform.
train_dataset, test_dataset = (
    CustomDataset(split_dir, transform=transform)
    for split_dir in (train_dir, test_dir)
)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Quick sanity check: tensor shape of the first training image, followed by
# the number of samples in the training and test splits.
first_image, first_label = train_dataset[0]
print(first_image.shape)

split_sizes = (len(train_dataset), len(test_dataset))
print(*split_sizes)

In [None]:
# Preview ten randomly chosen training images in a 2 x 5 grid, each titled
# with its class name.
random_idx = np.random.randint(0, len(train_dataset), size=10)

fig, axes = plt.subplots(2, 5, figsize=(10, 4))
for ax, idx in zip(axes.flat, random_idx):
    image_tensor, class_name = train_dataset[idx]

    # imshow expects channels last: (C, H, W) -> (H, W, C)
    image = image_tensor.permute(1, 2, 0).numpy()

    ax.imshow(image)
    ax.set_title(class_name)
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Class labels known to the training dataset: the sorted names it collected
# from train_dir. Note this is the dataset's own list object, not a copy.
class_names = train_dataset.class_names
# Bare expression so the notebook displays the list as the cell output.
class_names

In [None]:
# Summarise the two loaders: their reprs and how many batches each one yields.
train_batch_count = len(train_loader)
test_batch_count = len(test_loader)

print(f"Dataloaders: {train_loader, test_loader}")
print(f"Length of train dataloader: {train_batch_count} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {test_batch_count} batches of {BATCH_SIZE}")

In [None]:
# Pull a single batch from the training loader to inspect what it yields.
first_batch = next(iter(train_loader))
train_features_batch, train_labels_batch = first_batch

# The dataset returns class names as strings, so the labels are measured with
# len() rather than .shape.
train_features_batch.shape, len(train_labels_batch)

In [None]:
# Device-agnostic setup: default to the CPU and switch to CUDA only when
# PyTorch reports it as available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
device