# PyTorch Custom Datasets 

### Import PyTorch and set up device-agnostic code

In [None]:
import torch
from torch import nn

print(torch.__version__)

In [None]:
# Choose the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

# Data

The dataset is a subset of the Food101 dataset.


In [None]:
import requests
import zipfile
from pathlib import Path

# Local folder that holds the archive and the extracted images.
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"

# Download and extract only when the image folder is missing, so re-running
# this cell does not fetch the archive again.
if image_path.is_dir():
    print(f"{image_path} directory already exists...skipping download")
else:
    print(f"{image_path} does not exist, creating one...")
    # Only the parent folder is needed to store the archive. The image folder
    # is created after a successful download, so a failed request does not
    # leave behind an empty directory that the check above would treat as data.
    data_path.mkdir(parents=True, exist_ok=True)
    zip_path = data_path / "pizza_steak_sushi.zip"

    print('downloading pizza, steak, and sushi data')
    request = requests.get(
        "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of writing an error page to disk.
    request.raise_for_status()
    with open(zip_path, "wb") as f:
        f.write(request.content)

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print("unzipping...")
        image_path.mkdir(parents=True, exist_ok=True)
        zip_ref.extractall(image_path)

# Exploring data

In [None]:
import os
def walk_through_dir(dir_path):
    """Print a one-line summary for every directory under ``dir_path``.

    Walks ``dir_path`` with ``os.walk`` and, for each directory visited
    (including ``dir_path`` itself), prints how many subdirectories and how
    many files it directly contains. Every file is counted as an image.

    Args:
        dir_path: Root directory to walk.
    """
    for entry in os.walk(dir_path):
        dirpath, dirnames, filenames = entry
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

In [None]:
# Print the directory and file counts for every folder under the dataset root.
walk_through_dir(image_path)

In [None]:
# Paths to the "train" and "test" sub-folders of the dataset root.
train_dir = image_path.joinpath("train")
test_dir = image_path.joinpath("test")

train_dir, test_dir

# Visualizing images

1. Get all image paths
2. Pick a random image path with `random.choice()`
3. Get the image class name using `pathlib.Path.parent.stem`
4. Open the image with Pillow
5. Show the image metadata

In [None]:
import random
from PIL import Image

# Uncomment to make the random pick below reproducible:
# random.seed(42)

# 1. Collect every .jpg located two folders below the dataset root
#    (presumably <split>/<class>/<file>.jpg -- confirm against the extracted archive).
image_path_list = list(image_path.glob("*/*/*.jpg"))
image_path_list

# 2. Pick one image path at random.
random_image_path = random.choice(image_path_list)
random_image_path

# 3. The class name is the name of the folder that contains the image.
image_class = random_image_path.parent.stem
image_class

# 4. Open the image with Pillow. It is not closed here because `img` is
#    reused by later cells.
img = Image.open(random_image_path)

# 5. Report the image metadata.
print(f"random_image_path: {random_image_path}")
print(f"Image class: {image_class}")
print(f"Image height: {img.height}")
print(f"Image width: {img.width}")
img

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# View the Pillow image as a NumPy array so its shape can be reported and the
# pixels handed to matplotlib.
img_as_array = np.asarray(img)

# Draw the image on a 10x7 figure with the axes hidden.
plt.figure(figsize=(10, 7))
plt.imshow(img_as_array)
plt.title(f"Image class: {image_class} | Image shape: {img_as_array.shape} -> [height, width, color_channels]")
plt.axis("off")

In [None]:
# Display the raw values of the image array.
img_as_array

# Data Transformation

1. Turn the data into tensors
2. Wrap it in a dataset (`torch.utils.data.Dataset`), then in a DataLoader (`torch.utils.data.DataLoader`)

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

### transform data with `torchvision.transforms`

In [None]:
# Preprocessing pipeline applied to each image, in order:
#   1. resize to 64x64,
#   2. flip horizontally with probability 0.5,
#   3. convert to a torch tensor.
data_transform = transforms.Compose(
    [
        transforms.Resize(size=(64, 64)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
    ]
)

In [None]:
# Shape of the tensor produced by running the opened image through the pipeline.
data_transform(img).shape

In [None]:
def plot_transformed_images(image_paths: list, transform, n=3, seed=None):
    """Plot randomly chosen images next to their transformed versions.

    For each sampled path the image is opened with Pillow and drawn on the
    left; the result of ``transform`` is drawn on the right. One figure is
    created per image and nothing is returned.

    Args:
        image_paths: Candidate image paths. The figure title uses each path's
            ``parent.stem``, so entries need to be ``pathlib.Path``-like.
        transform: Callable applied to the opened image. Its result must
            support ``.permute(1, 2, 0)``; it is assumed to be a
            [color_channels, height, width] tensor.
        n: Number of images to plot; capped at ``len(image_paths)``.
        seed: Optional seed for the ``random`` module. Any value other than
            ``None`` (including ``0``) seeds the generator.
    """
    # Compare against None rather than relying on truthiness so seed=0 works.
    if seed is not None:
        random.seed(seed)

    # random.sample raises ValueError when asked for more items than exist,
    # so never request more images than are available.
    sample_size = min(n, len(image_paths))
    random_image_paths = random.sample(image_paths, k=sample_size)

    for path in random_image_paths:
        with Image.open(path) as f:
            fig, ax = plt.subplots(nrows=1, ncols=2)

            # Left: the image as stored on disk.
            ax[0].imshow(f)
            ax[0].set_title(f"Original\nSize:{f.size}")
            ax[0].axis(False)

            # Right: the transformed image, with channels moved last because
            # matplotlib expects [height, width, color_channels].
            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"Transformed\nSize: {transformed_image.shape}")
            ax[1].axis("off")

            fig.suptitle(f"Class: {path.parent.stem}", fontsize=16)

# Preview three randomly sampled images before and after `data_transform`;
# the fixed seed keeps the sample the same between runs.
plot_transformed_images(image_path_list, transform=data_transform, n=3, seed=42)

# Loading image data from an image folder

In [None]:
from torchvision import datasets
# Training dataset read from `train_dir`.
train_data = datasets.ImageFolder(
    root=train_dir,
    transform=data_transform,  # transform applied to the image data
    target_transform=None,  # no transform applied to the label/target
)

# Test dataset read from `test_dir`, using the same image transform.
test_data = datasets.ImageFolder(root=test_dir, transform=data_transform)

train_data, test_data

In [None]:
# Class names as a list, read from the training dataset.
class_names = train_data.classes
class_names

In [None]:
# Class names as a dict, read from the training dataset's class-to-index mapping.
class_dict = train_data.class_to_idx
class_dict

In [None]:
# Number of samples in the training and test datasets.
len(train_data), len(test_data)

In [None]:
# Fetch the first (image, label) sample from the training dataset. Unpacking
# a single lookup avoids loading and randomly transforming the same image
# twice just to throw one of the results away.
img, label = train_data[0]

In [None]:
# Inspect the sample: tensor values, shape and dtype of the image, plus the
# label and its Python type.
print(f"Image Tensor:\n {img}")
print(f"Image shape: {img.shape}")
print(f"Image datatype: {img.dtype}")
print(f"Image label: {label}")
print(f"Label datatype: {type(label)}")

In [None]:
# Move the channel dimension last: [color_channels, height, width] ->
# [height, width, color_channels], the layout passed to matplotlib below.
img_permute = img.permute(1,2,0)

# Print the shapes before and after the permutation.
print(f"Original shape: {img.shape} -> [color_channels, height, width]")
print(f"Image Permute: {img_permute.shape} -> [height, width, color_channel]")

# Plot the image with the axes hidden and the class name as the title.
plt.figure(figsize=(10,7))
plt.imshow(img_permute)
plt.axis("off")
plt.title(class_names[label], fontsize=14)

# DataLoaders

In [None]:
from torch.utils.data import DataLoader
# Number of samples per batch, shared by both loaders.
BATCH_SIZE = 32

# Training loader: shuffling enabled.
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
# Test loader: shuffling disabled.
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

train_dataloader, test_dataloader

In [None]:
# Pull one batch from the training loader. `img` and `label` are rebound here,
# replacing the single-sample values assigned in an earlier cell.
img, label = next(iter(train_dataloader))

# Report the shapes of the image batch and the label batch.
print(f"Image Shape: {img.shape}")
print(f"Label shape: {label.shape}")