### Download data

In [None]:
import os
import zipfile

from pathlib import Path

import requests

# Setup path to data folder
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"
zip_path = data_path / "pizza_steak_sushi.zip"

# Only download and unpack the dataset when the image folder is missing,
# so re-running this cell does not fetch the archive again.
if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    # The archive is saved next to the image folder, so its parent must exist.
    data_path.mkdir(parents=True, exist_ok=True)

    # Download pizza, steak, sushi data
    print("Downloading pizza, steak, sushi data...")
    response = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
    # Fail loudly on an HTTP error instead of saving an error page as a .zip
    response.raise_for_status()
    zip_path.write_bytes(response.content)

    try:
        # Create the image folder only after the download succeeded, so a
        # failed download does not leave an empty folder that would make the
        # next run skip the download.
        image_path.mkdir(parents=True, exist_ok=True)

        # Unzip pizza, steak, sushi data
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            print("Unzipping pizza, steak, sushi data...")
            zip_ref.extractall(image_path)
    finally:
        # Remove zip file (even if extraction failed)
        os.remove(zip_path)

In [None]:

# Training and testing images live in sibling sub-folders of the image folder
train_dir, test_dir = image_path / "train", image_path / "test"

# Bare expression: shown as the cell's output in the notebook
train_dir, test_dir

### Datasets and Dataloaders

Save a cell's contents to a file with the `%%writefile filename` cell magic.

In [None]:
import os

# Create the folder that will hold the modular scripts if it is missing.
if not os.path.exists("going_modular/"):
    # This branch runs when the folder is absent, so say so.
    print("Did not find going_modular directory, creating one...")
    # exist_ok avoids an error if the folder appears between check and creation
    os.makedirs("going_modular", exist_ok=True)

In [None]:
from torchvision import datasets, transforms

# Simple preprocessing pipeline: resize each image to 64x64, then convert it
# to a tensor
data_transform = transforms.Compose(
    [transforms.Resize(size=(64, 64)), transforms.ToTensor()]
)

In [None]:
%%writefile going_modular/data_setup.py 
"""
 Contains functionality for creating PyTorch
 DataLoaders for image classification data.
"""
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int = NUM_WORKERS,
):
    """
        Creates training and testing DataLoaders from image folders.

        Reads each directory with torchvision's ImageFolder (applying the same
        transform to both) and wraps the resulting datasets in PyTorch
        DataLoaders. The training loader shuffles its data; the testing
        loader does not.

        Args:
            train_dir: path to the training directory
            test_dir: path to the testing directory
            transform: torchvision transforms to perform on the data
            batch_size: number of samples per batch in each DataLoader
            num_workers: number of workers per DataLoader
                (defaults to NUM_WORKERS)

        Returns:
            A tuple of (train_dataloader, test_dataloader, class_names),
            where class_names is the list of target classes taken from the
            training dataset.

        Example usage:
            train_dataloader, test_dataloader, class_names = create_dataloaders(
                train_dir=path/to/train_dir,
                test_dir=path/to/test_dir,
                transform=some_transform,
                batch_size=32,
                num_workers=4)
    """
    # Build one ImageFolder dataset per split, training first
    train_data, test_data = (
        datasets.ImageFolder(folder, transform=transform)
        for folder in (train_dir, test_dir)
    )

    # Options shared by both loaders; only the shuffling differs
    shared_options = {
        "batch_size": batch_size,
        "num_workers": num_workers,
        "pin_memory": True,
    }
    train_dataloader = DataLoader(train_data, shuffle=True, **shared_options)
    test_dataloader = DataLoader(test_data, shuffle=False, **shared_options)

    # Class names come from the training dataset
    return train_dataloader, test_dataloader, train_data.classes



In [None]:
from going_modular import data_setup 

# Build both loaders (and the class names) with the modular helper,
# using batches of 32 and the default number of workers
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    batch_size=32,
    transform=data_transform,
    test_dir=test_dir,
    train_dir=train_dir,
)


In [None]:
# Bare expression: shows the loaders and class names as the cell's output
train_dataloader, test_dataloader, class_names