## 1. Get Data

In [18]:
import os
import zipfile
from pathlib import Path 
import requests

# Paths to the data folder, the extracted image folder and the downloaded archive
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"
zip_path = data_path / "pizza_steak_sushi.zip"

# If the image folder doesn't exist, download it and prepare it
if image_path.is_dir():
    print(f"{image_path} directory exists...")
else:
    print(f"Did not find {image_path} directory, creating one..")

    # Download pizza, steak, sushi data.
    # The request is made before any folder is created so that a failed
    # download does not leave an empty folder that a later run would
    # mistake for already-prepared data.
    print("Downloading pizza, steak, sushi data..")
    request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
    # Fail loudly on an HTTP error instead of saving an error page as the archive
    request.raise_for_status()

    image_path.mkdir(parents=True, exist_ok=True)
    with open(zip_path, "wb") as f:
        # Persist the downloaded bytes; without this the archive is empty
        # and cannot be unzipped below
        f.write(request.content)

    # Unzip pizza, steak, sushi data
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print("Unzipping pizza, steak, sushi data..")
        zip_ref.extractall(image_path)

data\pizza_steak_sushi directory exists...


In [19]:
# Derive the train/test split folders that live underneath the image folder
train_dir, test_dir = (image_path / split for split in ("train", "test"))
train_dir, test_dir

(WindowsPath('data/pizza_steak_sushi/train'),
 WindowsPath('data/pizza_steak_sushi/test'))

In [20]:
from torchvision import transforms, datasets
# Preprocessing pipeline applied to every image:
# resize to 64x64, then convert the image to a tensor
data_transform = transforms.Compose(
    [
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor(),
    ]
)

## 2.1 Create datasets and dataloaders (script mode)

In [21]:
# Make sure the folder that will receive the generated scripts exists
# (a no-op when it is already there)
import os

os.makedirs(name="going_modular", exist_ok=True)

In [22]:
%%writefile going_modular/data_setup.py
"""
Contains functionality for creating PyTorch DataLoaders for image classification data.
"""


import os
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 0 (load data in the main process) so that DataLoader
# always receives an integer.
NUM_WORKERS = os.cpu_count() or 0


def create_dataloaders(
                       train_dir: str,
                       test_dir: str,
                       transform: transforms.Compose,
                       batch_size: int,
                       num_workers: int = NUM_WORKERS):
    """Creates training and testing DataLoaders from image folders.

    Builds a torchvision ``ImageFolder`` dataset for each directory and wraps
    each one in a PyTorch ``DataLoader``.

    Args:
        train_dir: Path to the training image directory.
        test_dir: Path to the testing image directory.
        transform: torchvision transforms applied to every image of both
            datasets.
        batch_size: Number of samples per batch in each DataLoader.
        num_workers: Number of worker processes per DataLoader. Defaults to
            ``NUM_WORKERS``.

    Returns:
        A tuple ``(train_dataloader, test_dataloader, class_names)`` where
        ``class_names`` is the list of class names found by ``ImageFolder``
        in ``train_dir``.

    Example:
        train_dataloader, test_dataloader, class_names = create_dataloaders(
            train_dir="path/to/train",
            test_dir="path/to/test",
            transform=some_transform,
            batch_size=32)
    """
    # Use ImageFolder to create the datasets
    train_data = datasets.ImageFolder(root=train_dir,
                                      transform=transform,
                                      target_transform=None)
    test_data = datasets.ImageFolder(root=test_dir,
                                     transform=transform)

    # Class names as discovered by ImageFolder in the training directory
    class_names = train_data.classes

    # Shuffle only the training data; keep the test order fixed.
    # Honour the caller's num_workers instead of the module-level default.
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  shuffle=True)

    test_dataloader = DataLoader(dataset=test_data,
                                 batch_size=batch_size,
                                 num_workers=num_workers,
                                 shuffle=False)

    return train_dataloader, test_dataloader, class_names
    
    

Overwriting going_modular/data_setup.py
