<a href="https://colab.research.google.com/github/cuie23/learning-pytorch/blob/main/torch_load_external_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import os
import requests
import zipfile
from pathlib import Path
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [7]:
# Setup path to data folder
data_path = Path("folder")
image_path = data_path / "pizza_steak_sushi"

# If the image folder doesn't exist, download it and prepare it
if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    image_path.mkdir(parents=True, exist_ok=True)

# Download pizza, steak, sushi data
with open(data_path / "pizza_steak_sushi.zip", "wb") as f:
    request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
    print("Downloading pizza, steak, sushi data...")
    f.write(request.content)

# Unzip pizza, steak, sushi data
with zipfile.ZipFile(data_path / "pizza_steak_sushi.zip", "r") as zip_ref:
    print("Unzipping pizza, steak, sushi data...")
    zip_ref.extractall(image_path)

# Remove zip file
os.remove(data_path / "pizza_steak_sushi.zip")

folder/pizza_steak_sushi directory exists.
Downloading pizza, steak, sushi data...
Unzipping pizza, steak, sushi data...


In [9]:
def walk_through_dir(dir_path):
  for dirpath, dirnames, filenames in os.walk(dir_path):
    print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

walk_through_dir(image_path)

There are 2 directories and 0 images in 'folder/pizza_steak_sushi'.
There are 3 directories and 0 images in 'folder/pizza_steak_sushi/train'.
There are 0 directories and 78 images in 'folder/pizza_steak_sushi/train/pizza'.
There are 0 directories and 72 images in 'folder/pizza_steak_sushi/train/sushi'.
There are 0 directories and 75 images in 'folder/pizza_steak_sushi/train/steak'.
There are 3 directories and 0 images in 'folder/pizza_steak_sushi/test'.
There are 0 directories and 25 images in 'folder/pizza_steak_sushi/test/pizza'.
There are 0 directories and 31 images in 'folder/pizza_steak_sushi/test/sushi'.
There are 0 directories and 19 images in 'folder/pizza_steak_sushi/test/steak'.


In [11]:
data_transform = transforms.Compose([

    # Resize the images to 64x64
    transforms.Resize(size=(64, 64)),

    # Flip the images randomly on the horizontal
    transforms.RandomHorizontalFlip(p=0.5),

    # Turn the image into a torch.Tensor
    transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0
])

In [12]:
train_dir = image_path / "train"
test_dir = image_path / "test"

train_data = datasets.ImageFolder(root = train_dir,
                                  transform = data_transform)
test_data = datasets.ImageFolder(root=test_dir,
                                 transform=data_transform)

train_dataloader = DataLoader(dataset = train_data,
                              batch_size = 1,
                              num_workers = 1,
                              shuffle = True)

test_dataloader = DataLoader(dataset = test_data,
                             batch_size = 1,
                             num_workers = 1,
                             shuffle = True)