In [1]:
from torchvision import transforms, datasets
import torch
import zipfile

def unzip_dataset(INPATH, OUTPATH):
    """Extract every member of a zip archive into a directory.

    Args:
        INPATH: Location of the zip archive to read.
        OUTPATH: Directory that receives the extracted members.
    """
    with zipfile.ZipFile(INPATH, mode='r') as archive:
        archive.extractall(path=OUTPATH)



In [2]:
# Preprocessing pipeline for each dataset split, keyed by split name.
# Both splits resize, convert to a tensor and normalize per channel; only the
# training split adds a random horizontal flip as augmentation.
# The mean/std values look like the usual ImageNet channel statistics.
# NOTE(review): per the torchvision docs, Resize with an int scales the shorter
# edge to 256 and keeps the aspect ratio -- confirm the source images are
# square, otherwise samples of different shapes cannot be batched together.
data_transforms = dict(
    train=transforms.Compose([
        transforms.Resize(size=256),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize(size=256),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)



In [3]:
# One ImageFolder dataset per split, each paired with the transform pipeline
# registered under the same split name in data_transforms.
image_datasets = {
    split: datasets.ImageFolder(root, data_transforms[split])
    for split, root in (
        ('train', './input/train'),
        ('val', './input/val'),
    )
}


In [4]:
# Display the training dataset summary (sample count, root folder, transforms).
image_datasets['train']

Dataset ImageFolder
    Number of datapoints: 250
    Root location: ./input/train
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=PIL.Image.BILINEAR)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [5]:
# Display the validation dataset summary (sample count, root folder, transforms).
image_datasets['val']

Dataset ImageFolder
    Number of datapoints: 100
    Root location: ./input/val
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=PIL.Image.BILINEAR)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [24]:
# Peek at the first five (file path, class index) pairs of the training set.
print(image_datasets['train'].samples[:5])

[('./input/train/ng/cast_def_0_0.jpeg', 0), ('./input/train/ng/cast_def_0_103.jpeg', 0), ('./input/train/ng/cast_def_0_105.jpeg', 0), ('./input/train/ng/cast_def_0_107.jpeg', 0), ('./input/train/ng/cast_def_0_109.jpeg', 0)]


In [15]:
# Print the mapping from class (sub-folder) name to the integer label index.
print(image_datasets['train'].class_to_idx)

{'ng': 0, 'ok': 1}


In [27]:
# Batch loaders for each split (batches of 4, loaded in the main process).
# Training: reshuffle every epoch and drop the final incomplete batch so every
# training batch has exactly 4 samples.
# Validation: keep dataset order and keep the last partial batch, so every
# validation sample is evaluated exactly once and runs are repeatable.
image_dataloaders = {
    'train': torch.utils.data.DataLoader(
        image_datasets['train'],
        batch_size=4,
        shuffle=True,
        num_workers=0,
        drop_last=True,
    ),
    'val': torch.utils.data.DataLoader(
        image_datasets['val'],
        batch_size=4,
        shuffle=False,
        num_workers=0,
        drop_last=False,
    ),
}


In [29]:
# Sanity check: print the first training batch (image tensor, then labels).
for i, (inputs, labels) in enumerate(image_dataloaders['train']):
    print(inputs, labels, sep='\n')
    # Only the first batch is needed; the first pass always has i == 0.
    break


tensor([[[[0.3823, 0.3823, 0.3823,  ..., 0.3823, 0.3823, 0.3823],
          [0.3823, 0.3823, 0.3823,  ..., 0.3823, 0.3823, 0.3823],
          [0.3823, 0.3823, 0.3823,  ..., 0.3823, 0.3823, 0.3823],
          ...,
          [0.7248, 0.7248, 0.7248,  ..., 0.6734, 0.6734, 0.6734],
          [0.7248, 0.7248, 0.7248,  ..., 0.6734, 0.6734, 0.6734],
          [0.7248, 0.7248, 0.7248,  ..., 0.6734, 0.6734, 0.6734]],

         [[0.5203, 0.5203, 0.5203,  ..., 0.5203, 0.5203, 0.5203],
          [0.5203, 0.5203, 0.5203,  ..., 0.5203, 0.5203, 0.5203],
          [0.5203, 0.5203, 0.5203,  ..., 0.5203, 0.5203, 0.5203],
          ...,
          [0.8704, 0.8704, 0.8704,  ..., 0.8179, 0.8179, 0.8179],
          [0.8704, 0.8704, 0.8704,  ..., 0.8179, 0.8179, 0.8179],
          [0.8704, 0.8704, 0.8704,  ..., 0.8179, 0.8179, 0.8179]],

         [[0.7402, 0.7402, 0.7402,  ..., 0.7402, 0.7402, 0.7402],
          [0.7402, 0.7402, 0.7402,  ..., 0.7402, 0.7402, 0.7402],
          [0.7402, 0.7402, 0.7402,  ..., 0