In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

Files already downloaded and verified


Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train

In [None]:
# Download training set
# Build the CIFAR-10 training split rooted at ./data; every image is converted
# to a tensor by ToTensor() when it is accessed. The recorded output suggests
# the download is skipped when the files are already present.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
trainset

In [3]:
# check trainset
# Report the tensor size and label of the first few samples (at most four),
# then dump the first (image, label) pair in full.
for i in range(min(4, len(trainset))):
    sample = trainset[i]
    print('size of image {} label {}'.format(sample[0].size(), sample[1]))

print(trainset[0])


size of image torch.Size([3, 32, 32]) label 6
size of image torch.Size([3, 32, 32]) label 9
size of image torch.Size([3, 32, 32]) label 9
size of image torch.Size([3, 32, 32]) label 4
(tensor([[[0.2314, 0.1686, 0.1961,  ..., 0.6196, 0.5961, 0.5804],
         [0.0627, 0.0000, 0.0706,  ..., 0.4824, 0.4667, 0.4784],
         [0.0980, 0.0627, 0.1922,  ..., 0.4627, 0.4706, 0.4275],
         ...,
         [0.8157, 0.7882, 0.7765,  ..., 0.6275, 0.2196, 0.2078],
         [0.7059, 0.6784, 0.7294,  ..., 0.7216, 0.3804, 0.3255],
         [0.6941, 0.6588, 0.7020,  ..., 0.8471, 0.5922, 0.4824]],

        [[0.2431, 0.1804, 0.1882,  ..., 0.5176, 0.4902, 0.4863],
         [0.0784, 0.0000, 0.0314,  ..., 0.3451, 0.3255, 0.3412],
         [0.0941, 0.0275, 0.1059,  ..., 0.3294, 0.3294, 0.2863],
         ...,
         [0.6667, 0.6000, 0.6314,  ..., 0.5216, 0.1216, 0.1333],
         [0.5451, 0.4824, 0.5647,  ..., 0.5804, 0.2431, 0.2078],
         [0.5647, 0.5059, 0.5569,  ..., 0.7216, 0.4627, 0.3608]],

   

In [5]:
# display an image
import matplotlib.pyplot as plt
%matplotlib inline

# First sample's image; the check above printed its size as [3, 32, 32],
# i.e. the channel axis comes first.
torchimage = trainset[0][0]
# Move the channel axis last (32, 32, 3) so imshow can render it as a colour image.
# NOTE(review): despite the name, npimage is still a torch tensor (permute does
# not convert to NumPy); imshow is handed the tensor directly.
npimage = torchimage.permute(1,2,0)
plt.imshow(npimage)

<matplotlib.image.AxesImage at 0x133e3a4a8>

In [14]:
# DataLoader - multipurpose iterator
# Wrap the dataset so that iterating yields shuffled mini-batches of 4 samples.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's Python-2-style .next() method is not
# available on recent PyTorch releases, while next() works on every version.
images, labels = next(dataiter)
# One batch: a tensor of 4 labels and an image tensor with a leading batch axis.
print(labels)
print(images.size())

tensor([4, 8, 0, 8])
torch.Size([4, 3, 32, 32])


In [1]:
#Custom Dataset
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch
import csv
import os

class toyDataset(Dataset):
    """Image dataset described by a CSV file of ``imageName,label`` rows.

    Every non-empty row of the labels file names an image file (relative to
    ``dataPath``) followed by its label. Fields are stored exactly as the csv
    module parses them, so any whitespace after the comma stays part of the
    label (e.g. ``' toy'``).

    Args:
        dataPath: Directory that contains both the labels file and the images.
        labelsFile: Name of the CSV file, relative to ``dataPath``.
        transform: Optional callable applied to each loaded PIL image.

    Raises:
        AssertionError: At construction time, if a listed image file does not
            exist; the message is the offending file name.
    """

    def __init__(self, dataPath, labelsFile, transform=None):
        self.dataPath = dataPath
        self.transform = transform

        # newline='' is the csv module's recommended way to open its input.
        with open(os.path.join(self.dataPath, labelsFile), newline='') as f:
            # csv.reader yields [] for blank lines; skip them so a stray empty
            # line does not become an empty tuple and crash the check below.
            self.labels = [tuple(row) for row in csv.reader(f) if row]

        # Fail early on missing images, building the path the same way
        # __getitem__ does so both agree on which file is meant.
        for row in self.labels:
            assert os.path.isfile(os.path.join(self.dataPath, row[0])), row[0]

    def __len__(self):
        """Return the number of (image, label) rows read from the labels file."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, label)``.

        The image is a PIL image, or whatever ``transform`` returns when one
        was given. Raises ValueError if the row does not have exactly two
        fields.
        """
        imageName, imageLabel = self.labels[idx]
        imagePath = os.path.join(self.dataPath, imageName)
        # PIL reads pixel data lazily, so force the read while the file is
        # open and let the with-block close the handle instead of leaking it.
        with open(imagePath, 'rb') as f:
            image = Image.open(f)
            image.load()

        if self.transform:
            image = self.transform(image)
        return image, imageLabel
    

In [2]:
# transform
# Preprocessing applied to every toy image: grayscale replicated over three
# channels, a 300-pixel centre crop, then conversion to a tensor.
tforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.CenterCrop(size=300),
    transforms.ToTensor(),
])
toyData = toyDataset(
    dataPath='data/GiuseppeToys',
    labelsFile='labels2.csv',
    transform=tforms,
)


In [3]:
# Iterate the custom dataset in shuffled batches of 2.
toyloader = DataLoader(toyData, batch_size=2, shuffle=True)
toyiter = iter(toyloader)
# Use the built-in next(): the iterator's Python-2-style .next() method is not
# available on recent PyTorch releases, while next() works on every version.
images, labels = next(toyiter)
# Labels are the raw strings from the CSV; images are stacked into one tensor.
print(labels)
print(images.size())

(' toy', ' toy')
torch.Size([2, 3, 300, 300])


In [6]:
# using datasets.ImageFolder
from torchvision import datasets
# ImageFolder derives the class of each image from the sub-folder it lives in,
# so no separate labels file is needed.
dataFromFolders = datasets.ImageFolder(root='data/GiuseppeToys/images', transform=tforms)
folderloader = DataLoader(dataFromFolders, batch_size=4, shuffle=True)
# Use the built-in next(): the iterator's Python-2-style .next() method is not
# available on recent PyTorch releases, while next() works on every version.
images, labels = next(iter(folderloader))
print(labels, images.size())
# Class names in index order; the integer labels above index into this list.
print(folderloader.dataset.classes)

tensor([2, 2, 1, 2]) torch.Size([4, 3, 300, 300])
['NoToy', 'Scenes', 'SingleToy']


In [None]:
# NOTE(review): RemoveChannel is not defined or imported in any cell visible
# here - confirm it is defined before this cell runs, otherwise a fresh-kernel
# run raises NameError. Presumably 'b' selects the blue channel; verify
# against the RemoveChannel definition.
cc2 = RemoveChannel('b')