https://www.youtube.com/watch?v=8PnxJ3s3Cwo

In [None]:
import torch
import torchvision
import torchvision.transforms as tr
from torch.utils.data import DataLoader, Dataset
import numpy as np

### 1. 파이토치 제공 데이터 사용

In [None]:
# Preprocessing pipeline applied to every image: resize, then convert PIL -> tensor.
# torchvision.transforms also provides, among others:
#   - transforms on PIL images: Pad, Grayscale, RandomCrop, ...
#   - transforms on tensor images: Normalize, ...
#   - conversion transforms: ToPILImage(mode=None), ToTensor, ...
transf = tr.Compose([
    tr.Resize(8),
    tr.ToTensor(),
])

In [None]:
# CIFAR-10 train / test splits, both preprocessed with `transf`.
# download=True fetches the archive into ./data when it is not there yet.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transf,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transf,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Shape of the first training image after `transf` (channels first).
trainset[0][0].shape

torch.Size([3, 8, 8])

In [None]:
# Mini-batch loaders: 50 images per batch, fetched by 2 worker processes.
# Only the training data is shuffled; the test set is read in a fixed order so
# that evaluation runs are reproducible.
trainloader = DataLoader(trainset, batch_size=50, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=50, shuffle=False, num_workers=2)

In [None]:
# Number of mini-batches the loader yields per epoch (batch_size=50).
len(trainloader)

1000

In [None]:
# iter(): build an iterator that hands out one mini-batch at a time.
# Advance it with the built-in next(); recent PyTorch DataLoader iterators no
# longer expose a `.next()` method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

In [None]:
# One mini-batch is laid out as (batch size, channels, height, width) = (50, 3, 8, 8).
images.shape

torch.Size([50, 3, 8, 8])

### 2. 같은 클래스 별 폴더 이미지 데이터 이용

클래스별로 폴더를 정리할 수 없는 경우에는 이 방법(`ImageFolder`)을 사용할 수 없다. 예:

1. 다른 작업과 공용으로 사용
2. 폴더가 아닌 SQL 같은 곳에서 넘어오는 경우

In [None]:
# Expected layout: one sub-folder per class, e.g. ./class/tiger and ./class/lion
# NOTE(review): Resize(16) is given a single int; if the source images do not all
# share one aspect ratio, the resulting tensors may differ in shape and fail to
# batch — consider Resize((16, 16)). TODO confirm against the actual images.
transf = tr.Compose([
    tr.Resize(16),
    tr.ToTensor(),
])
trainset = torchvision.datasets.ImageFolder(root='./class', transform=transf)
trainloader = DataLoader(
    trainset,
    batch_size=10,
    shuffle=False,
    num_workers=2,
)
print(len(trainloader))

In [None]:
# Shape of the first image served by the ImageFolder dataset.
trainset[0][0].shape

### 3. 개인 데이터 사용(2 types)

In [None]:
# import preprocessing

# Synthetic stand-in for a private dataset: 20 channel-last images of 32x32x3
# with integer values in [0, 256), plus one 0/1 label per image.
train_images = np.random.randint(0, 256, size=(20, 32, 32, 3))
train_labels = np.random.randint(0, 2, size=(20, 1))

# preprocessing......
# train_images, train_labels = preprocessing(train_images, train_labels)

print(train_images.shape, train_labels.shape)

(20, 32, 32, 3) (20, 1)


In [None]:
class TensorData(Dataset):
    """In-memory dataset built from an image array and a label array.

    Parameters
    ----------
    x_data : array-like with four axes, channel-last
        Images; stored as float32 with the last axis moved to position 1
        (PyTorch's channel-first N, C, H, W convention).
    y_data : array-like
        Labels; stored as int64. Its first axis defines the dataset length.
    """

    def __init__(self, x_data, y_data):
        # torch.as_tensor replaces the legacy torch.FloatTensor / torch.LongTensor
        # constructors; it also accepts lists and existing tensors of any dtype.
        self.x_data = torch.as_tensor(x_data, dtype=torch.float32)
        # channel-last -> channel-first: image count, channels, height, width
        self.x_data = self.x_data.permute(0, 3, 1, 2)
        self.y_data = torch.as_tensor(y_data, dtype=torch.long)
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        """Return the (image, label) pair at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples (size of the label tensor's first axis)."""
        return self.len

In [None]:
# Wrap the synthetic arrays in TensorData and serve them in shuffled batches of 10.
train_data = TensorData(x_data=train_images, y_data=train_labels)
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)

In [None]:
# Shape of one stored image: channels first, after the permute in TensorData.
train_data[0][0].shape

torch.Size([3, 32, 32])

In [None]:
# Fetch the first mini-batch with the built-in next(); recent PyTorch DataLoader
# iterators no longer expose a `.next()` method.
dataiter = iter(train_loader)
images, labels = next(dataiter)

In [None]:
# Batch layout: (batch size, channels, height, width).
images.shape

torch.Size([10, 3, 32, 32])

```python
from torch.utils.data import Dataset

class MyDataset(Dataset):
    # Skeleton of a custom dataset: the three methods to fill in.

    def __init__(self):
        # store the data and whatever is needed to serve samples
        ...
    def __getitem__(self, index):
        # return the sample at position `index`
        ...
    def __len__(self):
        # return the number of samples
        ...
```

In [None]:
class MyDataset(Dataset):
    """Dataset over parallel ``x_data`` / ``y_data`` containers with an optional transform.

    Parameters
    ----------
    x_data : indexable
        Inputs; ``x_data[i]`` is the i-th input.
    y_data : indexable supporting ``len()``
        Targets; its length defines the dataset length.
    transform : callable, optional
        Called as ``transform((x, y))``; its return value becomes the sample.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data = x_data
        self.y_data = y_data
        self.transform = transform
        self.len = len(y_data)

    def __getitem__(self, index):
        """Return the (input, target) pair at ``index``, transformed if a transform is set."""
        sample = self.x_data[index], self.y_data[index]

        # Compare against None instead of relying on truthiness: a callable that
        # happens to be falsy (e.g. an empty container-like pipeline) must still run.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples."""
        return self.len

class ToTensor:
    """Convert a channel-last (inputs, labels) sample into a pair of tensors."""

    def __call__(self, sample):
        """Return ``(inputs, labels)`` as a float32 and an int64 tensor.

        ``sample`` is an ``(inputs, labels)`` pair; ``inputs`` must have three
        axes, and its last axis is moved to the front.
        """
        inputs, labels = sample
        # torch.as_tensor wraps the *value*. The legacy torch.LongTensor(k)
        # constructor treats a bare integer k as a size and returns an
        # uninitialised tensor of k elements, which corrupts scalar labels.
        inputs = torch.as_tensor(inputs, dtype=torch.float32)
        inputs = inputs.permute(2, 0, 1)  # channel-last -> channel-first
        return inputs, torch.as_tensor(labels, dtype=torch.long)

class LinearTensor:
    """Sample transform that maps the inputs to ``slope * inputs + bias``.

    The label half of the sample is passed through unchanged.
    """

    def __init__(self, slope=1, bias=0):
        self.slope, self.bias = slope, bias

    def __call__(self, sample):
        """Return ``(slope * inputs + bias, labels)`` for an ``(inputs, labels)`` pair."""
        features, target = sample
        return self.slope * features + self.bias, target

In [None]:
# Chain the two custom transforms: convert to tensors first, then apply 2*x + 5.
trans = tr.Compose([
    ToTensor(),
    LinearTensor(2, 5),
])
ds1 = MyDataset(train_images, train_labels, transform=trans)
train_loader1 = DataLoader(ds1, batch_size=10, shuffle=True)

In [None]:
# Take one sample straight from the dataset (no loader) and check both halves' types.
features, labels = first_data = ds1[0]
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [None]:
# Fetch the first mini-batch with the built-in next(); recent PyTorch DataLoader
# iterators no longer expose a `.next()` method.
dataiter1 = iter(train_loader1)
images1, labels1 = next(dataiter1)

In [None]:
# Each value is 2*x + 5 for a pixel x drawn from [0, 256), as set by LinearTensor(2, 5).
images1

tensor([[[[251., 101., 351.,  ..., 465.,  13., 287.],
          [341., 271., 341.,  ..., 361., 327., 345.],
          [ 51., 421.,   9.,  ..., 291., 405.,  71.],
          ...,
          [285., 405., 337.,  ..., 217., 143.,  49.],
          [ 41., 437., 405.,  ..., 183., 129., 341.],
          [405.,  19., 143.,  ..., 419., 297., 349.]],

         [[199., 187., 377.,  ..., 443., 267., 389.],
          [275., 401., 445.,  ..., 409., 515., 111.],
          [227., 187., 123.,  ..., 187., 323.,  71.],
          ...,
          [ 15., 103., 325.,  ..., 237., 335., 303.],
          [141., 231.,   9.,  ..., 149.,   5., 349.],
          [ 95., 227., 159.,  ...,  45.,  39., 201.]],

         [[343.,  51., 251.,  ..., 141., 303., 125.],
          [447.,  45., 221.,  ...,  19., 431.,  51.],
          [323., 139., 255.,  ..., 223., 347., 475.],
          ...,
          [ 77., 373., 217.,  ..., 137., 269., 301.],
          [ 73., 465., 103.,  ...,  85.,  61., 229.],
          [457.,  69., 409.,  ...

In [None]:
class MyDataset(Dataset):
    """Dataset pairing ``x_data[i]`` with ``y_data[i]``, with an optional sample transform.

    Parameters
    ----------
    x_data : indexable
        Inputs.
    y_data : indexable supporting ``len()``
        Targets; its length is reported as the dataset length.
    transform : callable, optional
        Receives the ``(x, y)`` tuple and returns the sample to serve.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data = x_data
        self.y_data = y_data
        self.transform = transform
        self.len = len(y_data)

    def __getitem__(self, index):
        """Return the sample at ``index``, passed through ``transform`` when one is set."""
        sample = self.x_data[index], self.y_data[index]

        # `is not None` rather than a truthiness test, so that a callable which
        # evaluates as falsy is still applied.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples."""
        return self.len

class MyTransform:
    """Turn a raw channel-last (image, label) sample into a normalised tensor pair.

    Parameters
    ----------
    size : int, optional
        Size handed to ``tr.Resize`` (default 128, the value used previously).
    """

    def __init__(self, size=128):
        # Build the pipeline once instead of on every __call__.
        self.transf = tr.Compose([
            tr.ToPILImage(),
            tr.Resize(size),
            tr.ToTensor(),
            tr.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def __call__(self, sample):
        """Return ``(image, labels)``: the transformed image tensor and float32 labels."""
        inputs, labels = sample
        # ToPILImage multiplies *floating-point* tensors by 255 (it expects them in
        # [0, 1]) before casting to bytes, which wraps around for 0-255 pixel
        # values. Handing it uint8 data keeps the pixels intact.
        # NOTE(review): assumes integer pixel values in 0..255, as produced by the
        # synthetic data in this notebook — confirm for real data.
        inputs = torch.as_tensor(inputs, dtype=torch.uint8)
        inputs = inputs.permute(2, 0, 1)  # channel-last -> channel-first
        # as_tensor wraps the value; the legacy torch.FloatTensor(k) constructor
        # would treat a bare integer k as a size.
        labels = torch.as_tensor(labels, dtype=torch.float32)

        final_output = self.transf(inputs)

        return final_output, labels

In [None]:
# Same synthetic arrays, this time preprocessed per sample by MyTransform.
ds2 = MyDataset(
    train_images,
    train_labels,
    transform=MyTransform(),
)
train_loader2 = DataLoader(ds2, batch_size=10, shuffle=True)

In [None]:
# Take one sample straight from the dataset (no loader) and check both halves' types.
features, labels = first_data = ds2[0]
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [None]:
# Fetch the first mini-batch with the built-in next(); recent PyTorch DataLoader
# iterators no longer expose a `.next()` method.
dataiter2 = iter(train_loader2)
images2, labels2 = next(dataiter2)

In [None]:
# Batch layout after MyTransform: (batch size, channels, height, width).
images2.shape

torch.Size([10, 3, 128, 128])

In [None]:
# First batch from train_loader2, i.e. images after MyTransform
# (ToPILImage, Resize(128), ToTensor, Normalize).
images2

tensor([[[[ 0.2078,  0.2078,  0.1137,  ...,  0.2706,  0.3961,  0.3961],
          [ 0.2078,  0.2078,  0.1137,  ...,  0.2706,  0.3961,  0.3961],
          [ 0.0588,  0.0588, -0.0039,  ...,  0.2392,  0.3490,  0.3490],
          ...,
          [ 0.1765,  0.1765,  0.1294,  ...,  0.1294,  0.1294,  0.1294],
          [ 0.1216,  0.1216,  0.0902,  ...,  0.2157,  0.2471,  0.2471],
          [ 0.1216,  0.1216,  0.0902,  ...,  0.2157,  0.2471,  0.2471]],

         [[ 0.0431,  0.0431, -0.0353,  ...,  0.0431,  0.0431,  0.0431],
          [ 0.0431,  0.0431, -0.0353,  ...,  0.0431,  0.0431,  0.0431],
          [-0.0275, -0.0275, -0.0824,  ...,  0.0980,  0.1137,  0.1137],
          ...,
          [ 0.8196,  0.8196,  0.6706,  ..., -0.5216, -0.5529, -0.5529],
          [ 0.8980,  0.8980,  0.7333,  ..., -0.6549, -0.6784, -0.6784],
          [ 0.8980,  0.8980,  0.7333,  ..., -0.6549, -0.6784, -0.6784]],

         [[-0.6706, -0.6706, -0.5137,  ...,  0.7882,  0.7961,  0.7961],
          [-0.6706, -0.6706, -