# 데이터 사용하기

In [1]:
# !pip install torchvision

In [2]:
import torch
import torchvision
import torchvision.transforms as tr
from torch.utils.data import DataLoader, Dataset
import numpy as np

## 1. Pytorch 제공 데이터 사용

In [3]:
transf = tr.Compose([tr.Resize(8), tr.ToTensor()])
# Transforms on PIL Image
# 들어오는 이미지 형태가 PIL인 경우 Transformer 사용 가능
# Pad, Grayscale, RandomCrop, Normalize ..
# Transforms on torch, *Tensor - tensor image
# torchvision.transforms.ToPILImage(mode=None)...

In [4]:
trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transf)
testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transf)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
trainset[0][0].size()

torch.Size([3, 8, 8])

In [6]:
trainloader = DataLoader(trainset, batch_size = 50, shuffle = True, num_workers = 2)
testset = DataLoader(testset, batch_size = 50, shuffle = True, num_workers = 2)

In [7]:
len(trainloader)

1000

In [8]:
dataiter = iter(trainloader)
Images, labels = dataiter.next()

In [9]:
Images.size()

torch.Size([50, 3, 8, 8])

## 2. 같은 클래스 별 폴더 이미지 데이터 이용

In [10]:
# ./class/tiger ./class/lion
transf = tr.Compose([tr.Resize(16), tr.ToTensor()])
trainset = torchvision.datasets.ImageFolder(root = './class', transform = transf)
trainloader = DataLoader(trainset, batch_size = 10, shuffle = False, num_workers = 2)
print(len(trainloader))

1


In [11]:
trainset[0][0].size()

torch.Size([3, 16, 25])

## 3. 개인 데이터 사용

### 방법 1

In [12]:
# import preprocessing
train_images = np.random.randint(256, size=(20,32,32,3))
train_labels = np.random.randint(2, size=(20,1))

In [13]:
# preprocessing......
# train_images, train_labels = preprocessing(train_images, train_labels)
print(train_images.shape, train_labels.shape)

(20, 32, 32, 3) (20, 1)


In [14]:
class TensorData(Dataset):
    def __init__(self, x_data, y_data):
        self.x_data = torch.FloatTensor(x_data)
        self.x_data = self.x_data.permute(0,3,1,2) # 이미지 개수, 채널 수, 이미지 너비, 높이
        self.y_data = torch.LongTensor(y_data)
        self.len = self.y_data.shape[0]
    
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

In [15]:
train_data2 = TensorData(train_images, train_labels)
train_loader2 = DataLoader(train_data2, batch_size = 10, shuffle = True)

In [16]:
train_data2[0][0].size()

torch.Size([3, 32, 32])

In [17]:
dataiter2 = iter(train_loader2)
Images2, labels2 = dataiter.next()

In [18]:
Images2.size()

torch.Size([50, 3, 8, 8])

### 방법2

In [19]:
from torch.utils.data import Dataset

In [20]:
class MyDataset(Dataset):
    def __init__(self, x_data, y_data, transform=None):
        self.x_data = x_data
        self.y_data = y_data
        self.transform = transform
        self.len = len(y_data)
    def __getitem__(self, index):
        sample = self.x_data[index], self.y_data[index]
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample
    
    def __len__(self):
        return self.len
    
class ToTensor:
    def __call__(self, sample):
        inputs, labels = sample
        # FloatTensor로 tensor 생성
        inputs = torch.FloatTensor(inputs)
        # permute로 이미지, 채널 사이즈 변경
        inputs = inputs.permute(2,0,1)
        # label은 tensor를 바꿈으로써 나가는게 Tensor로 나간다
        return inputs, torch.LongTensor(labels)

class LinearTensor:
    def __init__(self, slope=1, bias=0):
        self.slope = slope
        self.bias = bias
    def __call__(self, sample):
        inputs, labels = sample
        inputs = self.slope*inputs + self.bias
        return inputs, labels

In [21]:
# 가지고 있는 데이터가 numpy이므로 ToTensor() 사용
trans = tr.Compose([ToTensor(), LinearTensor(2,5)])
ds1 = MyDataset(train_images, train_labels, transform=trans)
train_loader1 = DataLoader(ds1, batch_size=10, shuffle = True)

In [22]:
first_data = ds1[0]
features, labels = first_data
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [23]:
dataiter3 = iter(train_loader1)
Images3, labels3 = dataiter3.next()

In [24]:
Images3

tensor([[[[171.,  53., 135.,  ..., 355., 225., 199.],
          [211., 473., 375.,  ..., 383., 315., 133.],
          [435., 427., 493.,  ..., 305., 481.,  69.],
          ...,
          [303., 129., 407.,  ..., 425., 457., 425.],
          [449., 311., 261.,  ..., 343., 345., 301.],
          [447., 405.,  27.,  ..., 247., 381., 227.]],

         [[435.,  99., 479.,  ..., 405., 167., 405.],
          [299., 399., 123.,  ...,  27.,  13., 123.],
          [189., 495., 361.,  ..., 161., 223., 195.],
          ...,
          [  5., 493., 381.,  ...,   5., 393., 193.],
          [141., 453., 455.,  ..., 163., 323., 209.],
          [145., 447., 121.,  ..., 225., 147., 433.]],

         [[493., 243., 243.,  ..., 281.,  49., 219.],
          [511., 433., 375.,  ..., 371., 193., 479.],
          [141., 343., 259.,  ..., 419., 125., 475.],
          ...,
          [445., 433., 461.,  ..., 307., 143., 267.],
          [ 79.,  59., 331.,  ..., 439.,  95., 227.],
          [ 75., 177., 277.,  ...

### 방법3

In [25]:
class MyDatasets(Dataset):
    
    def __init__(self, x_data, y_data, transform=None):
        self.x_data = x_data
        self.y_data = y_data
        self.transform = transform
        self.len = len(y_data)
    
    def __getitem__(self, index):
        sample = self.x_data[index], self.y_data[index]
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample
    
    def __len__(self):
        return self.len

class MyTransform:
    
    def __call__(self, sample):
        inputs, labels = sample
        inputs = torch.FloatTensor(inputs)
        inputs = inputs.permute(2,0,1)
        labels = torch.FloatTensor(labels)
        
        # tr.ToPILImage(): PILImage 형태로 변환
        transf = tr.Compose([tr.ToPILImage(), tr.Resize(128), tr.ToTensor(), tr.Normalize((0.5, 0.5, 0.5), (0.5,0.5, 0.5))])
        final_output = transf(inputs)
        
        return final_output, labels

In [26]:
ds2 = MyDataset(train_images, train_labels, transform = MyTransform())
train_loader4 = DataLoader(ds2, batch_size = 10, shuffle = True)

In [27]:
first_data = ds2[0]
features, labels = first_data
print(type(features), type(labels))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [28]:
dataiter4 = iter(train_loader4)
images4, labels4 = dataiter4.next()

In [29]:
images4.size()

torch.Size([10, 3, 128, 128])

In [30]:
images4

tensor([[[[ 0.1059,  0.1059, -0.0196,  ..., -0.3569, -0.5373, -0.5373],
          [ 0.1059,  0.1059, -0.0196,  ..., -0.3569, -0.5373, -0.5373],
          [ 0.1451,  0.1451,  0.0196,  ..., -0.2471, -0.4039, -0.4039],
          ...,
          [ 0.6078,  0.6078,  0.5294,  ...,  0.7490,  0.8745,  0.8745],
          [ 0.5608,  0.5608,  0.4824,  ...,  0.7490,  0.8745,  0.8745],
          [ 0.5608,  0.5608,  0.4824,  ...,  0.7490,  0.8745,  0.8745]],

         [[ 0.6471,  0.6471,  0.6235,  ..., -0.0118, -0.0824, -0.0824],
          [ 0.6471,  0.6471,  0.6235,  ..., -0.0118, -0.0824, -0.0824],
          [ 0.4588,  0.4588,  0.4431,  ..., -0.0510, -0.1059, -0.1059],
          ...,
          [ 0.0824,  0.0824,  0.0118,  ..., -0.1765, -0.1765, -0.1765],
          [ 0.2000,  0.2000,  0.1216,  ..., -0.3098, -0.3412, -0.3412],
          [ 0.2000,  0.2000,  0.1216,  ..., -0.3098, -0.3412, -0.3412]],

         [[-0.1373, -0.1373, -0.2314,  ..., -0.1765, -0.2314, -0.2314],
          [-0.1373, -0.1373, -