In [1]:
# 필요 라이브러리 임포트
import torch # 파이토치 기본 라이브러리
import torchvision # 이미지 관련된 파이토치 라이브러리
import torchvision.transforms as tr # 이미지 전처리 기능들을 제공하는 라이브러리
from torch.utils.data import DataLoader, Dataset # 데이터를 모델에 사용할수 있도록 정리하는 라이브러리
import numpy as np

## 1. 파이토치 제공 데이터 사용

In [2]:
# Preprocessing pipeline applied to each image when it is read from the dataset.
transf = tr.Compose(
    [
        tr.Resize(16),  # shorter side -> 16 px (CIFAR-10 images are square, so 16x16)
        tr.ToTensor(),  # convert to a tensor
    ]
)

# Load both CIFAR-10 splits (train first, then test).
# Indexing a dataset generally yields an (image, label) tuple.
trainset, testset = (
    torchvision.datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transf)
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Image tensor of the first training sample
print(f"image shape: {trainset[0][0].size()}") # (channels, height, width)

# Label of the first training sample (an integer class index)
print(f"label: {trainset[0][1]}")

image shape: torch.Size([3, 16, 16])
label: 6


In [4]:
# Wrap the datasets in mini-batch loaders; only the training split is shuffled.
trainloader = DataLoader(dataset=trainset, shuffle=True, batch_size=50)
testloader = DataLoader(dataset=testset, shuffle=False, batch_size=50)

# len() of a loader is the number of batches, not the number of samples.
print(f"length of trainloader: {len(trainloader)}\n")

# Draw one batch to check its layout.
images, labels = next(iter(trainloader))
print(f"image.size: {images.size()}")  # (batch size, channels, height, width)

length of trainloader: 1000

image.size: torch.Size([50, 3, 16, 16])


## 2.같은 클래스 별로 폴더를 정리한 경우

In [5]:
# Mount Google Drive inside the Colab runtime so files stored there can be read.
# NOTE(review): the mount point is spelled "dirve" (not "drive"). The later `cd`
# cell uses the same spelling, so the two must be renamed together if corrected.
from google.colab import drive
drive.mount('/content/dirve')

Drive already mounted at /content/dirve; to attempt to forcibly remount, call drive.mount("/content/dirve", force_remount=True).


In [6]:
cd/content/dirve/MyDrive/github/딥러닝/Pytorch/Usage

/content/dirve/MyDrive/github/딥러닝/Pytorch/Usage


In [7]:
# Preprocessing for the folder-based dataset: resize, then convert to a tensor.
transfer = tr.Compose([
    tr.Resize(16),   # shorter side -> 16 px
    tr.ToTensor(),
])

# Read images from './class' and apply the pipeline defined in THIS cell.
# (Previously the cell passed `transf` from the CIFAR-10 cell, so it only ran
# if that earlier cell had already been executed.)
# Note: this rebinds `trainset` / `trainloader`, replacing the CIFAR-10 objects.
trainset = torchvision.datasets.ImageFolder(root='./class', transform=transfer)
trainloader = DataLoader(trainset, batch_size=1, shuffle=False)

# Draw one batch to check its layout: (batch size, channels, height, width).
images, labels = next(iter(trainloader))
print(f"image.size:{images.size()}")

image.size:torch.Size([1, 3, 16, 16])


## 3.정형화 되지 않은 커스텀 데이터 불러오기

In [8]:
# Synthetic stand-in data: 100 random 32x32 three-channel images stored channel-last,
# plus one random binary label per image.
train_images = np.random.randint(low=0, high=256, size=(100, 32, 32, 3))
train_labels = np.random.randint(low=0, high=2, size=(100, 1))

class TensorData(Dataset):
    """In-memory dataset built from channel-last image data and labels.

    The images are converted to a float tensor and rearranged from
    (N, H, W, C) to (N, C, H, W); the labels are converted to a long tensor.
    Indexing returns an (image, label) pair.
    """

    def __init__(self, x_data, y_data):
        # Convert once up front so __getitem__ is a plain index lookup.
        images = torch.FloatTensor(x_data)
        self.x_data = images.permute(0, 3, 1, 2)  # channel-last -> channel-first
        self.y_data = torch.LongTensor(y_data)
        self.len = self.y_data.shape[0]  # number of samples

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        image, label = self.x_data[index], self.y_data[index]
        return image, label

# Build the dataset from the random arrays and batch it ten samples at a time.
train_data = TensorData(x_data=train_images, y_data=train_labels)
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=10)

# Draw one batch to confirm the channel-first layout.
images, labels = next(iter(train_loader))
print(f"image.size: {images.size()}")

image.size: torch.Size([10, 3, 32, 32])


## 4.커스텀 데이터

In [9]:
# Generate synthetic data.
# Images: (number of images, height, width, channels), integer values in [0, 256).
train_images = np.random.randint(0, 256, size=(100, 32, 32, 3))
# Labels: one binary label per image.
train_labels = np.random.randint(0, 2, size=(100, 1))

In [10]:
class MyDataset(Dataset):
    """Dataset over raw inputs and labels with an optional per-sample transform.

    Args:
        x_data: Indexable collection of inputs.
        y_data: Indexable collection of labels; its length defines the dataset size.
        transform: Optional callable that receives the ``(input, label)`` tuple
            and returns the processed sample.
    """

    def __init__(self, x_data, y_data, transform=None):
        self.x_data, self.y_data = x_data, y_data
        self.transform = transform
        self.len = len(y_data)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        pair = (self.x_data[index], self.y_data[index])
        # Without a transform the raw pair is returned unchanged.
        if not self.transform:
            return pair
        return self.transform(pair)


### 4-1. 커스텀 전처리

In [11]:
# Tensor conversion
class ToTensor():
    """Convert one ``(inputs, labels)`` sample to tensors.

    The inputs become a float tensor rearranged from channel-last to
    channel-first; the labels become a long tensor.
    """

    def __call__(self, sample):
        raw_inputs, raw_labels = sample
        # The transform sees one sample at a time, so there is no batch axis:
        # axes (0, 1, 2) are reordered to (2, 0, 1).
        image = torch.FloatTensor(raw_inputs).permute(2, 0, 1)
        target = torch.LongTensor(raw_labels)
        return image, target

# Linear (affine) transform
class LinearTensor():
    """Apply ``slope * x + bias`` to the inputs of a sample; labels pass through.

    Args:
        slope: Multiplicative factor (default 1).
        bias: Additive offset (default 0).
    """

    def __init__(self, slope=1, bias=0):
        self.slope, self.bias = slope, bias

    def __call__(self, sample):
        x, y = sample
        return self.slope*x + self.bias, y  # a*x + b

In [12]:
# Chain the custom transforms: tensor conversion first, then 2*x + 5.
trans = tr.Compose([ToTensor(), LinearTensor(slope=2, bias=5)])

dataset1 = MyDataset(x_data=train_images, y_data=train_labels, transform=trans)
train_loader1 = DataLoader(dataset=dataset1, shuffle=True, batch_size=12)

# Draw one batch to check its layout.
images1, labels1 = next(iter(train_loader1))
print(f"image.size:{images1.size()}")

image.size:torch.Size([12, 3, 32, 32])


### 4-2.torchvision.transforms 전처리

In [13]:
class MyTransform():
    """Per-sample transform that runs torchvision preprocessing on a raw image.

    Expects ``sample = (inputs, labels)`` where ``inputs`` is a channel-last
    (H, W, C) three-channel image with integer pixel values in [0, 255], as
    produced by the data-generation cell. Returns the resized, normalized image
    tensor together with the labels as a long tensor.
    """

    def __init__(self):
        # Build the pipeline once; it was previously rebuilt for every sample.
        self.pipeline = tr.Compose([
            tr.ToPILImage(),  # torchvision transforms operate on PIL images or tensors
            tr.Resize(128),
            tr.ToTensor(),
            tr.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def __call__(self, sample):
        inputs, labels = sample
        # ToPILImage multiplies floating-point tensors by 255 before casting to
        # byte, which wraps values that are already in the 0-255 range. Passing a
        # uint8 tensor keeps the pixel values intact.
        inputs = torch.as_tensor(inputs, dtype=torch.uint8)
        inputs = inputs.permute(2, 0, 1)  # (H, W, C) -> (C, H, W)
        labels = torch.LongTensor(labels)

        final_output = self.pipeline(inputs)

        return final_output, labels

In [14]:
# Use the torchvision-based transform and batch twenty samples at a time.
dataset2 = MyDataset(x_data=train_images, y_data=train_labels, transform=MyTransform())
train_loader2 = DataLoader(dataset=dataset2, shuffle=True, batch_size=20)

# Draw one batch to check its layout.
images2, labels2 = next(iter(train_loader2))
print(f"image.shape:{images2.size()}")

image.shape:torch.Size([20, 3, 128, 128])
