iris로 데이터셋 만들기

In [19]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

In [20]:
class IrisDataset(Dataset):
    # 초기화 콜백
    def __init__(self, csv_file): 
        super().__init__()
        data = pd.read_csv(csv_file)
        
        self.x = data.iloc[:,:-1].values #feature
        self.y = data.iloc[:,-1].values # target
        
        # 타겟(str) 인코딩하기
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(self.y)
        self.y = y
        #tensorize
        self.feature = torch.tensor(self.y,dtype=torch.long)
        self.target = torch.FloatTensor(self.x)
        
    #데이터셋 길이 체크(개수) 콜백    
    def __len__(self):
        return len(self.target) # return self.target.shape[0]
    
    #특정 인덱스 + 라벨(타겟) 데이터 반환 콜백함수
    def __getitem__(self, idx):
        return self.target[idx], self.feature[idx]

In [21]:
dataset = IrisDataset('../../datas/iris.csv') 
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [22]:
# __getitem__(0)
dataset[0]

(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor(0))

### DATALOADER
- batch_size, shuffle, drop_last, sampler

In [29]:
# 로딩된 데이터 확인 함수
def print_batch_data(loader,epochs, batch_size = 1, shuffle = False, drop_last = False, sampler = None) : #sampler과 shuffle는 함께 쓸 수 없음: 하나가 true면 하나는 False
    print(f'[설정값] batchsize = {batch_size}, shuffle: {shuffle}, drop_last = {drop_last}, sampler = {sampler}')
    for ep in range(epochs):
        print(f'[{ep}] EPOCHS===================Batch {len(loader)}===')
        for (feature,label) in loader:
            print(feature.shape, label.shape, label)

In [30]:
### => [ 기본 사용 ] batch_size = 1, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(dataset)
print_batch_data(loader=loader, epochs=2)

[설정값] batchsize = 1, shuffle: False, drop_last = False, sampler = None
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) tensor([0])
torch.Size([1, 4]) torch.Size([1]) t

In [31]:
### => [ 설정 사용 ] batch_size = 22, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(dataset, batch_size=22)
print_batch_data(loader=loader, epochs=2)

[설정값] batchsize = 1, shuffle: False, drop_last = False, sampler = None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([18, 4]) torch.Size([18]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([

In [32]:
### => [ 설정 사용 ] batch_size = 22, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(dataset, batch_size=22, drop_last=True)
print_batch_data(loader=loader, epochs=2, drop_last=True)

[설정값] batchsize = 1, shuffle: False, drop_last = True, sampler = None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22

In [33]:
### => [ 설정 사용 ] batch_size = 22, shuffle = True, drop_last = False, sampler = None
loader = DataLoader(dataset, batch_size=22, shuffle=True)
print_batch_data(loader=loader, epochs=2, shuffle=True)

[설정값] batchsize = 1, shuffle: True, drop_last = False, sampler = None
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 2, 2, 1, 0, 0, 1, 0, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 2, 2, 1, 1, 0, 2, 1, 1, 2, 1, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 1, 1, 0, 1, 1, 0, 2, 2, 0, 2, 2, 0, 2, 1, 1, 1, 1, 2, 2, 2, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([2, 0, 0, 2, 0, 1, 1, 0, 0, 1, 0, 2, 0, 2, 2, 1, 0, 1, 2, 0, 0, 1])
torch.Size([22, 4]) torch.Size([22]) tensor([1, 2, 1, 1, 2, 0, 1, 2, 0, 2, 2, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 2, 1, 2, 2, 2, 2, 2, 0, 1, 1, 2, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0])
torch.Size([18, 4]) torch.Size([18]) tensor([2, 0, 0, 1, 2, 2, 2, 0, 1, 1, 1, 2, 2, 1, 0, 2, 0, 0])
torch.Size([22, 4]) torch.Size([22]) tensor([0, 0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 1, 0, 1, 0, 1, 0, 0, 2])
torch.Size([22, 4]) torch.Size([22]) tensor([0

In [34]:
for (feature, label) in loader:
    print(feature.shape, label.shape, label.bincount())

torch.Size([22, 4]) torch.Size([22]) tensor([ 5, 10,  7])
torch.Size([22, 4]) torch.Size([22]) tensor([10,  6,  6])
torch.Size([22, 4]) torch.Size([22]) tensor([9, 4, 9])
torch.Size([22, 4]) torch.Size([22]) tensor([6, 7, 9])
torch.Size([22, 4]) torch.Size([22]) tensor([ 4, 10,  8])
torch.Size([22, 4]) torch.Size([22]) tensor([9, 5, 8])
torch.Size([18, 4]) torch.Size([18]) tensor([7, 8, 3])
