In [103]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd

In [104]:
file = '../data/iris.csv'
irisDF = pd.read_csv(file, header = None)
irisDF.columns = ['sepal_length', 'sepal_width', 'petal_length','petal_width','variety']

irisNP = np.loadtxt(file, delimiter = ',', usecols = [0,1,2,3])

In [105]:
## 피처와 라벨로 분리
featureDF = irisDF[irisDF.columns[:-1]]
targetDF = irisDF[irisDF.columns[-1]]

# object 타입 타겟 ===> int 타입 타겟 변환
from sklearn.preprocessing import LabelEncoder

targetNP = LabelEncoder().fit_transform(targetDF)
targetNP = targetNP.reshape(-1,1)

In [106]:
class DLDataset(Dataset):
    def __init__(self,csv_file):
        super().__init__()
        
        data = pd.read_csv(csv_file)
        self.x = data.iloc[:,:-1].values # feature
        self.y = data.iloc[:, -1].values # target
    
        
        # x,y 데이터 ==> ndarray
        x_data = x_data.values if isinstance(x_data, pd.DataFrame) else x_data
        y_data = y_data.values if isinstance(y_data, pd.DataFrame) else y_data
        
        # ndarray ==> tensor
        self.feature = torch.FloatTensor(x_data)
        self.target = torch.LongTensor(y_data) 

    def __len__(self):
        return self.target.shape[0]

    def __getitem__(self, index):
        return self.feature[index], self.target[index]
    
# 데이터셋 생성 => NP, NP
my_dataset2 = DLDataset(irisNP, targetNP)
print(my_dataset2[0])

(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor([0]))


In [107]:
from torch.utils.data import random_split
seed = torch.Generator().manual_seed(2)
trainDS, validDS, testDS = random_split(my_dataset2, [0.7, 0.1, 0.2], generator = seed )

### DataLoader
- batch_size, shuffle, drop_last, sampler

In [111]:
### 로딩된 데이터 확인 함수
def print_batch_data(loader, epochs, batch_size = 1, shuffle = False, drop_last = False, sampler = None):
    print(f"[설정값] batch_size: {batch_size}, shuffle: {shuffle}, drop_last: {drop_last}, sampler: {sampler}")
    
    for ep in range(epochs) :
        print(f"[{ep} EPOCHS] ============ batch {len(loader)}개")
        for (feature, label) in loader :
            print(feature.shape, label.shape, label)

In [112]:
# 로딩된 데이터 확인 함수
def print_batch_data(loader,epochs, batch_size = 1, shuffle = False, drop_last = False, sampler = None) :
    print(f'[설정값] batchsize = {batch_size}, shuffle: {shuffle}, drop_last = {drop_last}, sampler = {sampler}')
    for ep in range(epochs):
        print(f'[{ep}] EPOCHS===================Batch {len(loader)}===')
        for (feature,label) in loader:
            print(feature.shape, label.shape, label)

In [113]:
### ===>  batch_size = 22, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(my_dataset2, 22)
print_batch_data(loader, 2, batch_size = 22)

[설정값] batchsize = 22, shuffle: False, drop_last = False, sampler = None
torch.Size([22, 4]) torch.Size([22, 1]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])
torch.Size([22, 4]) torch.Size([22, 1]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])
torch.Size([22, 4]) torch.Size([22, 1]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],


In [114]:
### ===>  batch_size = 22, shuffle = False, drop_last = False, sampler = None
loader = DataLoader(my_dataset2, 22, drop_last = True)
print_batch_data(loader, 2, batch_size = 22, drop_last = True)

[설정값] batchsize = 22, shuffle: False, drop_last = True, sampler = None
torch.Size([22, 4]) torch.Size([22, 1]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])
torch.Size([22, 4]) torch.Size([22, 1]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])
torch.Size([22, 4]) torch.Size([22, 1]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
 

In [115]:
loader = DataLoader(my_dataset2, drop_last = True)
print_batch_data(loader, 2, 22, True, True)

[설정값] batchsize = 22, shuffle: True, drop_last = True, sampler = None
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([[0]])
torch.Size([1, 4]) torch.Size([1, 1]) tensor([