# Dataset and DataLoader

In [4]:
import numpy as np
import pandas as pd

import torch as tr
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import math


In [16]:
# wine_data = "../dataset_pytorch/wine.csv"
# df = pd.read_csv(wine_data)
# df.head()

class WineDataset(Dataset):
    """Wine data set read from a CSV file and held in memory as tensors.

    The file is parsed with ``np.loadtxt`` as comma-separated ``float32``
    values after skipping one header row. Column 0 becomes the target
    (kept two-dimensional, one column) and the remaining columns become
    the input features.

    Args:
        path: Location of the CSV file. The default is the path this
            notebook has always used, so ``WineDataset()`` is unchanged.
    """

    def __init__(self, path="../dataset_pytorch/wine.csv"):
        # ndmin=2 keeps a single-row file two-dimensional; without it the
        # column slicing below fails on a 1-D array.
        xy = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        self.x = tr.from_numpy(xy[:, 1:])
        # Indexing with a list keeps the column axis on the targets.
        self.y = tr.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.n_samples

# Single definition of the batch size, so the loader and the reported step
# count cannot drift apart.
batch_size = 4

dataset = WineDataset()
data_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    # num_workers=2
)

# Peek at a single batch:
# data_iter = iter(data_loader)
# features, labels = next(data_iter)
# print(features, labels)

num_epoch = 2
total_samples = len(dataset)
# The loader reports its own number of batches per epoch; with the default
# drop_last=False this equals ceil(total_samples / batch_size).
n_iterations = len(data_loader)
print(total_samples, n_iterations)

for epoch in range(num_epoch):
    for i, (inputs, labels) in enumerate(data_loader):
        # forward / backward / parameter update would go here
        if (i + 1) % 5 == 0:
            print(f"epoch {epoch+1}/{num_epoch}: step {i+1}/{n_iterations}, inputs {inputs.shape}")


178 45
epoch 1/2: step 5/45, inputs torch.Size([4, 13])
epoch 1/2: step 10/45, inputs torch.Size([4, 13])
epoch 1/2: step 15/45, inputs torch.Size([4, 13])
epoch 1/2: step 20/45, inputs torch.Size([4, 13])
epoch 1/2: step 25/45, inputs torch.Size([4, 13])
epoch 1/2: step 30/45, inputs torch.Size([4, 13])
epoch 1/2: step 35/45, inputs torch.Size([4, 13])
epoch 1/2: step 40/45, inputs torch.Size([4, 13])
epoch 1/2: step 45/45, inputs torch.Size([2, 13])
epoch 2/2: step 5/45, inputs torch.Size([4, 13])
epoch 2/2: step 10/45, inputs torch.Size([4, 13])
epoch 2/2: step 15/45, inputs torch.Size([4, 13])
epoch 2/2: step 20/45, inputs torch.Size([4, 13])
epoch 2/2: step 25/45, inputs torch.Size([4, 13])
epoch 2/2: step 30/45, inputs torch.Size([4, 13])
epoch 2/2: step 35/45, inputs torch.Size([4, 13])
epoch 2/2: step 40/45, inputs torch.Size([4, 13])
epoch 2/2: step 45/45, inputs torch.Size([2, 13])


### Data Transforms

In [21]:

# torchvision ships ready-made datasets that accept a `transform` argument, e.g.
# torchvision.datasets.MNIST(), FashionMNIST, CIFAR10, CocoDetection

class WineDataset(Dataset):
    """Wine data set read from a CSV file, with an optional per-sample transform.

    The file is parsed with ``np.loadtxt`` as comma-separated ``float32``
    values after skipping one header row. Column 0 is the target and the
    remaining columns are the input features. Both are stored as NumPy
    arrays; any conversion is left to ``transform``.

    Args:
        transform: Optional callable applied to each ``(inputs, targets)``
            tuple before it is returned. ``None`` returns the raw tuple.
        path: Location of the CSV file. The default is the path this
            notebook has always used.
    """

    def __init__(self, transform=None, path="../dataset_pytorch/wine.csv"):
        # ndmin=2 keeps a single-row file two-dimensional; without it the
        # column slicing below fails on a 1-D array.
        xy = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        self.n_samples = xy.shape[0]
        self.x = xy[:, 1:]
        # Indexing with a list keeps the column axis on the targets.
        self.y = xy[:, [0]]

        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) sample at ``index``."""
        sample = self.x[index], self.y[index]

        # Compare against None explicitly: a truthiness test would silently
        # skip a transform object that happens to evaluate as false.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.n_samples

class ToTensor:
    """Transform that turns an ``(inputs, targets)`` pair of NumPy arrays into tensors."""

    def __call__(self, sample):
        """Return a tuple of tensors built with ``from_numpy`` from both arrays."""
        features, labels = sample
        return tuple(map(tr.from_numpy, (features, labels)))

class MulTransform:
    """Transform that scales the inputs of an ``(inputs, targets)`` sample.

    Args:
        factor: Multiplier applied to the inputs; targets pass through untouched.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, targets)`` without modifying ``inputs``."""
        inputs, targets = sample
        # Multiply out of place. An in-place ``*=`` writes through to the
        # caller's buffer; when ``inputs`` is a tensor sharing memory with the
        # array it was built from, the scaling would compound on every call.
        return inputs * self.factor, targets

# Show the first sample twice: once converted to tensors only, and once
# converted and then scaled by 2 through a composed transform.
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])

for transform in (ToTensor(), composed):
    dataset = WineDataset(transform=transform)
    first_data = dataset[0]
    f, l = first_data
    print(f)
    print(type(f), type(l))


tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>
