### DataLoader

In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset,DataLoader,random_split

#import torchvision
#import torchvision.transforms as transforms
#import torch.nn as nn
#import torch.nn.functional as F

In [2]:
class KidneyDataset(Dataset):
    """Map-style dataset backed by a whitespace-delimited numeric text file.

    Every row holds the feature values followed by the label in the last
    column. Labels are expected to be encoded as {-1, +1} in the file and
    are remapped to {0, 1} on load.

    Args:
        path: file to read with ``np.loadtxt``. Defaults to ``"kidney.dat"``
            in the current working directory.

    Attributes:
        x_t: float32 tensor with all columns except the last (the features).
        y_t: float32 tensor with the remapped last column (the targets).
        n_samples: number of rows read from the file.
        len: alias of ``n_samples``, kept so code written as ``dataset.len``
            keeps working; prefer ``len(dataset)``.
    """

    def __init__(self, path="kidney.dat"):
        # ndmin=2 keeps a single-row file two-dimensional, so the column
        # slicing below works for any number of rows.
        xy = np.loadtxt(path, dtype=np.float32, ndmin=2)

        x = xy[:, :-1]
        y = (xy[:, -1] + 1) / 2.  ## convert y from {-1,+1} to {0,1}

        self.x_t = torch.from_numpy(x)
        self.y_t = torch.from_numpy(y)

        self.n_samples = xy.shape[0]
        # Some cells access `dataset.len` directly; expose it explicitly so
        # they do not depend on stale kernel state.
        self.len = self.n_samples

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_t[index], self.y_t[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [3]:
# Build the dataset; the constructor reads "kidney.dat" from the working directory.
dataset = KidneyDataset()

In [4]:
# Display the object; no custom __repr__ is defined, so the default repr is shown.
dataset

<__main__.KidneyDataset at 0x7f80a81fc9d0>

In [5]:
# Number of samples, obtained through the standard __len__ protocol.
len(dataset)

372

In [6]:
## first sample: __getitem__ returns a (features, target) tuple
dataset[0]

(tensor([ 48.0000,  80.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,
           0.0000,   1.0000,   0.0000,   0.0000,   0.0000,   0.0000,   1.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   1.0000,  -1.0000,
          -1.0000, 121.0000,  36.0000,   1.2000,  15.4000,  44.0000,   1.0000,
           1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000]),
 tensor(0.))

In [7]:
## feature tensor of the first sample (first element of the tuple)
dataset[0][0]

tensor([ 48.0000,  80.0000,   0.0000,   0.0000,   0.0000,   1.0000,   0.0000,
          0.0000,   1.0000,   0.0000,   0.0000,   0.0000,   0.0000,   1.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   1.0000,  -1.0000,
         -1.0000, 121.0000,  36.0000,   1.2000,  15.4000,  44.0000,   1.0000,
          1.0000,  -1.0000,  -1.0000,  -1.0000,  -1.0000])

In [8]:
# Shape of one sample's feature tensor, i.e. the number of feature columns.
dataset[0][0].shape

torch.Size([33])

In [9]:
## target of the first sample (second element of the tuple), after the {-1,+1} -> {0,1} remap
dataset[0][1]

tensor(0.)

In [10]:
## training and validation datasets (70% / 30% split)
# len(dataset) goes through Dataset.__len__, so it does not rely on any
# extra attribute being present on the object.
train_size = int(len(dataset)*0.7)
val_size = len(dataset) - train_size

# Seed the split so that Restart & Run All always produces the same partition.
train_set, val_set = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(0),
)
len(train_set), len(val_set)

(260, 112)

In [11]:
# Mini-batch loaders of 32 samples each: the training loader reshuffles on
# every pass, the validation loader keeps the default (unshuffled) order.
train_loader = DataLoader(
    train_set,
    batch_size=32,
    shuffle=True,
)
val_loader = DataLoader(
    val_set,
    batch_size=32,
)