### DataLoader

In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset,DataLoader,random_split

#import torchvision
#import torchvision.transforms as transforms
#import torch.nn as nn
#import torch.nn.functional as F

In [2]:
class KidneyDataset(Dataset): 
    # Initialize your data, download, etc.
    def __init__(self):
        #xy = np.loadtxt("kidney.dat", dtype=np.float32)
        
        xy = np.array([[11,12,13,14,15],
                       [21,22,23,24,25],
                       [31,32,33,34,35],
                       [41,42,43,44,45],
                       [51,52,53,54,55]])
        
        ## use only 20 rows and 5 last columns for demonstration purpose
        #xy = xy[220:240,-5:]  
        
        print("xy.shape:", xy.shape)
        x = xy[:, :-1]
        y = xy[:, -1]
        #x = xy[:,:-1]
        #y = (xy[:,-1]+1)/2. ## convert y from {-1,+1} to {0,1}:
        
        self.x = torch.from_numpy(x)
        self.y = torch.from_numpy(y)
                
        self.n_samples = xy.shape[0]
    
    ## return one item on the index
    def __getitem__(self, index): 
        return self.x[index], self.y[index]
    
    ## return the data length
    def __len__(self): 
        return self.n_samples

In [3]:
dataset = KidneyDataset()

xy.shape: (5, 5)


In [4]:
dataset

<__main__.KidneyDataset at 0x7fb200771910>

In [5]:
dataset.n_samples

5

In [6]:
## features
dataset.x

tensor([[11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
        [41, 42, 43, 44],
        [51, 52, 53, 54]])

In [7]:
dataset.x.shape

torch.Size([5, 4])

In [8]:
## target
dataset.y

tensor([15, 25, 35, 45, 55])

In [9]:
## first sample
dataset[0]

(tensor([11, 12, 13, 14]), tensor(15))

In [10]:
## feature of sample 0
dataset[0][0]

tensor([11, 12, 13, 14])

In [11]:
dataset[0][0].shape

torch.Size([4])

In [12]:
## target of sample 0
dataset[0][1]

tensor(15)

### DataLoader

In [13]:
## dataloader
batch_size = 2
data_loader = DataLoader(dataset=dataset,batch_size=batch_size, shuffle=True)

In [14]:
n_epochs = 3

total_samples = len(dataset)
n_iterations = int(total_samples/batch_size)
print(total_samples, n_iterations)

5 2


In [15]:
for i in data_loader:
    print(i)

[tensor([[31, 32, 33, 34],
        [41, 42, 43, 44]]), tensor([35, 45])]
[tensor([[11, 12, 13, 14],
        [51, 52, 53, 54]]), tensor([15, 55])]
[tensor([[21, 22, 23, 24]]), tensor([25])]


In [16]:
for epoch in range(n_epochs):
    for i, (inputs, labels) in enumerate(data_loader):
        ## forward, backward
        #if (i+1) % 5 ==0:
        print(f"epoch {epoch+1}/{n_epochs}, step {i+1}/{n_epochs}, inputs {inputs.shape}")

epoch 1/3, step 1/3, inputs torch.Size([2, 4])
epoch 1/3, step 2/3, inputs torch.Size([2, 4])
epoch 1/3, step 3/3, inputs torch.Size([1, 4])
epoch 2/3, step 1/3, inputs torch.Size([2, 4])
epoch 2/3, step 2/3, inputs torch.Size([2, 4])
epoch 2/3, step 3/3, inputs torch.Size([1, 4])
epoch 3/3, step 1/3, inputs torch.Size([2, 4])
epoch 3/3, step 2/3, inputs torch.Size([2, 4])
epoch 3/3, step 3/3, inputs torch.Size([1, 4])


### Data Iter

In [17]:
## data_iter
data_iter = iter(data_loader)

In [18]:
data_iter

<torch.utils.data.dataloader._SingleProcessDataLoaderIter at 0x7fb1b0202750>

In [19]:
data = data_iter.next()

features, labels = data

In [20]:
print(features,labels)

tensor([[31, 32, 33, 34],
        [21, 22, 23, 24]]) tensor([35, 25])


In [21]:
data_iter.next()

[tensor([[51, 52, 53, 54],
         [41, 42, 43, 44]]),
 tensor([55, 45])]

In [22]:
data_iter.next()

[tensor([[11, 12, 13, 14]]), tensor([15])]

### Training and Validation datasets

In [23]:
## training and validation datasets
train_size = int(dataset.n_samples*0.7)
val_size = dataset.n_samples - train_size

train_set, val_set = random_split(dataset, [train_size, val_size])
len(train_set), len(val_set)

(3, 2)

In [24]:
## dataloader
train_loader = DataLoader(dataset=train_set,batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_set,batch_size=32)