In [1]:
# For large datasets, it is better to split the data into smaller subsets, i.e. batches.
# The training loop then consists of two nested for-loops:
# for epoch in range(num_epochs):
#   for i in range(total_batches):
#       x_batch, y_batch = ...

In [2]:
# Terms

# Epoch: one forward & backward pass of ALL training samples

# Batch size: number of training samples in one forward & backward pass

# Number of iterations: number of passes needed to complete one epoch, each pass using [batch_size] samples
# e.g. 100 samples, batch_size = 20, then for one epoch there are 100/20 = 5 iterations

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math 

In [4]:
class WineDataset(Dataset):
    """Map-style dataset that loads a comma-separated file fully into memory.

    The first line of the file is skipped (treated as a header). In every
    remaining row, column 0 is used as the label and all other columns are
    used as the features.

    Attributes:
        x: float32 tensor holding the feature columns, one row per sample.
        y: float32 tensor of shape (n_samples, 1) holding the label column.
        n_samples: number of data rows read from the file.
    """

    def __init__(self, path='./data/wine.csv'):
        """Read the CSV at ``path`` and split it into features and labels.

        Args:
            path: location of the CSV file. Defaults to the wine data file
                used throughout this notebook, so ``WineDataset()`` keeps
                working unchanged.
        """
        # ndmin=2 keeps the array two-dimensional even when the file contains
        # a single data row; otherwise the column slicing below would fail.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])
        self.y = torch.from_numpy(xy[:, [0]])  # (n_samples, 1)
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index`` (i.e. ``dataset[i]``)."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples (i.e. ``len(dataset)``)."""
        return self.n_samples

In [5]:
# Build the dataset once and look at its very first sample.
dataset = WineDataset()
first_data = dataset[0]  # indexing goes through WineDataset.__getitem__
features, label = first_data[0], first_data[1]
print(features, label)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [6]:
# Use DataLoader

In [7]:
# Wrap the dataset in a DataLoader that yields shuffled mini-batches of 4
# samples, loading in the main process (num_workers=0).
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=0)

# Pull a single batch to inspect it. Use the builtin next(): DataLoader
# iterators follow the standard iterator protocol (__next__), and the old
# `.next()` method is not available on current PyTorch versions.
dataiter = iter(dataloader)
data = next(dataiter)
features, label = data
print(features)
print(label)

tensor([[1.3160e+01, 3.5700e+00, 2.1500e+00, 2.1000e+01, 1.0200e+02, 1.5000e+00,
         5.5000e-01, 4.3000e-01, 1.3000e+00, 4.0000e+00, 6.0000e-01, 1.6800e+00,
         8.3000e+02],
        [1.4370e+01, 1.9500e+00, 2.5000e+00, 1.6800e+01, 1.1300e+02, 3.8500e+00,
         3.4900e+00, 2.4000e-01, 2.1800e+00, 7.8000e+00, 8.6000e-01, 3.4500e+00,
         1.4800e+03],
        [1.3860e+01, 1.3500e+00, 2.2700e+00, 1.6000e+01, 9.8000e+01, 2.9800e+00,
         3.1500e+00, 2.2000e-01, 1.8500e+00, 7.2200e+00, 1.0100e+00, 3.5500e+00,
         1.0450e+03],
        [1.2600e+01, 2.4600e+00, 2.2000e+00, 1.8500e+01, 9.4000e+01, 1.6200e+00,
         6.6000e-01, 6.3000e-01, 9.4000e-01, 7.1000e+00, 7.3000e-01, 1.5800e+00,
         6.9500e+02]])
tensor([[3.],
        [1.],
        [1.],
        [3.]])


In [8]:
# Training loop

In [9]:
num_epochs = 3

total_samples = len(dataset)

# Iterations per epoch = ceil(total_samples / batch_size).
# The batch size is read from the DataLoader instead of repeating the literal,
# so this count cannot drift if the DataLoader's batch_size is changed.
# The last batch may be smaller than batch_size, hence the ceil.
n_iterations = math.ceil(total_samples / dataloader.batch_size)

print(total_samples, n_iterations)

178 45


In [10]:
# Skeleton of a mini-batch training loop: the outer loop walks over epochs,
# the inner loop over the batches the DataLoader produces for that epoch.
for epoch in range(num_epochs):
    print(f"----- Epoch {epoch + 1} -----")
    for i, (inputs, labels) in enumerate(dataloader):
        # Forward, backward, update

        # Only report progress on every 5th iteration.
        if (i + 1) % 5 != 0:
            continue
        print(f"Epoch {epoch+1}/{num_epochs}, Iter {i+1}/{n_iterations}")
        print(f"Inputs: (nSamples, nFeatures) = {inputs.shape}")

----- Epoch 1 -----
Epoch 1/3, Iter 5/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 10/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 15/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 20/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 25/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 30/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 35/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 40/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 1/3, Iter 45/45
Inputs: (nSamples, nFeatures) = torch.Size([2, 13])
----- Epoch 2 -----
Epoch 2/3, Iter 5/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 2/3, Iter 10/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 2/3, Iter 15/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
Epoch 2/3, Iter 20/45
Inputs: (nSamples, nFeatures) = torch.Size([4, 13])
