In [114]:
import math
import numpy as np

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score

import matplotlib.pyplot as plt
from torch.utils.data.dataset import T_co

# Index of the active CUDA device, or None when no GPU is usable, so the
# notebook still runs top-to-bottom on a CPU-only machine.
gpu = torch.cuda.current_device() if torch.cuda.is_available() else None
# Last expression of the cell: show which device was picked.
torch.cuda.get_device_name(gpu) if gpu is not None else "cpu"

'NVIDIA GeForce GTX 1650 with Max-Q Design'

epoch -> one forward and backward pass over all training samples

batch size -> number of training samples used in one forward and backward pass

number of iterations -> number of passes needed to complete one epoch, each pass using [batch_size] samples

e.g.:

100 samples, batch size=20 -> 100/20 = 5 iterations for each epoch (round up when the division is not exact: 178 samples, batch size=4 -> 45 iterations)

In [115]:
class WineDataset(Dataset):
    """Map-style dataset over the rows of a wine CSV file.

    Each data row is one sample: column 0 is used as the label and the
    remaining columns are the features. The whole file is read into memory
    once, when the dataset is constructed.
    """

    def __init__(self, path="./data/wine.csv"):
        """Load the CSV at ``path`` and split it into features and labels.

        Args:
            path: Location of the comma-separated data file. Its first line
                is skipped as a header. Defaults to the file this notebook
                uses.
        """
        # ndmin=2 keeps a file with a single data row two-dimensional, so the
        # column slicing below works for any number of rows.
        wine = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)

        self.X = torch.from_numpy(wine[:, 1:])
        # Indexing with a list keeps the label column 2-D: (n_samples, 1).
        self.y = torch.from_numpy(wine[:, [0]])
        self.n_samples = wine.shape[0]

    def __getitem__(self, index) -> "tuple[torch.Tensor, torch.Tensor]":
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples (data rows) that were loaded."""
        return self.n_samples

# Build the dataset once; the constructor reads the CSV file from disk.
dataset = WineDataset()

In [116]:
# sanity check: fetch the first sample and split it into its two parts
first_data = dataset[0]
features = first_data[0]
labels = first_data[1]

(features, labels)


(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

In [117]:
# Batches of 4 samples, reshuffled every epoch, loaded without worker processes.
data_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)

In [118]:
# create a data iterator from data loader
data_iter = iter(data_loader)

# each call to next() returns a single batch; use the builtin rather than the
# Python-2 style `.next()` alias, which newer PyTorch loader iterators lack
features, labels = next(data_iter)

features, labels

(tensor([[1.3270e+01, 4.2800e+00, 2.2600e+00, 2.0000e+01, 1.2000e+02, 1.5900e+00,
          6.9000e-01, 4.3000e-01, 1.3500e+00, 1.0200e+01, 5.9000e-01, 1.5600e+00,
          8.3500e+02],
         [1.2000e+01, 9.2000e-01, 2.0000e+00, 1.9000e+01, 8.6000e+01, 2.4200e+00,
          2.2600e+00, 3.0000e-01, 1.4300e+00, 2.5000e+00, 1.3800e+00, 3.1200e+00,
          2.7800e+02],
         [1.3770e+01, 1.9000e+00, 2.6800e+00, 1.7100e+01, 1.1500e+02, 3.0000e+00,
          2.7900e+00, 3.9000e-01, 1.6800e+00, 6.3000e+00, 1.1300e+00, 2.9300e+00,
          1.3750e+03],
         [1.4020e+01, 1.6800e+00, 2.2100e+00, 1.6000e+01, 9.6000e+01, 2.6500e+00,
          2.3300e+00, 2.6000e-01, 1.9800e+00, 4.7000e+00, 1.0400e+00, 3.5900e+00,
          1.0350e+03]]),
 tensor([[3.],
         [2.],
         [1.],
         [1.]]))

In [119]:
# a sample training loop

num_epochs = 2
total_samples = len(dataset)
# len(data_loader) is the number of batches per epoch, so the step count
# follows the loader's batch size instead of repeating it as a literal.
n_iterations = len(data_loader)

print(total_samples, n_iterations)

for epoch in range(1, num_epochs+1):

    for i, (inputs, labels) in enumerate(data_loader):

        # do forward, backward, update
        print(f"epoch: {epoch}/{num_epochs}, step {i+1}/{n_iterations}, inputs: {inputs.shape}")

178 45
epoch: 1/2, step 1/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 2/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 3/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 4/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 5/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 6/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 7/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 8/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 9/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 10/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 11/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 12/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 13/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 14/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 15/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 16/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 17/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 18/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 19/45, inputs: torch.Size([4, 13])
epoch: 1/2, st