<a href="https://colab.research.google.com/github/dkhanna511/Pytorch_tutorials_Revision/blob/main/Custom_Dataset_and_Dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd

In [4]:
url = 'https://raw.githubusercontent.com/patrickloeber/pytorchTutorial/master/data/wine/wine.csv'
my_df = pd.read_csv(url) ### We're using the IRIS Dataset. We cna get it from the url directly
my_df

Unnamed: 0,Wine,Alcohol,Malic.acid,Ash,Acl,Mg,Phenols,Flavanoids,Nonflavanoid.phenols,Proanth,Color.int,Hue,OD,Proline
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


In [5]:
class WineDataset(Dataset):
    def __init__(self):
        ## Data loading
        xy = np.loadtxt(url, delimiter= ',', dtype = np.float32, skiprows = 1)
        self.x = torch.from_numpy(xy[:, 1:]) ## Features
        self.y = torch.from_numpy(xy[:,[0]])   ## Classes --> [n_samples, 1]

        self.n_samples =xy.shape[0]


    def __getitem__(self, index):
        # dataset[0]
        ## index will come from the batch that we define in thw dataloader, basically it will iterate over the dataset and fetch it.
        return self.x[index], self.y[index]

    def __len__(self):
        return self.n_samples




In [8]:
dataset = WineDataset()
first_data = dataset[0]
features, labels = first_data
print(features, labels)     ## this gives us onw row vector of features and 1 label corresponding to it

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [14]:
### Dataloader

dataloader = DataLoader(dataset = dataset, batch_size = 4, shuffle = True, num_workers = 1)  ## num_workers might make loading faster as it uses subprocesses
data_iter = iter(dataloader)

data = next(data_iter)

features, labels = data
print(features,"\n", labels)
print(" shaper of featurs : {}, \n shape of labels : {}".format(features.shape, labels.shape) )  ## We get (4,13) for features signifying 4 rows and 13 features in each row. 4 is the batch size. labels will give (4,1) defining 4 labels of different rows

tensor([[1.2420e+01, 2.5500e+00, 2.2700e+00, 2.2000e+01, 9.0000e+01, 1.6800e+00,
         1.8400e+00, 6.6000e-01, 1.4200e+00, 2.7000e+00, 8.6000e-01, 3.3000e+00,
         3.1500e+02],
        [1.2220e+01, 1.2900e+00, 1.9400e+00, 1.9000e+01, 9.2000e+01, 2.3600e+00,
         2.0400e+00, 3.9000e-01, 2.0800e+00, 2.7000e+00, 8.6000e-01, 3.0200e+00,
         3.1200e+02],
        [1.3880e+01, 5.0400e+00, 2.2300e+00, 2.0000e+01, 8.0000e+01, 9.8000e-01,
         3.4000e-01, 4.0000e-01, 6.8000e-01, 4.9000e+00, 5.8000e-01, 1.3300e+00,
         4.1500e+02],
        [1.2880e+01, 2.9900e+00, 2.4000e+00, 2.0000e+01, 1.0400e+02, 1.3000e+00,
         1.2200e+00, 2.4000e-01, 8.3000e-01, 5.4000e+00, 7.4000e-01, 1.4200e+00,
         5.3000e+02]]) 
 tensor([[2.],
        [2.],
        [3.],
        [3.]])
 shaper of featurs : torch.Size([4, 13]), 
 shape of labels : torch.Size([4, 1])


In [18]:
## Training loop
num_epochs = 3
batch_size = 4
total_samples = len(dataset)
import math
n_iterations= math.ceil(total_samples / batch_size)
n_iterations

45

In [19]:
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        if (i+1)//5 ==0:
            print(" Epoch  : {}/{}, step {}/{}, inputs {}".format(epoch, num_epochs, i+1, n_iterations, inputs.shape))

 Epoch  : 0/3, step 1/45, inputs torch.Size([4, 13])
 Epoch  : 0/3, step 2/45, inputs torch.Size([4, 13])
 Epoch  : 0/3, step 3/45, inputs torch.Size([4, 13])
 Epoch  : 0/3, step 4/45, inputs torch.Size([4, 13])
 Epoch  : 1/3, step 1/45, inputs torch.Size([4, 13])
 Epoch  : 1/3, step 2/45, inputs torch.Size([4, 13])
 Epoch  : 1/3, step 3/45, inputs torch.Size([4, 13])
 Epoch  : 1/3, step 4/45, inputs torch.Size([4, 13])
 Epoch  : 2/3, step 1/45, inputs torch.Size([4, 13])
 Epoch  : 2/3, step 2/45, inputs torch.Size([4, 13])
 Epoch  : 2/3, step 3/45, inputs torch.Size([4, 13])
 Epoch  : 2/3, step 4/45, inputs torch.Size([4, 13])
