In [1]:
# importing libraries
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

#### Resources: https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

In [2]:
# Check whether PyTorch can see a CUDA-capable GPU on this machine.
torch.cuda.is_available()

True

In [3]:
# Index of the CUDA device that is currently selected.
torch.cuda.current_device()

0

In [4]:
# Human-readable name of CUDA device 0.
torch.cuda.get_device_name(0)

'GeForce GTX 1650'

### Computing gradients over the whole dataset in a single step is inefficient
### -> divide the dataset into small batches


#### epoch = 
one forward and backward pass of ALL training samples
#### batch_size = 
number of training samples used in one forward/backward pass
#### number of iterations =

number of passes, each pass (forward+backward) using [batch_size] number of samples.

#### e.g : 100 samples, batch_size=20 -> 100/20=5 iterations for 1 epoch


In [5]:
# --> DataLoader can do the batch computation for us

# Implement a custom Dataset:
# inherit Dataset
# implement __init__ , __getitem__ , and __len__

#data : https://raw.githubusercontent.com/Uttam580/mlDlNlpLearning/master/datasets./winequality_white.csv

In [6]:
# Remote CSV with the white-wine quality data; both pandas and NumPy read it in the cells below.
DATASETPATH= 'https://raw.githubusercontent.com/Uttam580/mlDlNlpLearning/master/datasets./winequality_white.csv'

In [7]:
import pandas as pd
# Load the CSV into a DataFrame for a quick visual check of the columns and first rows.
db= pd.read_csv(DATASETPATH)
db.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [8]:
# (rows, columns) of the loaded DataFrame
db.shape

(4898, 12)

In [9]:
# Column labels: the feature columns followed by the 'quality' target
db.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [10]:
# Parse the same CSV straight into a float32 array, skipping the header row.
xy = np.loadtxt(DATASETPATH, delimiter=',', dtype=np.float32, skiprows=1)

# Columns 0-10 are shown as the feature matrix, column 11 as the target vector.
feature_cols, target_col = xy[:, :11], xy[:, 11]
print(feature_cols)
print(target_col)

[[ 7.    0.27  0.36 ...  3.    0.45  8.8 ]
 [ 6.3   0.3   0.34 ...  3.3   0.49  9.5 ]
 [ 8.1   0.28  0.4  ...  3.26  0.44 10.1 ]
 ...
 [ 6.5   0.24  0.19 ...  2.99  0.46  9.4 ]
 [ 5.5   0.29  0.3  ...  3.34  0.38 12.8 ]
 [ 6.    0.21  0.38 ...  3.26  0.32 11.8 ]]
[6. 6. 6. ... 6. 7. 6.]


In [11]:
class Winedataset(Dataset):
    """Map-style dataset over the wine-quality CSV referenced by DATASETPATH.

    The file is parsed once in ``__init__``; the first 11 columns are kept as
    the feature tensor and column 11 as the target tensor (shape ``(n, 1)``).
    """

    def __init__(self):
        # Header row is skipped; everything is read as float32.
        raw = np.loadtxt(DATASETPATH, delimiter=',', dtype=np.float32, skiprows=1)
        self.n_samples = raw.shape[0]

        # [11] (a list) instead of 11 keeps the target two-dimensional.
        feature_block, target_block = raw[:, :11], raw[:, [11]]
        self.x_data = torch.from_numpy(feature_block)
        self.y_data = torch.from_numpy(target_block)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        pair = (self.x_data[index], self.y_data[index])
        return pair

    def __len__(self):
        """Number of rows parsed from the CSV."""
        return self.n_samples
    

In [12]:
# create dataset (the constructor reads and parses the CSV at DATASETPATH)
dataset = Winedataset()

In [13]:
# get first sample and unpack
# indexing the dataset calls __getitem__, which returns a (features, label) tuple
first_data = dataset[0]
features, labels = first_data
print(features, labels)

tensor([7.0000e+00, 2.7000e-01, 3.6000e-01, 2.0700e+01, 4.5000e-02, 4.5000e+01,
        1.7000e+02, 1.0010e+00, 3.0000e+00, 4.5000e-01, 8.8000e+00]) tensor([6.])


In [14]:
# Wrap the dataset in a DataLoader to iterate over it in mini-batches.
#   batch_size : number of samples per batch
#   shuffle    : shuffle the data, good for training
#   num_workers: subprocesses used for loading (0 = load in the main process)
train_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
)

In [15]:
# Display the DataLoader object itself (only its repr; no data is loaded here).
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1fa6e4bbfd0>

In [16]:
# Convert to an iterator and look at one random batch (batch_size samples).
# Use the built-in next(): the iterator's .next() method was a Python 2 style
# alias and is not available in recent PyTorch releases.
dataiter = iter(train_loader)
data = next(dataiter)
features, labels = data
print(features, labels)

tensor([[6.8000e+00, 6.4000e-01, 8.0000e-02, 9.7000e+00, 6.2000e-02, 2.6000e+01,
         1.4200e+02, 9.9720e-01, 3.3700e+00, 4.6000e-01, 8.9000e+00],
        [7.3000e+00, 1.8000e-01, 3.1000e-01, 1.7300e+01, 5.5000e-02, 3.2000e+01,
         1.9700e+02, 1.0002e+00, 3.1300e+00, 4.6000e-01, 9.0000e+00],
        [6.9000e+00, 1.1500e-01, 3.5000e-01, 5.4000e+00, 4.8000e-02, 3.6000e+01,
         1.0800e+02, 9.9390e-01, 3.3200e+00, 4.2000e-01, 1.0200e+01],
        [6.2000e+00, 1.6000e-01, 3.2000e-01, 1.1000e+00, 3.6000e-02, 7.4000e+01,
         1.8400e+02, 9.9096e-01, 3.2200e+00, 4.1000e-01, 1.1000e+01]]) tensor([[4.],
        [6.],
        [6.],
        [6.]])


In [17]:
# Dummy Training loop
num_epochs = 2
total_samples = len(dataset)
# Take the batch size from the loader itself so the step count cannot drift
# from the DataLoader configuration; ceil because the last batch may be partial.
n_iterations = math.ceil(total_samples / train_loader.batch_size)
print(total_samples, n_iterations)

4898 1225


In [18]:
for epoch in range(num_epochs):
    # one full pass over train_loader yields every batch once, i.e. one epoch
    for i, (inputs, labels) in enumerate(train_loader):
        
        # here: 4898 samples, batch_size = 4, n_iters=4898/4=1224.5 -> 1225 iterations
        # Run your training process
        # progress is reported on every 5th step only
        if (i+1) % 5 == 0:
            print(f'Epoch: {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations}| Inputs {inputs.shape} | Labels {labels.shape}')

Epoch: 1/2, Step 5/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 10/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 15/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 20/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 25/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 30/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 35/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 40/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 45/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 50/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 55/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 60/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 65/1225| Inp

Epoch: 1/2, Step 570/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 575/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 580/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 585/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 590/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 595/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 600/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 605/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 610/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 615/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 620/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 625/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step

Epoch: 1/2, Step 1140/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1145/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1150/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1155/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1160/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1165/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1170/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1175/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1180/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1185/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1190/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 1195/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoc

Epoch: 2/2, Step 645/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 650/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 655/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 660/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 665/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 670/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 675/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 680/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 685/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 690/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 695/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 700/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step

Epoch: 2/2, Step 1205/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 1210/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 1215/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 1220/1225| Inputs torch.Size([4, 11]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 1225/1225| Inputs torch.Size([2, 11]) | Labels torch.Size([2, 1])


In [19]:
# Some well-known datasets are available in torchvision.datasets
# e.g. MNIST, Fashion-MNIST, CIFAR10, COCO

# ToTensor is applied to every image as it is read from the dataset.
mnist_transform = torchvision.transforms.ToTensor()
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=mnist_transform,
    download=True,
)

# NOTE: this rebinds train_loader, replacing the wine DataLoader created earlier.
train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

In [20]:
# Look at one random batch (3 images and their labels).
# Use the built-in next(): the iterator's .next() method was a Python 2 style
# alias and is not available in recent PyTorch releases.
dataiter = iter(train_loader)
data = next(dataiter)
inputs, targets = data
print(inputs.shape, targets.shape)

torch.Size([3, 1, 28, 28]) torch.Size([3])


In [21]:
# Second image of the batch (index 1), shown as its raw pixel tensor.
inputs[1]

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [22]:
# Label that belongs to the image displayed above.
targets[1]

tensor(7)

### Transforms

Transforms can be applied to PIL images, tensors, ndarrays, or custom data.
They are passed in when the Dataset is created and applied to each sample as it is read.
Complete list of built-in transforms:
https://pytorch.org/docs/stable/torchvision/transforms.html

#### On Images

CenterCrop, Grayscale, Pad, RandomAffine
RandomCrop, RandomHorizontalFlip, RandomRotation
Resize, Scale

#### On Tensors
----------
LinearTransformation, Normalize, RandomErasing

#### Conversion
----------
ToPILImage: from tensor or ndarray
ToTensor : from numpy.ndarray or PILImage

#### Generic
-------
Use Lambda 

#### Custom
------
Write own class

#### Compose multiple Transforms
---------------------------
composed = transforms.Compose([Rescale(256),
                               RandomCrop(224)])

In [23]:
import torch
import torchvision
from torch.utils.data import Dataset
import numpy as np

In [30]:
class WineDataset(Dataset):
    """Wine-quality dataset backed by NumPy arrays, with an optional transform.

    Args:
        transform: optional callable that receives the ``(features, target)``
            tuple and returns the sample handed to the caller. When omitted,
            samples are returned as plain NumPy arrays.
    """

    def __init__(self, transform=None):
        # Header row is skipped; everything is read as float32.
        raw = np.loadtxt(DATASETPATH, delimiter=',', dtype=np.float32, skiprows=1)
        self.n_samples = raw.shape[0]

        # Data stays as ndarrays here; any tensor conversion is left to the transform.
        # [11] (a list) instead of 11 keeps the target two-dimensional.
        self.x_data, self.y_data = raw[:, :11], raw[:, [11]]

        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index``, passed through ``transform`` if one is set."""
        pair = (self.x_data[index], self.y_data[index])
        return self.transform(pair) if self.transform else pair

    def __len__(self):
        """Number of rows parsed from the CSV."""
        return self.n_samples

#### Without Transform

In [31]:
print('Without Transform')
# no transform is passed, so __getitem__ returns the raw NumPy arrays
dataset_without_trans = WineDataset()
first_data = dataset_without_trans[0]
first_data

Without Transform


(array([7.000e+00, 2.700e-01, 3.600e-01, 2.070e+01, 4.500e-02, 4.500e+01,
        1.700e+02, 1.001e+00, 3.000e+00, 4.500e-01, 8.800e+00],
       dtype=float32),
 array([6.], dtype=float32))

In [32]:
# unpack the untransformed sample and confirm that both parts are ndarrays
features, labels = first_data
print(type(features), type(labels))
print(features, labels)

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
[7.000e+00 2.700e-01 3.600e-01 2.070e+01 4.500e-02 4.500e+01 1.700e+02
 1.001e+00 3.000e+00 4.500e-01 8.800e+00] [6.]


#### With Tensor Transform

In [33]:
# Custom Transforms
# implement __call__(self, sample)
class ToTensor:
    """Transform that turns an ``(inputs, targets)`` pair of ndarrays into tensors."""

    def __call__(self, sample):
        """Return ``(inputs, targets)`` with each array wrapped via ``torch.from_numpy``."""
        features_np, labels_np = sample
        as_tensors = (torch.from_numpy(features_np), torch.from_numpy(labels_np))
        return as_tensors

In [34]:
print('\nWith Tensor Transform')
# NOTE: this rebinds `dataset`; from here on it is a WineDataset whose samples go through ToTensor
dataset = WineDataset(transform=ToTensor())
first_data = dataset[0]
features, labels = first_data
print(type(features), type(labels))
print(features, labels)


With Tensor Transform
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([7.0000e+00, 2.7000e-01, 3.6000e-01, 2.0700e+01, 4.5000e-02, 4.5000e+01,
        1.7000e+02, 1.0010e+00, 3.0000e+00, 4.5000e-01, 8.8000e+00]) tensor([6.])


#### With Tensor and Multiplication Transform

In [35]:
class MulTransform:
    """Scale the inputs of an ``(inputs, targets)`` sample by a constant factor.

    Args:
        factor: multiplier applied to ``inputs``; ``targets`` pass through unchanged.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, targets)`` without modifying ``sample``."""
        inputs, targets = sample
        # Multiply out of place. An in-place `*=` would write through to whatever
        # storage `inputs` aliases (e.g. a tensor made with torch.from_numpy over
        # the dataset's array), so each access would rescale the stored data again.
        inputs = inputs * self.factor
        return inputs, targets

In [36]:
print('\nWith Tensor and Multiplication Transform')
# Compose chains the transforms in list order: convert to tensors first, then scale the inputs by 4
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(4)])
dataset = WineDataset(transform=composed)
first_data = dataset[0]
features, labels = first_data
print(type(features), type(labels))
print(features, labels)


With Tensor and Multiplication Transform
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([2.8000e+01, 1.0800e+00, 1.4400e+00, 8.2800e+01, 1.8000e-01, 1.8000e+02,
        6.8000e+02, 4.0040e+00, 1.2000e+01, 1.8000e+00, 3.5200e+01]) tensor([6.])
