<a href="https://colab.research.google.com/github/ganesh3/pytorch-work/blob/master/pytorch_dataloader_dropout_lr_schedule.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The dataset for this colab is available at http://archive.ics.uci.edu/ml/datasets/connectionist+bench+(sonar,+mines+vs.+rocks). Please download the sonar.all-data file and rename the type as cxv to upload to colab or in your local environment.

I have referred to these links which are from machine learning mastery for my learning.
1. [link1](https://machinelearningmastery.com/training-a-pytorch-model-with-dataloader-and-dataset/)   
2. [link2](https://machinelearningmastery.com/using-learning-rate-schedule-in-pytorch-training/) 
3. [link3](https://machinelearningmastery.com/using-dropout-regularization-in-pytorch-models/)

Please use the same if you want to refer it.

In [40]:
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split, default_collate
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.model_selection import StratifiedKFold

In [2]:
# Read data, convert to NumPy arrays
df = pd.read_csv("sonar.csv", header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [3]:
df.shape

(208, 61)

In [4]:
X = df.iloc[:, 0:60].values
y = df.iloc[:, 60].values

In [5]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)

In [6]:
#conver to pytorch tensor
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True)

In [8]:
# create DataLoader, then take one batch
dls = DataLoader(list(zip(X_train, y_train)), shuffle=True, batch_size=16)

In [9]:
for X_batch, y_batch in dls:
  print(X_batch, y_batch)
  break

tensor([[0.0408, 0.0653, 0.0397, 0.0604, 0.0496, 0.1817, 0.1178, 0.1024, 0.0583,
         0.2176, 0.2459, 0.3332, 0.3087, 0.2613, 0.3232, 0.3731, 0.4203, 0.5364,
         0.7062, 0.8196, 0.8835, 0.8299, 0.7609, 0.7605, 0.8367, 0.8905, 0.7652,
         0.5897, 0.3037, 0.0823, 0.2787, 0.7241, 0.8032, 0.8050, 0.7676, 0.7468,
         0.6253, 0.1730, 0.2916, 0.5003, 0.5220, 0.4824, 0.4004, 0.3877, 0.1651,
         0.0442, 0.0663, 0.0418, 0.0475, 0.0235, 0.0066, 0.0062, 0.0129, 0.0184,
         0.0069, 0.0198, 0.0199, 0.0102, 0.0070, 0.0055],
        [0.0206, 0.0132, 0.0533, 0.0569, 0.0647, 0.1432, 0.1344, 0.2041, 0.1571,
         0.1573, 0.2327, 0.1785, 0.1507, 0.1916, 0.2061, 0.2307, 0.2360, 0.1299,
         0.3812, 0.5858, 0.4497, 0.4876, 1.0000, 0.8675, 0.4718, 0.5341, 0.6197,
         0.7143, 0.5605, 0.3728, 0.2481, 0.1921, 0.1386, 0.3325, 0.2883, 0.3228,
         0.2607, 0.2040, 0.2396, 0.1319, 0.0683, 0.0334, 0.0716, 0.0976, 0.0787,
         0.0522, 0.0500, 0.0231, 0.0221, 0.0144, 0.

In [10]:
# create model
model = nn.Sequential(
    nn.Linear(60, 60),
    nn.ReLU(),
    nn.Linear(60, 30),
    nn.ReLU(),
    nn.Linear(30, 1),
    nn.Sigmoid()
)

In [11]:
#define loss function & optimize
loss_fn = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [12]:
# Train the model
num_epochs = 200
model.train()
for epoch in range(num_epochs):
  for X_batch, y_batch in dls:
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()  

In [13]:
# evaluate accuracy after training
model.eval()
y_test_pred = model(X_test)
acc = (y_test_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 80.95%


### Create Data Iterator using Dataset Class

In [14]:
class SonarDataset(Dataset):
  def __init__(self, X, y):
    """ convert into PyTorch tensors and remember them"""
    self.X = torch.tensor(X, dtype=torch.float32)
    self.y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

  def __len__(self):
    """ this should return the size of the dataset """
    return len(self.X)

  def __getitem__(self, idx):
    """ this should return one sample from the dataset """
    features = self.X[idx]
    target = self.y[idx]
    return features, target

In [15]:
# set up DataLoader for training set
dataset = SonarDataset(X_train, y_train)
loader = DataLoader(dataset, shuffle=True, batch_size=16)

  self.X = torch.tensor(X, dtype=torch.float32)
  self.y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)


In [16]:
n_epochs = 200
model.train()

for epoch in range(n_epochs):
  for X_batch, y_batch in dls:
    y_pred = model(X_batch)
    loss = loss_fn(y_pred, y_batch)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [17]:
model.eval()
y_test_pred = model(X_test)
acc = (y_test_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 50.79%


In [18]:
# set up DataLoader for data set
trainset, testset = random_split(dataset, [0.7, 0.3])
loader = DataLoader(trainset, shuffle=True, batch_size=16)

In [19]:
# example of how default_collate works
default_collate([[0, 1], [2, 4]])

[tensor([0, 2]), tensor([1, 4])]

In [20]:
# Train the model
n_epochs = 200
loss_fn = nn.BCELoss()
model.train()
for epoch in range(n_epochs):
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# create one test tensor from the testset
#  use the default_collate() function to collect samples from a dataset into tensors
X_test, y_test = default_collate(testset)
model.eval()
y_pred = model(X_test)
acc = (y_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 90.70%


In [21]:
tpl = (10, 20, 30, 40)
print(tpl[::-1][list(enumerate(tpl, start=1))[-1][0]-1])

10


### Applying Learning Rate Schedules in PyTorch Training

In [22]:
scheduler = lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.5, total_iters=30)

In [24]:
# Train the model
n_epochs = 50
batch_size = 24
batch_start = torch.arange(0, len(X_train), batch_size)
model.train()
for epoch in range(n_epochs):
    for start in batch_start:
        X_batch = X_train[start:start+batch_size]
        y_batch = y_train[start:start+batch_size]
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    before_lr = optimizer.param_groups[0]["lr"]
    #print(optim.param_groups[0])
    scheduler.step()
    after_lr = optimizer.param_groups[0]["lr"]
    print("Epoch %d: SGD lr %.4f -> %.4f" % (epoch, before_lr, after_lr))
 
# evaluate accuracy after training
model.eval()
y_pred = model(X_test)
acc = (y_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Epoch 0: SGD lr 0.1000 -> 0.0983
Epoch 1: SGD lr 0.0983 -> 0.0967
Epoch 2: SGD lr 0.0967 -> 0.0950
Epoch 3: SGD lr 0.0950 -> 0.0933
Epoch 4: SGD lr 0.0933 -> 0.0917
Epoch 5: SGD lr 0.0917 -> 0.0900
Epoch 6: SGD lr 0.0900 -> 0.0883
Epoch 7: SGD lr 0.0883 -> 0.0867
Epoch 8: SGD lr 0.0867 -> 0.0850
Epoch 9: SGD lr 0.0850 -> 0.0833
Epoch 10: SGD lr 0.0833 -> 0.0817
Epoch 11: SGD lr 0.0817 -> 0.0800
Epoch 12: SGD lr 0.0800 -> 0.0783
Epoch 13: SGD lr 0.0783 -> 0.0767
Epoch 14: SGD lr 0.0767 -> 0.0750
Epoch 15: SGD lr 0.0750 -> 0.0733
Epoch 16: SGD lr 0.0733 -> 0.0717
Epoch 17: SGD lr 0.0717 -> 0.0700
Epoch 18: SGD lr 0.0700 -> 0.0683
Epoch 19: SGD lr 0.0683 -> 0.0667
Epoch 20: SGD lr 0.0667 -> 0.0650
Epoch 21: SGD lr 0.0650 -> 0.0633
Epoch 22: SGD lr 0.0633 -> 0.0617
Epoch 23: SGD lr 0.0617 -> 0.0600
Epoch 24: SGD lr 0.0600 -> 0.0583
Epoch 25: SGD lr 0.0583 -> 0.0567
Epoch 26: SGD lr 0.0567 -> 0.0550
Epoch 27: SGD lr 0.0550 -> 0.0533
Epoch 28: SGD lr 0.0533 -> 0.0517
Epoch 29: SGD lr 0.0517 

In [25]:
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

In [26]:
# train the model
n_epoch = 50
batch_size = 24
batch_start = torch.arange(0, len(X_train), batch_size)

model.train()

for epoch in range(n_epoch):
  for start in batch_start:
    X_batch = X_train[start:start+batch_size]
    y_batch = y_train[start:start+batch_size]
    y_pred = model(X_batch)
    loss = loss_fn(y_pred, y_batch)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  before_lr = optimizer.param_groups[0]["lr"]
  scheduler.step()
  after_lr = optimizer.param_groups[0]["lr"]
  print("Epoch %d: SGD lr %.4f -> %.4f" % (epoch, before_lr, after_lr))

Epoch 0: SGD lr 0.0500 -> 0.0495
Epoch 1: SGD lr 0.0495 -> 0.0490
Epoch 2: SGD lr 0.0490 -> 0.0485
Epoch 3: SGD lr 0.0485 -> 0.0480
Epoch 4: SGD lr 0.0480 -> 0.0475
Epoch 5: SGD lr 0.0475 -> 0.0471
Epoch 6: SGD lr 0.0471 -> 0.0466
Epoch 7: SGD lr 0.0466 -> 0.0461
Epoch 8: SGD lr 0.0461 -> 0.0457
Epoch 9: SGD lr 0.0457 -> 0.0452
Epoch 10: SGD lr 0.0452 -> 0.0448
Epoch 11: SGD lr 0.0448 -> 0.0443
Epoch 12: SGD lr 0.0443 -> 0.0439
Epoch 13: SGD lr 0.0439 -> 0.0434
Epoch 14: SGD lr 0.0434 -> 0.0430
Epoch 15: SGD lr 0.0430 -> 0.0426
Epoch 16: SGD lr 0.0426 -> 0.0421
Epoch 17: SGD lr 0.0421 -> 0.0417
Epoch 18: SGD lr 0.0417 -> 0.0413
Epoch 19: SGD lr 0.0413 -> 0.0409
Epoch 20: SGD lr 0.0409 -> 0.0405
Epoch 21: SGD lr 0.0405 -> 0.0401
Epoch 22: SGD lr 0.0401 -> 0.0397
Epoch 23: SGD lr 0.0397 -> 0.0393
Epoch 24: SGD lr 0.0393 -> 0.0389
Epoch 25: SGD lr 0.0389 -> 0.0385
Epoch 26: SGD lr 0.0385 -> 0.0381
Epoch 27: SGD lr 0.0381 -> 0.0377
Epoch 28: SGD lr 0.0377 -> 0.0374
Epoch 29: SGD lr 0.0374 

In [27]:
# evaluate accuracy after training
model.eval()
y_pred = model(X_test)
acc = (y_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 100.00%


### Custom Learning Rate Schedules

A custom learning rate schedule can be defined using a custom function. For example, you want to have a learning rate that:

$l r_n=\frac{l r_0}{1+\alpha n}$

In [28]:
def lr_lambda(epoch):
  # LR to be 0.1 * (1/1+0.01*epoch)
  base_lr = 0.1
  factor = 0.01
  return base_lr/(1+ factor * epoch)

In [29]:
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)

In [33]:
# train the model
n_epochs = 50
batch_size = 24
batch_start = torch.arange(0, len(X_train), batch_size)
model.train()

for epoch in range(n_epochs):
  for size in batch_start:
    X_batch = X_train[start: start+batch_size]
    y_batch = y_train[start: start+batch_size]

    y_pred = model(X_batch)
    loss = loss_fn(y_pred, y_batch)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  before_lr = optimizer.param_groups[0]["lr"]
  scheduler.step()
  after_lr = optimizer.param_groups[0]["lr"]
  print("Epoch %d: SGD lr %.4f -> %.4f" % (epoch, before_lr, after_lr))

Epoch 0: SGD lr 0.0100 -> 0.0099
Epoch 1: SGD lr 0.0099 -> 0.0098
Epoch 2: SGD lr 0.0098 -> 0.0097
Epoch 3: SGD lr 0.0097 -> 0.0096
Epoch 4: SGD lr 0.0096 -> 0.0095
Epoch 5: SGD lr 0.0095 -> 0.0094
Epoch 6: SGD lr 0.0094 -> 0.0093
Epoch 7: SGD lr 0.0093 -> 0.0093
Epoch 8: SGD lr 0.0093 -> 0.0092
Epoch 9: SGD lr 0.0092 -> 0.0091
Epoch 10: SGD lr 0.0091 -> 0.0090
Epoch 11: SGD lr 0.0090 -> 0.0089
Epoch 12: SGD lr 0.0089 -> 0.0088
Epoch 13: SGD lr 0.0088 -> 0.0088
Epoch 14: SGD lr 0.0088 -> 0.0087
Epoch 15: SGD lr 0.0087 -> 0.0086
Epoch 16: SGD lr 0.0086 -> 0.0085
Epoch 17: SGD lr 0.0085 -> 0.0085
Epoch 18: SGD lr 0.0085 -> 0.0084
Epoch 19: SGD lr 0.0084 -> 0.0083
Epoch 20: SGD lr 0.0083 -> 0.0083
Epoch 21: SGD lr 0.0083 -> 0.0082
Epoch 22: SGD lr 0.0082 -> 0.0081
Epoch 23: SGD lr 0.0081 -> 0.0081
Epoch 24: SGD lr 0.0081 -> 0.0080
Epoch 25: SGD lr 0.0080 -> 0.0079
Epoch 26: SGD lr 0.0079 -> 0.0079
Epoch 27: SGD lr 0.0079 -> 0.0078
Epoch 28: SGD lr 0.0078 -> 0.0078
Epoch 29: SGD lr 0.0078 

In [34]:
# showing how batch_start looks like
batch_start

tensor([  0,  24,  48,  72,  96, 120, 144])

In [35]:
# evaluate accuracy after training
model.eval()
y_test_pred = model(X_test)
acc = (y_test_pred.round() == y_test).float().mean()
acc = float(acc)
print("Model accuracy: %.2f%%" % (acc*100))

Model accuracy: 100.00%


### Using Dropout Regularization in PyTorch Models

Dropout is a regularization technique for neural network models proposed around 2012 to 2014. It is a layer in the neural network. During training of a neural network model, it will take the output from its previous layer, randomly select some of the neurons and zero them out before passing to the next layer, effectively ignored them. This means that their contribution to the activation of downstream neurons is temporally removed on the forward pass, and any weight updates are not applied to the neuron on the backward pass.

When the model is used for inference, dropout layer is just to scale all the neurons constantly to compensate the effect of dropping out during training.

You do not need to randomly select elements from a PyTorch tensor to implement dropout manually. The nn.Dropout() layer from PyTorch can be introduced into your model. It is implemented by randomly selecting nodes to be dropped out with a given probability $p$ (e.g., 20%) while in the training loop. In PyTorch, the dropout layer further scale the resulting tensor by a factor of $\frac{1}{1-p}$
so the average tensor value is maintained. Thanks to this scaling, the dropout layer operates at inference will be an identify function (i.e., no effect, simply copy over the input tensor as output tensor). You should make sure to turn the model into inference mode when evaluating the the model

In [105]:
# Define PyTorch model, with dropout at hidden layers
class SonarModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(60, 60)
        self.act1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.05)
        self.layer2 = nn.Linear(60, 30)
        self.act2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.1)
        self.output = nn.Linear(30, 1)
        self.sigmoid = nn.Sigmoid()
 
    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.dropout1(x)
        x = self.act2(self.layer2(x))
        x = self.dropout2(x)
        x = self.sigmoid(self.output(x))
        return x

In [109]:
# Helper function to train the model and return the validation result
def model_train(model, X_train, y_train, X_val, y_val,
                n_epochs=300, batch_size=16):
    loss_fn = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
    batch_start = torch.arange(0, len(X_train), batch_size)
    #scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
    #scheduler = lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.5, total_iters=30)
    model.train()
    for epoch in range(n_epochs):
        for start in batch_start:
            X_batch = X_train[start:start+batch_size]
            y_batch = y_train[start:start+batch_size]
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        #scheduler.step()
 
    # evaluate accuracy after training
    model.eval()
    y_pred = model(X_val)
    acc = (y_pred.round() == y_val).float().mean()
    acc = float(acc)
    return acc

In [110]:
X = df.iloc[:, 0:60]
y = df.iloc[:, 60]
 
# Label encode the target from string to integer
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)

# Convert to 2D PyTorch tensors
X = torch.tensor(X.values, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)
# run 10-fold cross validation
kfold = StratifiedKFold(n_splits=10, shuffle=True)
accuracies = []
for train, test in kfold.split(X, y):
  # create model, train, and get accuracy
  model = SonarModel()
  acc = model_train(model, X[train], y[train], X[test], y[test])
  print("Accuracy: %.2f" % acc)
  accuracies.append(acc)

Accuracy: 0.81
Accuracy: 0.81
Accuracy: 0.81
Accuracy: 0.86
Accuracy: 0.81
Accuracy: 0.90
Accuracy: 0.86
Accuracy: 0.76
Accuracy: 0.85
Accuracy: 0.85


In [112]:
# evaluate the model
mean = np.mean(accuracies)
std = np.std(accuracies)
print("Baseline: %.2f%% (+/- %.2f%%)" % (mean*100, std*100))

Baseline: 83.19% (+/- 3.76%)
