**Author:** Boris Kundu

**Problem Statement:** Train a small feed-forward classifier and compare SGD optimizer settings (plain SGD, weight decay, momentum, and Nesterov momentum)

**Dataset:** Iris

In [170]:
#Import packages
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn import datasets

In [171]:
#Read data
#Load scikit-learn's bundled Iris dataset; later cells read its
#"data", "target", feature_names and target_names fields.
iris = datasets.load_iris()

In [172]:
#Network dimensions (input and output sizes are derived from the dataset)
n1 = len(iris["feature_names"])  # input size: one unit per feature
n2 = 5                           # hidden layer size
k = len(iris["target_names"])    # output size: one unit per class

In [173]:
#Class to define model
class Model(nn.Module):
    """Two-layer fully connected network: linear -> sigmoid -> linear.

    ``forward`` returns the raw output of the second linear layer; no
    softmax is applied inside the model.
    """
    #Initialize
    def __init__(self, datasize, hiddensize, outputsize):
        """Create the two linear layers.

        Args:
            datasize: number of input features per sample.
            hiddensize: number of units in the hidden layer.
            outputsize: number of values produced per sample.
        """
        super().__init__()
        self.layer1 = nn.Linear(datasize, hiddensize)
        self.layer2 = nn.Linear(hiddensize, outputsize)
    #Feed forward
    def forward(self, x):
        """Run ``x`` through layer1, a sigmoid, then layer2.

        Args:
            x: tensor whose last dimension has ``datasize`` entries.

        Returns:
            Tensor whose last dimension has ``outputsize`` entries.
        """
        # torch.sigmoid is used instead of F.sigmoid, which older PyTorch
        # releases flag as deprecated; the computed values are the same.
        hidden = torch.sigmoid(self.layer1(x))
        return self.layer2(hidden)

In [174]:
#Convert the dataset arrays into tensors for the model
X = torch.tensor(iris["data"], dtype=torch.float32)        # feature matrix
target = torch.tensor(iris["target"], dtype=torch.int64)   # class index per sample

In [175]:
#Hyperparameters used by the optimizer and training cells
eta = 1e-2           #Learning rate
alpha = 0.9          #Momentum
weightDecay = 1e-3   #Weight decay
useNesterov = True   #Enable Nesterov
epochs = 1_000       #Iterations

In [176]:
#Initialize model
#Seed PyTorch's global RNG first so the randomly initialised layer weights
#(and therefore the losses printed below) are the same on every fresh run.
torch.manual_seed(0)
model = Model(n1, n2, k)

In [177]:
#Define SGD optimizers with different parameters
#NOTE(review): all four optimizers are built from the same model.parameters(),
#so they all update the same weights. Running them one after another continues
#training from wherever the previous run stopped instead of restarting from the
#initial weights; an independent comparison would need one model per optimizer.
sgd = optim.SGD(model.parameters(), lr=eta)
sgdWeightDecay = optim.SGD(model.parameters(), lr=eta, weight_decay=weightDecay)
sgdMomentum = optim.SGD(model.parameters(), lr=eta, momentum=alpha)
sgdNesterovMomentum = optim.SGD(model.parameters(), lr=eta, momentum=alpha, nesterov=useNesterov)

In [178]:
#Make predictions
def predict(features,target_class,my_model,msg):
    """Print the model's predicted classes and how many match the labels.

    Args:
        features: input tensor passed to ``my_model``.
        target_class: tensor of true class indices, one per row of ``features``.
        my_model: callable mapping ``features`` to per-class scores
            (classes along dimension 1).
        msg: label inserted into the printed messages.

    Returns:
        None; the predictions and match count are printed.
    """
    # Evaluate the arguments that were passed in, not the notebook-level
    # X / target, so the function works for any data set handed to it.
    with torch.no_grad():  # inference only: no autograd graph is needed
        scores = my_model(features)
    ypred = scores.argmax(dim=1)
    print(f'Predictions using {msg} are:\n{ypred}')
    matches = torch.eq(ypred, target_class).int().sum()
    print(f'Matches using {msg} are:{matches.item()}')

In [179]:
#Train model using optimizer
def train(features,target_class,my_model,opt,msg,num_epochs=None,log_every=100):
    """Run full-batch gradient descent on ``my_model`` and then print predictions.

    Each iteration does a forward pass on all of ``features``, computes the
    cross-entropy loss against ``target_class``, and takes one optimizer step.

    Args:
        features: input tensor passed to ``my_model``.
        target_class: tensor of true class indices for ``features``.
        my_model: model to train; it is updated in place through ``opt``.
        opt: optimizer that was constructed over ``my_model``'s parameters.
        msg: label forwarded to ``predict`` for its printed messages.
        num_epochs: number of iterations; defaults to the notebook-level
            ``epochs`` setting when omitted.
        log_every: print the loss every this many iterations (default 100).

    Returns:
        None; losses and final predictions are printed.
    """
    if num_epochs is None:
        # Looked up at call time so edits to the config cell take effect.
        num_epochs = epochs
    for i in range(num_epochs):
        o2 = my_model(features)
        L = F.cross_entropy(o2, target_class)
        if (i % log_every == 0):
            # Loss is reported before this iteration's weight update.
            print(f'Loss:{L.item()} at Epoch:{i}')
        # Clear gradients left by the previous step before accumulating new ones.
        opt.zero_grad()
        L.backward()
        opt.step()
    #Predict
    predict(features,target_class,my_model,msg)

In [180]:
#Train SGD
#First training run on `model`; the later training cells reuse this same model object.
train(X,target,model,sgd,'SGD')

Loss:1.1092737913131714 at Epoch:0
Loss:1.0907673835754395 at Epoch:100
Loss:1.078997015953064 at Epoch:200
Loss:1.0635530948638916 at Epoch:300
Loss:1.0389412641525269 at Epoch:400
Loss:1.0074021816253662 at Epoch:500
Loss:0.9731917381286621 at Epoch:600
Loss:0.9359657764434814 at Epoch:700
Loss:0.8962724804878235 at Epoch:800
Loss:0.8557345867156982 at Epoch:900
Predictions using SGD are:
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])
Matches using SGD are:109


In [181]:
#Train SGD with weight decay
#NOTE(review): this trains the same `model` as the previous cell, so it resumes
#from the weights that run left behind (the recorded loss starts at ~0.816,
#just below the ~0.856 printed at epoch 900 of the plain-SGD run).
train(X,target,model,sgdWeightDecay,'SGD with weight decay')

Loss:0.816070556640625 at Epoch:0
Loss:0.7789621949195862 at Epoch:100
Loss:0.7448146939277649 at Epoch:200
Loss:0.7139304876327515 at Epoch:300
Loss:0.6863029599189758 at Epoch:400
Loss:0.6617550253868103 at Epoch:500
Loss:0.6400279998779297 at Epoch:600
Loss:0.6208282709121704 at Epoch:700
Loss:0.6038506031036377 at Epoch:800
Loss:0.5887932181358337 at Epoch:900
Predictions using SGD with weight decay are:
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1,
        2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1,
        2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])
Matches using SGD with weight decay are:116


In [182]:
#Train SGD with momentum
#NOTE(review): continues from the weights left by the weight-decay run
#(the recorded loss starts at ~0.575, following that run's ~0.589 at epoch 900),
#so the result reflects all training so far, not momentum alone.
train(X,target,model,sgdMomentum,'SGD with standard momentum')

Loss:0.5753728151321411 at Epoch:0
Loss:0.4918035864830017 at Epoch:100
Loss:0.4310658276081085 at Epoch:200
Loss:0.37077727913856506 at Epoch:300
Loss:0.311044305562973 at Epoch:400
Loss:0.25911274552345276 at Epoch:500
Loss:0.21808962523937225 at Epoch:600
Loss:0.18701350688934326 at Epoch:700
Loss:0.16369596123695374 at Epoch:800
Loss:0.14608433842658997 at Epoch:900
Predictions using SGD with standard momentum are:
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
        2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])
Matches using SGD with standard momentum are:147


In [183]:
#Train SGD with Nesterov momentum
#NOTE(review): continues from the weights left by the standard-momentum run
#(the recorded loss starts at ~0.133, following that run's ~0.146 at epoch 900),
#so the match count printed here is not attributable to Nesterov momentum alone.
train(X,target,model,sgdNesterovMomentum,'SGD with Nesterov momentum')

Loss:0.13258770108222961 at Epoch:0
Loss:0.1228458508849144 at Epoch:100
Loss:0.11432917416095734 at Epoch:200
Loss:0.10744847357273102 at Epoch:300
Loss:0.10179830342531204 at Epoch:400
Loss:0.09708991646766663 at Epoch:500
Loss:0.09311432391405106 at Epoch:600
Loss:0.08971786499023438 at Epoch:700
Loss:0.08678551763296127 at Epoch:800
Loss:0.08423002809286118 at Epoch:900
Predictions using SGD with Nesterov momentum are:
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
        2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])
Matches using SGD with Nesterov momentum are:147
