In [1]:
import torch as t
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR, ExponentialLR

### Model

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.linear1 = nn.Linear(28*28, 10)
        self.relu = nn.ReLU()

    def forward(self, img): #convert + flatten
        x = img.view(-1, 28*28)
        x = self.relu(self.linear1(x))
        return x
net = Net()



### Hyperparams

In [3]:
num_epochs=50
after_every=100
loss_fn = nn.CrossEntropyLoss()
batch_size=128

In [4]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)),])

mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = t.utils.data.DataLoader(mnist_trainset, batch_size=batch_size, shuffle=True)

mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = t.utils.data.DataLoader(mnist_testset, batch_size=batch_size, shuffle=True)

### Training

In [5]:
def train_one_epoch(model, epoch_index, optimizer, train_dataset):
    running_loss = 0.
    last_loss = 0.

    for i, data in enumerate(train_dataset):
        # Every data instance is an input + label pair
        # data.shape=torch.Size([10, 1, 28, 28]) --> 10 images,labels
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs.view(-1, 28*28))


        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()

        if i % after_every == after_every-1:
            last_loss = running_loss / after_every # loss per after_every batches
            print(' batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.

    return last_loss

### Evaluate

In [6]:
def test(model, loader):
  total=0
  correct=0
  model.eval()
  with t.no_grad():
      for data in loader:
          x, y = data
          output = model(x.view(-1, 28*28))
          for idx, i in enumerate(output):
              if t.argmax(i) == y[idx]:
                  correct +=1
              total +=1
  return round(correct/total, 3)*100

In [7]:
def train_model(model, epochs, train_dataset, test_dataset, optimizer, scheduler, LR_type):
  epoch_number = 0

  losses = []
  train_accuracy = []
  test_accuracy = []

  for epoch in range(epochs):
      print('EPOCH {}:'.format(epoch_number + 1))

      # Make sure gradient tracking is on, and do a pass over the data
      model.train(True)
      avg_loss = train_one_epoch(model, epoch_number,optimizer,train_dataset)
      losses.append(avg_loss)

      # Train accuracy
      train_acc = test(model, train_dataset)
      train_accuracy.append(train_acc)

      # Test accuracy
      test_acc = test(model, test_dataset)
      test_accuracy.append(test_acc)

      if LR_type=='Plateau':
        before_lr = optimizer.param_groups[0]["lr"]
        scheduler.step(avg_loss)
        after_lr = optimizer.param_groups[0]["lr"]
        print('lr {} -> {}'.format(before_lr, after_lr))

      elif LR_type!='None':
        before_lr = optimizer.param_groups[0]["lr"]
        scheduler.step()
        after_lr = optimizer.param_groups[0]["lr"]
        print('lr {} -> {}'.format(before_lr, after_lr))

      print('Train accuracy {}:'.format(train_acc))
      print('Test accuracy {}:'.format(test_acc))
      epoch_number += 1
  return losses, train_accuracy, test_accuracy

In [8]:
import datetime as dt
name = ['SGD', 'Adam', 'RMSProp']
def train_with_optimizers(starting_lr, l2reg):
  losses_arr = []
  train_accuracy_arr = []
  test_accuracy_arr = []
  description = []
  # Different Optimizers
  for i,opti in enumerate(name):
    print('Optimizer {}:'.format(opti))
    net_new = Net()
    optimizerr = opti
    if(opti=='SGD'):
        optimizerr = t.optim.SGD(net_new.parameters(), lr=starting_lr, weight_decay=l2reg)
    if(opti=='Adam'):
        optimizerr = t.optim.Adam(net_new.parameters(), lr=starting_lr, weight_decay=l2reg)
    if(opti=='RMSProp'):
        optimizerr = t.optim.RMSprop(net_new.parameters(), lr=starting_lr, weight_decay=l2reg)
    scheduler = StepLR(optimizerr, 25, gamma=0.1)

    losses, train_accuracy, test_accuracy = train_model(net_new, num_epochs, train_loader, test_loader, optimizerr, scheduler, 'Step')

    losses_arr.append(losses)
    train_accuracy_arr.append(train_accuracy)
    test_accuracy_arr.append(test_accuracy)
    description.append('Optimizer: {}, Starting LR:{}, L2Reg:{}, Batch size: {}'.format(name[i], starting_lr, l2reg, batch_size))

    # save model
    ts_now = dt.datetime.now()
    unix_timestamp = dt.datetime.timestamp(ts_now)*1000
    model_save_name = str(unix_timestamp)
    path = F"{model_save_name}_{name[i]}_L2Reg={l2reg}_bs{batch_size}_e{num_epochs}.pt"
    t.save(net.state_dict(), path)
  return description, losses_arr, train_accuracy_arr, test_accuracy_arr


In [9]:
import csv
def save_to_csv(filename, description, losses_arr, train_acc_arr, test_acc_arr):
  # Create a CSV file and write data to it
  with open(filename+'.csv', 'w', newline='') as csvfile:
    for i, desc in enumerate(description):
      fieldnames = ['Description', 'epoch', 'train_loss', 'train_accuracy', 'test_accuracy']
      writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
      writer.writeheader()
      for epoch, (loss, train, test) in enumerate(zip(losses_arr[i], train_acc_arr[i], test_acc_arr[i]), 1):
          writer.writerow({'Description':description[i], 'epoch': epoch, 'train_loss': loss, 'train_accuracy': train, 'test_accuracy': test})


In [None]:
learning_rates = 0.01
l2reg = 1e-7
description, losses_arr, train_accuracy_arr, test_accuracy_arr = train_with_optimizers(learning_rates,l2reg)
save_to_csv('Optimizers', description, losses_arr, train_accuracy_arr, test_accuracy_arr)

Optimizer SGD:
EPOCH 1:
 batch 100 loss: 1.8150903487205505
 batch 200 loss: 1.3337183940410613
 batch 300 loss: 1.2030242562294007
 batch 400 loss: 1.127565987110138
lr 0.01 -> 0.01
Train accuracy 63.3:
Test accuracy 63.800000000000004:
EPOCH 2:
 batch 100 loss: 1.0305333179235459
 batch 200 loss: 0.9471209472417832
 batch 300 loss: 0.9196749871969223
 batch 400 loss: 0.8973283874988556
lr 0.01 -> 0.01
Train accuracy 70.8:
Test accuracy 71.1:
EPOCH 3:
 batch 100 loss: 0.8728197151422501
 batch 200 loss: 0.866802961230278
 batch 300 loss: 0.8557087063789368
 batch 400 loss: 0.8331390446424485
lr 0.01 -> 0.01
Train accuracy 71.3:
Test accuracy 71.5:
EPOCH 4:
 batch 100 loss: 0.8410170811414719
 batch 200 loss: 0.8116308754682541
 batch 300 loss: 0.8235679805278778
 batch 400 loss: 0.8323886543512344
lr 0.01 -> 0.01
Train accuracy 71.7:
Test accuracy 71.8:
EPOCH 5:
 batch 100 loss: 0.7986624300479889
 batch 200 loss: 0.8358564710617066
 batch 300 loss: 0.8008440059423446
 batch 400 loss:

### Plotting- lr=0.01, bs=128, l2reg=1e-7

In [None]:
import numpy as np
epochs_arr = np.arange(0, len(losses_arr[0]))

In [None]:
# Plot training loss
x = np.array(epochs_arr)
for i,train_loss in enumerate(losses_arr):
  ypoints = np.array(train_loss)
  plt.plot(x, ypoints, label = name[i])

plt.xlabel("Epochs")
plt.ylabel("train Loss")
plt.legend()
plt.title('LR=0.01, L2Reg=1e-7, Step, Batch size=128')
plt.show()

In [None]:
# Plot training accuracy
x = np.array(epochs_arr)
for i,train_acc in enumerate(train_accuracy_arr):
  ypoints = np.array(train_acc)
  plt.plot(x, ypoints, label = name[i])

plt.xlabel("Epochs")
plt.ylabel("train Accuracy")
plt.legend()
plt.title('LR=0.01, L2Reg=1e-7, Step, Batch size=128')
plt.show()

In [None]:
# Plot test accuracy
x = np.array(epochs_arr)
for i,test_acc in enumerate(test_accuracy_arr):
  ypoints = np.array(test_acc)
  plt.plot(x, ypoints, label = name[i])

plt.xlabel("Epochs")
plt.ylabel("Test Accuracy")
plt.legend()
plt.title('LR=0.01, L2Reg=1e-7, Step, Batch size=128')
plt.show()

In [None]:
# Plot test and training accuracy
x = np.array(epochs_arr)
for i,lt in enumerate([0,1]):
  ypoints_1 = np.array(train_accuracy_arr[i])
  ypoints_2 = np.array(test_accuracy_arr[i])
  plt.plot(x, ypoints_1, label = 'Train')
  plt.plot(x, ypoints_2, label = 'Test')
  plt.xlabel("Epochs")
  plt.ylabel("Accuracy")
  plt.legend()
  plt.title(F'LR=0.01, Batch_size=128, L2reg=1e-7, Optimizer={name[i]}')
  plt.show()

### Load model

In [None]:
model_save_name = '1695168105559.8071_Step_bs128_e30_SGD_ss10_gamma0.1_mnist'
path = F"{model_save_name}.pt"
net.load_state_dict(t.load(path))

In [None]:
test(net,test_loader)

In [None]:
#hello kuch likhlo warna inactivity timeout de dega yeh