# Final Project : Ranger Optimizer v.s Adam Optimizer
Compare Ranger Optimizer and Adam Optimizer performance on ResNet-18 model with CIFAR-10 dataset

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from ranger import Ranger 
from ranger import RangerVA 
from ranger import RangerQH

## Loading and normalizing CIFAR 10

In [3]:
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=8)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=8)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


## Load ResNet-18 Model

In [4]:
resnet18 = models.resnet18()

### Deploy to GPU

In [5]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [9]:
resnet18.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### Set CUDA random seed

In [6]:
SEED = 520
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


## Define Loss function and  Adam optimizer
- First try
    - learning rate = 1e-3
    - weight decay = 0.3
    

In [8]:
import torch.optim as optim
criterion = torch.nn.CrossEntropyLoss()
optimizer_adam = optim.Adam(resnet18.parameters(), lr=0.001, weight_decay=0.3)

## Train the ResNet-18 with Adam Optimizer on GPU

In [11]:
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs ; data is a list of [inputs, labels]
        inputs, labels = data 
        inputs = inputs.to(device)
        labels = labels.to(device)
        # zero the parameter gradients
        optimizer_adam.zero_grad()
        # forward + backward + optimize
        outputs = resnet18(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_adam.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999: 
            # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')

[1,  2000] loss: 2.913
[1,  4000] loss: 3.139
[1,  6000] loss: 3.162
[1,  8000] loss: 3.178
[1, 10000] loss: 3.173
[1, 12000] loss: 3.174
[2,  2000] loss: 3.169
[2,  4000] loss: 3.171
[2,  6000] loss: 3.165
[2,  8000] loss: 3.161
[2, 10000] loss: 3.157
[2, 12000] loss: 3.147
[3,  2000] loss: 3.146
[3,  4000] loss: 3.143
[3,  6000] loss: 3.139
[3,  8000] loss: 3.140
[3, 10000] loss: 3.136
[3, 12000] loss: 3.132
[4,  2000] loss: 3.133
[4,  4000] loss: 3.132
[4,  6000] loss: 3.128
[4,  8000] loss: 3.128
[4, 10000] loss: 3.127
[4, 12000] loss: 3.125
[5,  2000] loss: 3.127
[5,  4000] loss: 3.124
[5,  6000] loss: 3.127
[5,  8000] loss: 3.124
[5, 10000] loss: 3.123
[5, 12000] loss: 3.124
[6,  2000] loss: 3.123
[6,  4000] loss: 3.122
[6,  6000] loss: 3.120
[6,  8000] loss: 3.123
[6, 10000] loss: 3.123
[6, 12000] loss: 3.120
[7,  2000] loss: 3.119
[7,  4000] loss: 3.119
[7,  6000] loss: 3.118
[7,  8000] loss: 3.117
[7, 10000] loss: 3.115
[7, 12000] loss: 3.119
[8,  2000] loss: 3.117
[8,  4000] 