In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
from adastab import AdaStab
from OurAdam import Adam
%matplotlib inline
import matplotlib.pyplot as plt
import scipy.io as sio

In [2]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

In [3]:
batch_size = 128
learning_rate = 1e-4
epochs = 10

In [4]:
transform_train = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, 
                                       transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
def cifartest():
#     correct = 0
#     total = 0
#     with torch.no_grad():
        test_loss = 0
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = Variable(inputs, volatile=True),Variable(targets, volatile=True)
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss
            
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            
        print('test loss:{}'.format(test_loss/(batch_idx+1)))
        
        
        
        acc = 100.*correct/total
        print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
        
        return test_loss/(batch_idx+1), acc


#             progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
# % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

    

In [6]:
def cifartrain():
    for epoch in range(max_epoch):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
        # get the inputs
            inputs, labels = data
            inputs = Variable(inputs)
            labels = Variable(labels)
            inputs = inputs.cuda()
            labels = labels.cuda()

        # zero the parameter gradients
            optimizer.zero_grad()

        # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss
        
        print('epoch{}, loss{}'.format(epoch, running_loss/(i+1)))
        LOSS[0, epoch] = running_loss/(i+1)
        # Test every epoch
        Test_LOSS[0, epoch], TEST_Acc[0, epoch] = cifartest()
    
    print('Finished Training')
    return LOSS, Test_LOSS

In [7]:
max_epoch=10
# No test

### AMSGrad

In [60]:
# learning_rate = 1e-4
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.3046
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.8326
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.5857
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.3919
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2294
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1249
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  7.9192
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  5.5913
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  4.5502
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  3.2193
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [9]:
# learning_rate = 9e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.2789
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.8110
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.5733
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.3706
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2018
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1176
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.9849
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  3.9018
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  2.1295
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  1.2511
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [10]:
# learning_rate = 8e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.3372
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.8678
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6165
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4049
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2368
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1195
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  7.0967
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.0191
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  2.3084
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  9.9793
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [11]:
# learning_rate = 7e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.3526
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.8906
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6332
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.3964
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2059
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1009
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.4517
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.2952
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.1344
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  5.8130
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [12]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.3770
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9170
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6561
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4329
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2370
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1104
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.2461
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.3108
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  9.7373
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  4.7481
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [13]:
learning_rate = 5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.3987
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9349
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6723
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4353
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2254
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
1.00000e-02 *
  9.9996
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  4.6122
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.0491
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.0172
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  6.2306
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [14]:
# learning_rate = 4e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.4387
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9976
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7389
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4958
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2694
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1239
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.3022
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.3029
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.3496
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  8.2493
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [15]:
# learning_rate = 3e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.5133
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 1.1000
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.8518
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.6116
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.3754
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1989
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  9.3962
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.7172
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  2.6213
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  1.6869
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


### Longer Training

In [12]:
max_epoch=100

In [13]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
LOSS, Test_LOSS= cifartrain()
sio.savemat('amsgrad_res.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3702
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9068
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6575
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4340
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2329
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1203
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.4533
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.4356
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  9.6874
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  4.9833
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  3.4243
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-04 *
  1.9236
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-04 *
  1.9162
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-04 *
  1.9710
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-04 *
  1.9420
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-04 *
  1.8586
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-04 *
  1.8672
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-04 *
  1.8335
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-04 *
  1.7722
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-04 *
  1.7807
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-04 *
  1.7081
[torch.cuda.FloatTensor of size 1 

In [14]:
# learning_rate = 5e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()
# sio.savemat('amsgrad_res1.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3858
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9398
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6760
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4355
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2204
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1019
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  4.4211
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  1.9033
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  8.5368
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  5.4792
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  4.0852
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-04 *
  2.5994
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-04 *
  2.5262
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-04 *
  2.5318
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-04 *
  2.3552
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-04 *
  2.6358
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-04 *
  2.4442
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-04 *
  2.3429
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-04 *
  2.3568
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-04 *
  2.3070
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-04 *
  2.3768
[torch.cuda.FloatTensor of size 1 

In [17]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.99), amsgrad=True)
# LOSS, Test_LOSS= cifartrain()
# sio.savemat('amsgrad_res2.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3698
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9132
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6540
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4146
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2094
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
1.00000e-02 *
  9.4387
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  3.9940
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  1.7212
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  9.6116
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  6.2651
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  4.9588
[torch.cuda.FloatT

epoch82, lossVariable containing:
1.00000e-04 *
  3.2641
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch83, lossVariable containing:
1.00000e-04 *
  3.2866
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-04 *
  3.0632
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-04 *
  3.0229
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-04 *
  3.0981
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-04 *
  3.1734
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-04 *
  3.0203
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-04 *
  2.8576
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-04 *
  3.0485
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-04 *
  2.8847
[torch.cuda.FloatTensor of size 1 

In [18]:
learning_rate = 5.5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
LOSS, Test_LOSS= cifartrain()
sio.savemat('nosadam_res2.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3817
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9280
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6726
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4438
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2349
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1129
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.3407
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.7391
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.4453
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  6.5580
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  3.6362
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-05 *
  9.2639
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-05 *
  9.1441
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-05 *
  9.6740
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-05 *
  8.6898
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-05 *
  8.3940
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-05 *
  8.4343
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-05 *
  8.0498
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-05 *
  8.0564
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-05 *
  8.1853
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-05 *
  7.5345
[torch.cuda.FloatTensor of size 1 

In [19]:
# learning_rate = 4.5e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
# LOSS, Test_LOSS= cifartrain()
# sio.savemat('nosadam_res3.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4180
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9779
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7326
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4975
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2847
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1392
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.2383
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  3.1329
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.4930
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  1.0283
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  5.3139
[torch.cuda.FloatTensor of size 1

KeyboardInterrupt: 

# Adam

In [8]:
max_epoch=10

In [9]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
# sio.savemat('adam_res.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3776
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9123
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6554
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4343
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2325
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1172
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.4895
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.8780
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  6.5994
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  7.0446
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [10]:
learning_rate = 5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
# sio.savemat('adam_res.mat',{'loss':LOSS})

KeyboardInterrupt: 

In [11]:
learning_rate = 4e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
# sio.savemat('adam_res.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4502
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 1.0165
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7631
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.5216
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2969
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1413
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.9286
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.2577
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  3.5951
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  4.9119
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [12]:
max_epoch=100

In [13]:
learning_rate = 4e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
sio.savemat('adam_res1.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4323
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9892
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7245
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4780
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2504
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1129
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.0909
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  3.0962
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  4.1710
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  7.2261
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-02 *
  4.8190
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-03 *
  5.8893
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-02 *
  1.6204
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-02 *
  1.7327
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-02 *
  1.2838
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-03 *
  5.0789
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-03 *
  2.4896
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-03 *
  6.8553
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-02 *
  2.2075
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-02 *
  1.1914
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-03 *
  7.8539
[torch.cuda.FloatTensor of size 1 

In [15]:
learning_rate = 4e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate,betas=(0.9, 0.99), amsgrad=False)
LOSS, Test_LOSS= cifartrain()
sio.savemat('adam_res2.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4254
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9677
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7123
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4668
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2510
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1298
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  7.8508
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  6.2696
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  5.4074
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  4.7083
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-02 *
  4.7992
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-02 *
  1.0707
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-03 *
  9.9261
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-03 *
  8.5545
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-02 *
  1.0604
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-03 *
  9.6010
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-03 *
  9.1138
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-03 *
  9.7684
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-02 *
  1.0246
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-03 *
  9.9778
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-03 *
  8.9885
[torch.cuda.FloatTensor of size 1 

## AdaStab

In [None]:
# learning_rate = 1e-4
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
# LOSS, Test_LOSS= cifartrain()

In [None]:
# learning_rate = 2e-4
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
# LOSS, Test_LOSS= cifartrain()

In [None]:
learning_rate = 5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
LOSS, Test_LOSS= cifartrain()

In [None]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
LOSS, Test_LOSS= cifartrain()