In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
from adastab import AdaStab
from OurAdam import Adam
%matplotlib inline
import matplotlib.pyplot as plt
import scipy.io as sio

In [2]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

In [3]:
batch_size = 128
learning_rate = 1e-4
epochs = 10

In [4]:
transform_train = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, 
                                       transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
def cifartest():
        correct = 0
        total = torch.FloatTensor([0])
#     with torch.no_grad():
        test_loss = 0
        
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = Variable(inputs, volatile=True),Variable(targets, volatile=True)
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss
            
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += (predicted==targets).sum()
            
        print('test loss:{}'.format(test_loss/(batch_idx+1)))
        
        
#         print(total)
#         print(correct)
#         total = Variable(total, requires_grad=False)
#         total = total.cuda()
        correct = correct.data[0]
#         correct = torch.FloatTensor(correct)
#         correct = Varaible(correct, requires_grad=False)
#         correct = correct.cuda()
#         correct = torch.Tensor(correct)
        acc = correct/total
        print('Accuracy of the network on the 10000 test images: %f' % (
         acc))
        
        return test_loss/(batch_idx+1), acc

In [7]:
def cifartrain():
    for epoch in range(max_epoch):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
        # get the inputs
            inputs, labels = data
            inputs = Variable(inputs)
            labels = Variable(labels)
            inputs = inputs.cuda()
            labels = labels.cuda()

        # zero the parameter gradients
            optimizer.zero_grad()

        # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss
        
        print('epoch{}, loss{}'.format(epoch, running_loss/(i+1)))
        LOSS[0, epoch] = running_loss/(i+1)
        # Test every epoch
        Test_LOSS[0, epoch], TEST_Acc[0, epoch] = cifartest()
    
    print('Finished Training')
    return LOSS, Test_LOSS, TEST_Acc

In [59]:
max_epoch=10
# No test

### AMSGrad

In [15]:
# learning_rate = 7e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

In [21]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.1, amsgrad=True)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.4267
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.1588
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.015400
epoch1, lossVariable containing:
 0.9894
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9523
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.002600
epoch2, lossVariable containing:
 0.7920
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8006
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.012700
epoch3, lossVariable containing:
 0.6621
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.7656
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.023700
epoch4, lossVariable containing:
 0.5738
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [22]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.2, amsgrad=True)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.4708
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.1770
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.018900
epoch1, lossVariable containing:
 1.0185
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9832
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.022200
epoch2, lossVariable containing:
 0.8357
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8277
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.005800
epoch3, lossVariable containing:
 0.7187
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.7435
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011400
epoch4, lossVariable containing:
 0.6420
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [24]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=True)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.5183
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2324
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.021400
epoch1, lossVariable containing:
 1.0536
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9876
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.022000
epoch2, lossVariable containing:
 0.8667
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8873
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014200
epoch3, lossVariable containing:
 0.7541
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.7527
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011700
epoch4, lossVariable containing:
 0.6842
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [26]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.5, amsgrad=True)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.5818
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2862
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.009000
epoch1, lossVariable containing:
 1.1629
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.0834
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.022500
epoch2, lossVariable containing:
 0.9596
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9240
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.007900
epoch3, lossVariable containing:
 0.8386
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8606
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.003300
epoch4, lossVariable containing:
 0.7649
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [25]:
# learning_rate = 1e-4
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=True)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.4528
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.1747
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.015200
epoch1, lossVariable containing:
 1.0529
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9826
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.002500
epoch2, lossVariable containing:
 0.8919
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8673
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.000600
epoch3, lossVariable containing:
 0.7935
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8222
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.010400
epoch4, lossVariable containing:
 0.7463
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [30]:
# learning_rate = 5e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

KeyboardInterrupt: 

In [14]:
# learning_rate = 4e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.4387
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9976
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7389
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4958
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2694
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1239
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.3022
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.3029
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.3496
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  8.2493
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [15]:
# learning_rate = 3e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()

epoch0, lossVariable containing:
 1.5133
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 1.1000
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.8518
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.6116
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.3754
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1989
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  9.3962
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.7172
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  2.6213
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  1.6869
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


Adam

In [27]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.5102
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2209
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.009200
epoch1, lossVariable containing:
 1.0599
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9607
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.012800
epoch2, lossVariable containing:
 0.8640
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8654
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.024800
epoch3, lossVariable containing:
 0.7568
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8255
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.012600
epoch4, lossVariable containing:
 0.6856
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [28]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.4, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.5454
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2125
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.010200
epoch1, lossVariable containing:
 1.0935
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.0395
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006000
epoch2, lossVariable containing:
 0.9070
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9144
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.010500
epoch3, lossVariable containing:
 0.7877
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8214
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014700
epoch4, lossVariable containing:
 0.7214
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [29]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.2, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.4784
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.1815
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014700
epoch1, lossVariable containing:
 1.0455
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9942
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.013200
epoch2, lossVariable containing:
 0.8522
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8351
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.002500
epoch3, lossVariable containing:
 0.7291
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.7927
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.020800
epoch4, lossVariable containing:
 0.6514
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

In [30]:
max_epoch = 50

In [31]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.4, amsgrad=False)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.5556
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2139
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.013400
epoch1, lossVariable containing:
 1.0915
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.0190
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.017400
epoch2, lossVariable containing:
 0.9137
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8617
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.001200
epoch3, lossVariable containing:
 0.7977
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8162
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.001800
epoch4, lossVariable containing:
 0.7221
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

KeyboardInterrupt: 

In [33]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.8, amsgrad=False)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.6386
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3364
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.025500
epoch1, lossVariable containing:
 1.1931
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.1296
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.003500
epoch2, lossVariable containing:
 1.0328
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9860
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011100
epoch3, lossVariable containing:
 0.9128
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8906
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.001800
epoch4, lossVariable containing:
 0.8372
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

KeyboardInterrupt: 

In [34]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.6, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.6091
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3274
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.021700
epoch1, lossVariable containing:
 1.1801
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.0966
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.016400
epoch2, lossVariable containing:
 0.9945
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9390
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.024500
epoch3, lossVariable containing:
 0.8654
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8358
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.019300
epoch4, lossVariable containing:
 0.7924
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 1.4480
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.5606
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.022900
epoch37, lossVariable containing:
 1.5693
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.6718
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.005500
epoch38, lossVariable containing:
 1.7003
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.7991
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.022900
epoch39, lossVariable containing:
 1.8385
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.9238
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.009700
epoch40, lossVariable containing:
 1.9698
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

In [35]:
sio.savemat('adam_res_gen6.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

In [36]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.7, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.6283
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3517
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006900
epoch1, lossVariable containing:
 1.1905
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.1275
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.003400
epoch2, lossVariable containing:
 1.0000
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.9999
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.002300
epoch3, lossVariable containing:
 0.8791
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8537
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014900
epoch4, lossVariable containing:
 0.8022
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 1.5961
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.7015
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.004700
epoch37, lossVariable containing:
 1.7226
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.8126
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.013400
epoch38, lossVariable containing:
 1.8526
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.9335
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.000300
epoch39, lossVariable containing:
 1.9776
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0483
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.024700
epoch40, lossVariable containing:
 2.0904
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

In [37]:
sio.savemat('adam_res_gen7.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

In [38]:
# learning_rate = 6e-6
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.4, amsgrad=False)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.1447
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.9494
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.013000
epoch1, lossVariable containing:
 1.7880
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.6589
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.023200
epoch2, lossVariable containing:
 1.5619
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.4829
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006700
epoch3, lossVariable containing:
 1.3894
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3433
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.005100
epoch4, lossVariable containing:
 1.2468
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

KeyboardInterrupt: 

In [39]:
# learning_rate = 3e-6
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# TEST_Acc = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=False)
# LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.2263
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1082
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.015900
epoch1, lossVariable containing:
 1.9870
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.8871
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.007200
epoch2, lossVariable containing:
 1.8153
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.7496
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.018000
epoch3, lossVariable containing:
 1.6876
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.6318
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011000
epoch4, lossVariable containing:
 1.5718
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

KeyboardInterrupt: 

In [40]:
learning_rate = 1e-4
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.1, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 1.3574
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.0729
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.005200
epoch1, lossVariable containing:
 0.9455
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.8893
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.000200
epoch2, lossVariable containing:
 0.7811
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.7924
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011000
epoch3, lossVariable containing:
 0.6825
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 0.7339
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.010000
epoch4, lossVariable containing:
 0.6183
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

KeyboardInterrupt: 

In [41]:
learning_rate = 1e-6
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.2795
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2377
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011800
epoch1, lossVariable containing:
 2.2019
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1625
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.005000
epoch2, lossVariable containing:
 2.1280
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0905
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.015000
epoch3, lossVariable containing:
 2.0573
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0203
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.021000
epoch4, lossVariable containing:
 1.9854
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 0.9743
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2501
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.023500
epoch37, lossVariable containing:
 0.9482
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2437
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.023500
epoch38, lossVariable containing:
 0.9211
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2456
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.025100
epoch39, lossVariable containing:
 0.8988
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2406
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.025300
epoch40, lossVariable containing:
 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

In [42]:
sio.savemat('adam_res_gen.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

Adam temporarily uses the above result

In [47]:
max_epoch=100

In [48]:
learning_rate = 9e-7
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.3181
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2909
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.024600
epoch1, lossVariable containing:
 2.2621
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2322
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.007900
epoch2, lossVariable containing:
 2.1859
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1406
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011600
epoch3, lossVariable containing:
 2.0908
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0543
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.000900
epoch4, lossVariable containing:
 2.0177
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 1.0371
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2497
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.019100
epoch37, lossVariable containing:
 1.0137
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2432
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.021000
epoch38, lossVariable containing:
 0.9915
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2440
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.021600
epoch39, lossVariable containing:
 0.9653
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2381
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.025200
epoch40, lossVariable containing:
 0.9423
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

KeyboardInterrupt: 

In [49]:
learning_rate = 8e-7
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.2920
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2725
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.003200
epoch1, lossVariable containing:
 2.2521
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2337
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014700
epoch2, lossVariable containing:
 2.2108
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1868
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.010200
epoch3, lossVariable containing:
 2.1541
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1198
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.013100
epoch4, lossVariable containing:
 2.0802
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 1.1549
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2778
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.008800
epoch37, lossVariable containing:
 1.1336
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2700
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011300
epoch38, lossVariable containing:
 1.1149
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2634
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014900
epoch39, lossVariable containing:
 1.0960
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2555
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.020700
epoch40, lossVariable containing:
 1.0756
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

epoch72, lossVariable containing:
 0.4512
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2794
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.004200
epoch73, lossVariable containing:
 0.4345
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2905
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.004000
epoch74, lossVariable containing:
 0.4218
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2877
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.000600
epoch75, lossVariable containing:
 0.4036
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2898
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.001300
epoch76, lossVariable containing:
 0.3912
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

In [50]:
sio.savemat('adam_res_gen8.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

##  adam's best

In [10]:
max_epoch=100

In [11]:
learning_rate = 7e-7
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=False)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.2980
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2682
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.013400
epoch1, lossVariable containing:
 2.2383
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2103
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.012300
epoch2, lossVariable containing:
 2.1806
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1541
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.020500
epoch3, lossVariable containing:
 2.1258
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1008
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006400
epoch4, lossVariable containing:
 2.0722
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 1.2447
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3298
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011700
epoch37, lossVariable containing:
 1.2273
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3202
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014500
epoch38, lossVariable containing:
 1.2106
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3123
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.018800
epoch39, lossVariable containing:
 1.1946
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3027
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.023400
epoch40, lossVariable containing:
 1.1759
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

epoch72, lossVariable containing:
 0.6075
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2533
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.005700
epoch73, lossVariable containing:
 0.5929
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2532
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.001500


KeyboardInterrupt: 

In [None]:
sio.savemat('adam_res_gen77.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

AmsGrad

In [43]:
learning_rate = 1e-6
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=True)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.3233
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2876
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.004000
epoch1, lossVariable containing:
 2.2440
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1998
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.024000
epoch2, lossVariable containing:
 2.1409
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0872
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006500
epoch3, lossVariable containing:
 2.0406
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0027
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.014400
epoch4, lossVariable containing:
 1.9683
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 0.9907
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2160
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.008100
epoch37, lossVariable containing:
 0.9688
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2150
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.008800
epoch38, lossVariable containing:
 0.9472
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2104
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.012300
epoch39, lossVariable containing:
 0.9232
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2072
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011400
epoch40, lossVariable containing:
 0.9040
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

In [44]:
sio.savemat('amsgrad_res_gen.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

In [51]:
learning_rate = 8e-7
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=True)
LOSS, Test_LOSS, Test_Acc= cifartrain()

epoch0, lossVariable containing:
 2.2992
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2770
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.016400
epoch1, lossVariable containing:
 2.2550
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.2320
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006800
epoch2, lossVariable containing:
 2.2015
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.1676
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.020900
epoch3, lossVariable containing:
 2.1247
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 2.0846
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.012700
epoch4, lossVariable containing:
 2.0461
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test lo

epoch36, lossVariable containing:
 1.1595
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2855
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.006500
epoch37, lossVariable containing:
 1.1404
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2786
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011600
epoch38, lossVariable containing:
 1.1258
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2730
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.011900
epoch39, lossVariable containing:
 1.1052
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2672
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.015600
epoch40, lossVariable containing:
 1.0880
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

epoch72, lossVariable containing:
 0.5003
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2911
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.001100
epoch73, lossVariable containing:
 0.4858
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2922
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.000900
epoch74, lossVariable containing:
 0.4699
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.2945
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.002500
epoch75, lossVariable containing:
 0.4598
[torch.cuda.FloatTensor of size 1 (GPU 0)]

test loss:Variable containing:
 1.3045
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Accuracy of the network on the 10000 test images: 0.024100
epoch76, lossVariable containing:
 0.4387
[torch.cuda.FloatTensor of size 1 (GPU 0)]

te

In [52]:
sio.savemat('amsgrad_res_gen8.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

## amsgrad's best

In [None]:
learning_rate = 7e-7
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
TEST_Acc = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay=0.3, amsgrad=True)
LOSS, Test_LOSS, Test_Acc= cifartrain()

In [None]:
sio.savemat('amsgrad_res_gen77.mat',{'loss':LOSS, 'test_loss':Test_LOSS, 'test_acc':Test_Acc})

### Longer Training

In [12]:
max_epoch=100

In [13]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
LOSS, Test_LOSS= cifartrain()
sio.savemat('amsgrad_res.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3702
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9068
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6575
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4340
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2329
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1203
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.4533
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.4356
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  9.6874
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  4.9833
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  3.4243
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-04 *
  1.9236
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-04 *
  1.9162
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-04 *
  1.9710
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-04 *
  1.9420
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-04 *
  1.8586
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-04 *
  1.8672
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-04 *
  1.8335
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-04 *
  1.7722
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-04 *
  1.7807
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-04 *
  1.7081
[torch.cuda.FloatTensor of size 1 

In [14]:
# learning_rate = 5e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=True)
# LOSS, Test_LOSS= cifartrain()
# sio.savemat('amsgrad_res1.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3858
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9398
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6760
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4355
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2204
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1019
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  4.4211
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  1.9033
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  8.5368
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  5.4792
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  4.0852
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-04 *
  2.5994
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-04 *
  2.5262
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-04 *
  2.5318
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-04 *
  2.3552
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-04 *
  2.6358
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-04 *
  2.4442
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-04 *
  2.3429
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-04 *
  2.3568
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-04 *
  2.3070
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-04 *
  2.3768
[torch.cuda.FloatTensor of size 1 

In [17]:
# learning_rate = 6e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.99), amsgrad=True)
# LOSS, Test_LOSS= cifartrain()
# sio.savemat('amsgrad_res2.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3698
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9132
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6540
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4146
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2094
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
1.00000e-02 *
  9.4387
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  3.9940
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  1.7212
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-03 *
  9.6116
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  6.2651
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  4.9588
[torch.cuda.FloatT

epoch82, lossVariable containing:
1.00000e-04 *
  3.2641
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch83, lossVariable containing:
1.00000e-04 *
  3.2866
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-04 *
  3.0632
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-04 *
  3.0229
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-04 *
  3.0981
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-04 *
  3.1734
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-04 *
  3.0203
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-04 *
  2.8576
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-04 *
  3.0485
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-04 *
  2.8847
[torch.cuda.FloatTensor of size 1 

In [18]:
learning_rate = 5.5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
LOSS, Test_LOSS= cifartrain()
sio.savemat('nosadam_res2.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3817
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9280
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6726
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4438
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2349
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1129
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.3407
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  2.7391
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.4453
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-03 *
  6.5580
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  3.6362
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-05 *
  9.2639
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-05 *
  9.1441
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-05 *
  9.6740
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-05 *
  8.6898
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-05 *
  8.3940
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-05 *
  8.4343
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-05 *
  8.0498
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-05 *
  8.0564
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-05 *
  8.1853
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-05 *
  7.5345
[torch.cuda.FloatTensor of size 1 

In [19]:
# learning_rate = 4.5e-5
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
# LOSS, Test_LOSS= cifartrain()
# sio.savemat('nosadam_res3.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4180
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9779
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7326
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4975
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2847
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1392
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.2383
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  3.1329
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  1.4930
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  1.0283
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-03 *
  5.3139
[torch.cuda.FloatTensor of size 1

KeyboardInterrupt: 

# Adam

In [8]:
max_epoch=10

In [9]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
# sio.savemat('adam_res.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.3776
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9123
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.6554
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4343
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2325
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1172
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.4895
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.8780
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  6.5994
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  7.0446
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [10]:
learning_rate = 5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
# sio.savemat('adam_res.mat',{'loss':LOSS})

KeyboardInterrupt: 

In [11]:
learning_rate = 4e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
# sio.savemat('adam_res.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4502
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 1.0165
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7631
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.5216
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2969
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1413
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  6.9286
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  4.2577
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  3.5951
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  4.9119
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Finished Training


In [12]:
max_epoch=100

In [13]:
learning_rate = 4e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate, amsgrad=False)
LOSS, Test_LOSS= cifartrain()
sio.savemat('adam_res1.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4323
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9892
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7245
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4780
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2504
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1129
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  5.0909
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  3.0962
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  4.1710
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  7.2261
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-02 *
  4.8190
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-03 *
  5.8893
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-02 *
  1.6204
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-02 *
  1.7327
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-02 *
  1.2838
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-03 *
  5.0789
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-03 *
  2.4896
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-03 *
  6.8553
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-02 *
  2.2075
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-02 *
  1.1914
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-03 *
  7.8539
[torch.cuda.FloatTensor of size 1 

In [15]:
learning_rate = 4e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = Adam(net.parameters(), lr=learning_rate,betas=(0.9, 0.99), amsgrad=False)
LOSS, Test_LOSS= cifartrain()
sio.savemat('adam_res2.mat',{'loss':LOSS})

epoch0, lossVariable containing:
 1.4254
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch1, lossVariable containing:
 0.9677
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch2, lossVariable containing:
 0.7123
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch3, lossVariable containing:
 0.4668
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch4, lossVariable containing:
 0.2510
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch5, lossVariable containing:
 0.1298
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch6, lossVariable containing:
1.00000e-02 *
  7.8508
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch7, lossVariable containing:
1.00000e-02 *
  6.2696
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch8, lossVariable containing:
1.00000e-02 *
  5.4074
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch9, lossVariable containing:
1.00000e-02 *
  4.7083
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch10, lossVariable containing:
1.00000e-02 *
  4.7992
[torch.cuda.FloatTensor of size 1

epoch83, lossVariable containing:
1.00000e-02 *
  1.0707
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch84, lossVariable containing:
1.00000e-03 *
  9.9261
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch85, lossVariable containing:
1.00000e-03 *
  8.5545
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch86, lossVariable containing:
1.00000e-02 *
  1.0604
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch87, lossVariable containing:
1.00000e-03 *
  9.6010
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch88, lossVariable containing:
1.00000e-03 *
  9.1138
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch89, lossVariable containing:
1.00000e-03 *
  9.7684
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch90, lossVariable containing:
1.00000e-02 *
  1.0246
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch91, lossVariable containing:
1.00000e-03 *
  9.9778
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch92, lossVariable containing:
1.00000e-03 *
  8.9885
[torch.cuda.FloatTensor of size 1 

## AdaStab

In [None]:
# learning_rate = 1e-4
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
# LOSS, Test_LOSS= cifartrain()

In [None]:
# learning_rate = 2e-4
# net = ResNet18()
# net = net.cuda()
# criterion = nn.CrossEntropyLoss()
# LOSS = np.zeros([1, max_epoch])
# Test_LOSS = np.zeros([1, max_epoch])
# optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
# LOSS, Test_LOSS= cifartrain()

In [None]:
learning_rate = 5e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
LOSS, Test_LOSS= cifartrain()

In [None]:
learning_rate = 6e-5
net = ResNet18()
net = net.cuda()
criterion = nn.CrossEntropyLoss()
LOSS = np.zeros([1, max_epoch])
Test_LOSS = np.zeros([1, max_epoch])
optimizer = AdaStab(net.parameters(), lr=learning_rate, gamma=0.05)
LOSS, Test_LOSS= cifartrain()