In [1]:
import warnings

import numpy as np
from torch.optim.lr_scheduler import _LRScheduler
# from .optimizer import Optimizer

class randomLR(_LRScheduler):
    """Scheduler that periodically re-samples the learning rate at random.

    On the initial step performed by the constructor, and afterwards every
    ``step_size`` scheduler steps, each parameter group receives a learning
    rate drawn uniformly between its ``base_lr`` and ``max_lr``. On all other
    steps the current learning rate of each group is left unchanged.

    Args:
        optimizer: Wrapped optimizer.
        base_lr (float or list/tuple): Lower sampling bound; a single value
            shared by all parameter groups or one value per group.
        max_lr (float or list/tuple): Upper sampling bound; same format as
            ``base_lr``.
        step_size (int): Number of scheduler steps between two random draws.
        last_epoch (int): Index of the last step; ``-1`` starts from scratch.

    Raises:
        ValueError: If a list/tuple bound does not have one entry per
            parameter group.
    """

    def __init__(self, optimizer, base_lr, max_lr, step_size=100, last_epoch=-1):
        self.step_size = step_size
        self.max_lrs = self._format_param('max_lr', optimizer, max_lr)
        base_lrs = self._format_param('base_lr', optimizer, base_lr)

        # The parent constructor derives ``self.base_lrs`` from the optimizer
        # and immediately performs the first step (which calls ``get_lr``).
        # Seeding the optimizer with ``base_lr`` first (same approach as
        # torch's CyclicLR) makes that very first draw use the requested lower
        # bound instead of whatever lr the optimizer was created with.
        if last_epoch == -1:
            for lr, group in zip(base_lrs, optimizer.param_groups):
                group['lr'] = lr

        super(randomLR, self).__init__(optimizer, last_epoch)
        self.base_lrs = base_lrs

    def _format_param(self, name, optimizer, param):
        """Return ``param`` as a list with one entry per parameter group."""
        group_count = len(optimizer.param_groups)
        if isinstance(param, (list, tuple)):
            if len(param) != group_count:
                raise ValueError("expected {} values for {}, got {}".format(
                    group_count, name, len(param)))
            return list(param)
        return [param] * group_count

    def get_lr(self):
        """Compute the learning rate of every parameter group for this step.

        Returns:
            list: Freshly sampled learning rates when ``last_epoch`` is a
            multiple of ``step_size`` (including 0), otherwise the learning
            rates currently stored in the optimizer.
        """
        if not self._get_lr_called_within_step:
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)

        if (self.last_epoch == 0) or (self.last_epoch % self.step_size == 0):
            # Plain Python floats keep the optimizer state free of numpy scalars.
            return [float(np.random.uniform(base_lr, max_lr))
                    for base_lr, max_lr in zip(self.base_lrs, self.max_lrs)]

        return [group['lr'] for group in self.optimizer.param_groups]

In [2]:
from torch.optim.lr_scheduler import _LRScheduler
# from .optimizer import Optimizer

class MCLR(_LRScheduler):
    """Random-walk learning-rate scheduler bounded by ``base_lr`` and ``max_lr``.

    The schedule starts at ``base_lr`` and moves by one increment of
    ``(max_lr - base_lr) / step_size`` per scheduler step. While strictly
    inside the interval it moves up with probability ``p`` and down otherwise.
    When the upper bound is reached the walk steps down and ``p`` becomes
    ``min(p, 1 - p)``; when the lower bound is reached it steps up and ``p``
    becomes ``max(p, 1 - p)``.

    The bound checks and the random draw are shared by all parameter groups
    and are driven by the first group; every group moves by its own increment
    and is clamped to its own ``[base_lr, max_lr]`` interval.

    Args:
        optimizer: Wrapped optimizer.
        base_lr (float or list/tuple): Lower bound; one value for all
            parameter groups or one value per group.
        max_lr (float or list/tuple): Upper bound; same format as ``base_lr``.
        p (float): Probability of an upward move while inside the interval.
        step_size (int): Number of increments between ``base_lr`` and
            ``max_lr``.
        last_epoch (int): Index of the last step; ``-1`` starts from scratch.

    Raises:
        ValueError: If a list/tuple bound does not have one entry per
            parameter group.
    """

    def __init__(self, optimizer, base_lr, max_lr, p=0.9, step_size=100, last_epoch=-1):
        self.step_size = step_size
        self.p = p
        self.flag = 0  # not read by the scheduler; kept so the attribute still exists

        base_lrs = self._format_param('base_lr', optimizer, base_lr)
        self.max_lrs = self._format_param('max_lr', optimizer, max_lr)

        # One increment per group so that list-valued bounds work as well.
        self.onesteps = [(hi - lo) / step_size
                         for lo, hi in zip(base_lrs, self.max_lrs)]
        # Historical scalar attribute: the increment of the first group.
        self.onestep = self.onesteps[0]

        # The parent constructor derives ``self.base_lrs`` from the optimizer
        # and immediately performs the first step (which calls ``get_lr``), so
        # everything ``get_lr`` reads has to exist before that call. Seeding
        # the optimizer with ``base_lr`` (same approach as torch's CyclicLR)
        # makes the schedule really start at ``base_lr``.
        if last_epoch == -1:
            for lr, group in zip(base_lrs, optimizer.param_groups):
                group['lr'] = lr

        super(MCLR, self).__init__(optimizer, last_epoch)
        self.base_lrs = base_lrs

    def _format_param(self, name, optimizer, param):
        """Return ``param`` as a list with one entry per parameter group."""
        group_count = len(optimizer.param_groups)
        if isinstance(param, (list, tuple)):
            if len(param) != group_count:
                raise ValueError("expected {} values for {}, got {}".format(
                    group_count, name, len(param)))
            return list(param)
        return [param] * group_count

    def get_lr(self):
        """Compute the learning rate of every parameter group for this step.

        Returns:
            list: One learning rate per parameter group.
        """
        if not self._get_lr_called_within_step:
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)

        # The first two steps are deterministic: start at the lower bound,
        # then take one increment upwards.
        if self.last_epoch == 0:
            return list(self.base_lrs)
        if self.last_epoch == 1:
            return [lo + step for lo, step in zip(self.base_lrs, self.onesteps)]

        # Read the wrapped optimizer (not a global variable named `optimizer`).
        current_lrs = [group['lr'] for group in self.optimizer.param_groups]
        lead_lr = current_lrs[0]  # the first group decides which branch applies

        if lead_lr >= self.max_lrs[0]:
            # Upper bound reached: bounce back and favour downward moves.
            self.p = min(self.p, 1 - self.p)
            return [hi - step for hi, step in zip(self.max_lrs, self.onesteps)]

        if lead_lr <= self.base_lrs[0]:
            # Lower bound reached: bounce back and favour upward moves.
            self.p = max(self.p, 1 - self.p)
            return [lo + step for lo, step in zip(self.base_lrs, self.onesteps)]

        if np.random.uniform(0, 1) <= self.p:
            return [min(lr + step, hi)
                    for lr, step, hi in zip(current_lrs, self.onesteps, self.max_lrs)]
        return [max(lr - step, lo)
                for lr, step, lo in zip(current_lrs, self.onesteps, self.base_lrs)]
                
            
        

#         if (self.last_epoch == 0) or (self.last_epoch % self.step_size == 0):
#             lrs = []
#             p = np.random.uniform(0,1)
#             for base_lr, max_lr in zip(self.base_lrs, self.max_lrs):
#                 p = np.random.uniform(0,1)
#                 lr = optimizer.param_groups[0]['lr']
#                 threshold = (lr-base_lr)/(max_lr-base_lr)
#                 if p>= threshold:
#                     lrs.append(np.random.uniform(lr, max_lr))
#                 else:
#                     lrs.append(np.random.uniform(base_lr, lr))
#             return lrs
                
#         return [group['lr'] for group in self.optimizer.param_groups]


In [9]:
from scipy.stats import levy_stable
import numpy as np
import torch
import math
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import optim
import os

# Root directory under which this experiment stores its per-run folders.
PATH_base = './3FCN-MNIST-constant-1'

# Create the directory if it is missing. `exist_ok=True` tolerates an already
# existing directory, while genuine failures (e.g. missing permissions) are
# raised here instead of being silently swallowed and surfacing much later.
os.makedirs(PATH_base, exist_ok=True)

In [10]:
# A simple FCN
class simpleNet(nn.Module):
    """Bias-free fully connected ReLU network.

    The network is ``Linear -> ReLU`` followed by ``depth - 2`` further
    ``Linear -> ReLU`` pairs of size ``width`` and a final ``Linear`` layer
    producing ``num_classes`` outputs. No layer has a bias term.

    Args:
        input_dim (int): Number of features after flattening one sample.
        width (int): Size of every hidden layer.
        depth (int): Total number of linear layers (input and output included).
        num_classes (int): Size of the output layer.
    """

    def __init__(self, input_dim=28*28 , width=128, depth=3, num_classes=10):
        super().__init__()
        self.input_dim, self.width = input_dim, width
        self.depth, self.num_classes = depth, num_classes

        # The hidden pairs are created before the input/output layers, which
        # fixes the order in which the random initialisation is consumed.
        hidden_stack = self.get_layers()

        modules = [
            nn.Linear(self.input_dim, self.width, bias=False),
            nn.ReLU(inplace=True),
        ]
        modules.extend(hidden_stack)
        modules.append(nn.Linear(self.width, self.num_classes, bias=False))
        self.fc = nn.Sequential(*modules)

    def get_layers(self):
        """Build the ``depth - 2`` hidden ``Linear -> ReLU`` pairs as a flat list."""
        hidden_stack = []
        for _ in range(self.depth - 2):
            hidden_stack += [
                nn.Linear(self.width, self.width, bias=False),
                nn.ReLU(),
            ]
        return hidden_stack

    def forward(self, x):
        """Flatten each sample to ``input_dim`` features and apply the network."""
        flat = x.view(x.size(0), self.input_dim)
        return self.fc(flat)

In [11]:
# ---- Sweep hyper-parameters ----
batch_size = 100
epoch = 200  # number of passes over the training set per learning rate
lr_list = [0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]

# ---- Preprocessing: convert to tensor, then subtract 0.5 and divide by 0.5 ----
data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

# The string below is a no-op expression used to keep the CIFAR-10
# alternative around without executing it.
'''
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=data_tf, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=data_tf)
'''

# ---- MNIST splits (the training split is downloaded on demand) ----
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=data_tf, download=True)
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=data_tf)

# Shuffled mini-batches over the training split.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [12]:
# Train one freshly initialised network per learning rate in `lr_list` with
# plain SGD. Each run gets its own folder '<PATH_base>/LR<index>'.

MAX_ITERS = 800000    # a run stops once more optimizer steps than this were taken
SAVE_AFTER = 799000   # past this many steps the model is saved after every step
# NOTE(review): epochs * batches-per-epoch must exceed SAVE_AFTER for any
# checkpoint to be written - confirm the intended values.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_train_samples = len(train_loader.dataset)  # denominator of the epoch accuracy

for i, learning_rate in enumerate(lr_list):
    # Per-epoch metrics; re-created for every learning rate, so after the
    # sweep they only describe the last run.
    trainErrorList = []
    trainAccList = []

    PATH = PATH_base + '/LR' + '{}'.format(i)
    # Tolerate an existing folder but surface real errors (e.g. permissions).
    os.makedirs(PATH, exist_ok=True)

    model = simpleNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    iter_count = 0
    iter_name = 1
    for epoch_idx in range(epoch):
        train_acc = 0  # number of correctly classified samples in this epoch
        for img, label in train_loader:
            img = img.to(device)
            label = label.to(device)

            out = model(img)
            loss = criterion(out, label)

            # Count correct predictions made with the pre-update weights.
            _, pred = torch.max(out.detach(), 1)
            train_acc += pred.eq(label.view_as(pred)).sum().item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_count += 1
            if iter_count > MAX_ITERS:
                break
            if iter_count > SAVE_AFTER:
                tmp_path = PATH + '/model' + '{}'.format(iter_name) + '.pth'
                # Saves the whole pickled module (not just a state_dict).
                torch.save(model, tmp_path)
                iter_name += 1
        if iter_count > MAX_ITERS:
            # The inner loop was cut short: skip the (partial) epoch metrics.
            break
        # The recorded "error" is the loss of the last mini-batch of the epoch.
        trainErrorList.append(loss.item())
        trainAccList.append(train_acc / num_train_samples)
    print(learning_rate)

0.02
0.03
0.04
0.05
0.06
0.07
0.08
0.09
0.1


In [13]:
# Display the per-epoch training accuracies. The list is re-created for every
# learning rate of the sweep, so it only holds the values of the last run.
trainAccList

[0.8359,
 0.9378333333333333,
 0.9555333333333333,
 0.9644333333333334,
 0.9698166666666667,
 0.9739166666666667,
 0.97775,
 0.9768666666666667,
 0.98145,
 0.9833333333333333,
 0.98565,
 0.9870833333333333,
 0.9876,
 0.9901333333333333,
 0.99025,
 0.9910166666666667,
 0.9923666666666666,
 0.9929333333333333,
 0.9937833333333334,
 0.9947,
 0.9953,
 0.9961833333333333,
 0.9955666666666667,
 0.997,
 0.9970333333333333,
 0.9978,
 0.99705,
 0.9982833333333333,
 0.9986,
 0.9991,
 0.9991833333333333,
 0.9994333333333333,
 0.9993166666666666,
 0.9995833333333334,
 0.9996833333333334,
 0.9997,
 0.9994833333333333,
 0.9997666666666667,
 0.9999166666666667,
 0.9998833333333333,
 0.99995,
 0.9999666666666667,
 0.9999666666666667,
 0.9999666666666667,
 0.9999666666666667,
 1.0,
 0.9999833333333333,
 0.9999833333333333,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.9999833333333333,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,