
newest

blackfeather-wang committed Sep 26, 2019
1 parent 7828c49 commit 2bb3dbba5e67dc84615ad026cd03da158c84166a
Showing with 118 additions and 23 deletions.
  1. BIN .DS_Store
  2. BIN ISDA-cifar-2.png
  3. +11 −2 README.md
  4. +84 −0 networks/shake_shake.py
  5. +23 −21 train.py
BIN +0 Bytes (100%) .DS_Store
BIN -87.6 KB (70%) ISDA-cifar-2.png
@@ -33,6 +33,15 @@ CUDA_VISIBLE_DEVICES=0 python train.py --dataset cifar100 --model wideresnet --l
```

Train Wide-ResNet-28-10 on CIFAR-10 / 100 with ISDA and AutoAugment

```
CUDA_VISIBLE_DEVICES=0 python train.py --dataset cifar10 --model wideresnet --layers 28 --widen-factor 10 --combine-ratio 0.5 --droprate 0.3 --cos_lr --autoaugment
CUDA_VISIBLE_DEVICES=0 python train.py --dataset cifar100 --model wideresnet --layers 28 --widen-factor 10 --combine-ratio 0.5 --droprate 0.3 --cos_lr --autoaugment
```


Train Shake-Shake(26, 2x112d) on CIFAR-10 / 100 with ISDA and AutoAugment

```
@@ -77,8 +86,8 @@ fc = Full_layer(model.feature_num, class_num)
The model needs to output deep features instead of inference results:

```python
- optimizer = torch.optim.SGD([{'params_model': model.parameters()},
-                              {'params_fc': fc.parameters()}],
+ optimizer = torch.optim.SGD([{'params': model.parameters()},
+                              {'params': fc.parameters()}],
......)
......
from ISDA import ISDALoss
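# A minimal usage sketch of how the pieces above fit together (assumed names
# and schedule, for illustration only -- see ISDA.py / train.py for the exact code):
isda_criterion = ISDALoss(model.feature_num, class_num)   # assumed constructor signature

ratio = 0.5   # e.g. the --combine-ratio value; train.py anneals the ratio over epochs
for x, target in train_loader:
    x, target = x.cuda(), target.cuda()
    # the criterion runs the backbone, augments the deep features implicitly,
    # and applies fc to produce logits (call signature as used in train.py)
    loss, output = isda_criterion(model, fc, x, target, ratio)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()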
@@ -1,6 +1,7 @@
# -*-coding:utf-8-*-

import torch
import math
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
@@ -140,3 +141,86 @@ def shake_resnet26_2x64d(num_classes):

def shake_resnet26_2x112d(num_classes):
    return ShakeResNet(depth=26, base_width=112, num_classes=num_classes)


class ShakeBottleNeck(nn.Module):

    def __init__(self, in_ch, mid_ch, out_ch, cardinary, stride=1):
        super(ShakeBottleNeck, self).__init__()
        self.equal_io = in_ch == out_ch
        self.shortcut = None if self.equal_io else Shortcut(in_ch, out_ch, stride=stride)

        self.branch1 = self._make_branch(in_ch, mid_ch, out_ch, cardinary, stride)
        self.branch2 = self._make_branch(in_ch, mid_ch, out_ch, cardinary, stride)

    def forward(self, x):
        h1 = self.branch1(x)
        h2 = self.branch2(x)
        # Shake-Shake: mix the two branches with a random convex weight during
        # training (a fixed 0.5 / 0.5 average at evaluation time).
        h = ShakeShake.apply(h1, h2, self.training)
        h0 = x if self.equal_io else self.shortcut(x)
        return h + h0

    def _make_branch(self, in_ch, mid_ch, out_ch, cardinary, stride=1):
        return nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, 1, padding=0, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=False),
            nn.Conv2d(mid_ch, mid_ch, 3, padding=1, stride=stride, groups=cardinary, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=False),
            nn.Conv2d(mid_ch, out_ch, 1, padding=0, bias=False),
            nn.BatchNorm2d(out_ch))


class ShakeResNeXt(nn.Module):

    def __init__(self, depth, w_base, cardinary, label):
        super(ShakeResNeXt, self).__init__()
        n_units = (depth - 2) // 9
        n_chs = [64, 128, 256, 1024]
        self.n_chs = n_chs
        self.in_ch = n_chs[0]

        self.c_in = nn.Conv2d(3, n_chs[0], 3, padding=1)
        self.layer1 = self._make_layer(n_units, n_chs[0], w_base, cardinary)
        self.layer2 = self._make_layer(n_units, n_chs[1], w_base, cardinary, 2)
        self.layer3 = self._make_layer(n_units, n_chs[2], w_base, cardinary, 2)
        self.feature_num = n_chs[3]
        # self.fc_out = nn.Linear(n_chs[3], label)

        # Initialize parameters
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        h = self.c_in(x)
        h = self.layer1(h)
        h = self.layer2(h)
        h = self.layer3(h)
        h = F.relu(h)
        h = F.avg_pool2d(h, 8)
        h = h.view(-1, self.n_chs[3])
        # h = self.fc_out(h)
        # The classifier head is left out: ISDA expects the backbone to return
        # deep features, and logits come from the separate Full_layer head.
        return h

    def _make_layer(self, n_units, n_ch, w_base, cardinary, stride=1):
        layers = []
        mid_ch, out_ch = n_ch * (w_base // 64) * cardinary, n_ch * 4
        for i in range(n_units):
            layers.append(ShakeBottleNeck(self.in_ch, mid_ch, out_ch, cardinary, stride=stride))
            self.in_ch, stride = out_ch, 1
        return nn.Sequential(*layers)


def shake_resnext29_2x4x64d(num_classes):
    return ShakeResNeXt(depth=29, w_base=64, cardinary=4, label=num_classes)
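A quick shape check for the new backbone (an illustrative sketch, not part of the commit; assumes the repository root is on PYTHONPATH): for 32x32 CIFAR inputs, `shake_resnext29_2x4x64d` returns 1024-dimensional features matching `feature_num`, and classification logits are expected to come from the separate `Full_layer` head.

```python
import torch
from networks.shake_shake import shake_resnext29_2x4x64d

model = shake_resnext29_2x4x64d(num_classes=100).eval()
with torch.no_grad():
    x = torch.randn(2, 3, 32, 32)      # a CIFAR-sized mini-batch
    features = model(x)
print(features.shape)                  # torch.Size([2, 1024]) == model.feature_num
```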


@@ -147,10 +147,10 @@
'weight_decay': 5e-4,
},
'densenet_bc': {
- 'epochs': 350,
+ 'epochs': 300,
'batch_size': 64,
'initial_learning_rate': 0.1,
- 'changing_lr': [150, 225, 300],
+ 'changing_lr': [150, 200, 250],
'lr_decay_rate': 0.1,
'momentum': 0.9,
'nesterov': True,
@@ -176,11 +176,7 @@
'nesterov': True,
'weight_decay': 5e-4,
},
- }
-
-
- if args.dataset == 'cifar10':
- training_configurations['shake_shake'] = {
+ 'shake_shake': {
'epochs': 1800,
'batch_size': 64,
'initial_learning_rate': 0.1,
@@ -189,18 +185,20 @@
'momentum': 0.9,
'nesterov': True,
'weight_decay': 1e-4,
- }
- elif args.dataset == 'cifar100':
- training_configurations['shake_shake'] = {
+ },
+ 'shake_shake_x': {
'epochs': 1800,
- 'batch_size': 128,
- 'initial_learning_rate': 0.2,
+ 'batch_size': 64,
+ 'initial_learning_rate': 0.1,
'changing_lr': [],
'lr_decay_rate': 0.1,
'momentum': 0.9,
'nesterov': True,
'weight_decay': 1e-4,
- }
+ },
+ }



record_path = './ISDA test/' + str(args.dataset) \
+ '_' + str(args.model) \
@@ -241,14 +239,14 @@ def main():

if args.augment:
if args.autoaugment:
- print('AutoAugment!')
+ print('Autoaugment')
transform_train = transforms.Compose([
transforms.ToTensor(),
transforms.Lambda(lambda x: F.pad(x.unsqueeze(0),
(4, 4, 4, 4), mode='reflect').squeeze()),
transforms.ToPILImage(),
- transforms.RandomCrop(32), CIFAR10Policy(),
- transforms.RandomHorizontalFlip(),
+ transforms.RandomCrop(32),
+ transforms.RandomHorizontalFlip(), CIFAR10Policy(),
transforms.ToTensor(),
Cutout(n_holes=args.n_holes, length=args.length),
normalize,
@@ -339,6 +337,10 @@ def main():
if args.widen_factor == 96:
model = networks.shake_shake.shake_resnet26_2x32d(class_num)

elif args.model == 'shake_shake_x':

model = networks.shake_shake.shake_resnext29_2x4x64d(class_num)

if not os.path.isdir(check_point):
mkdir_p(check_point)

@@ -414,7 +416,8 @@ def main():
np.savetxt(accuracy_file, np.array(val_acc))

print('Best accuracy: ', best_prec1)
- val_acc.append(sum(val_acc[len(val_acc) - 10:]) / 10)
+ print('Average accuracy', sum(val_acc[len(val_acc) - 10:]) / 10)
+ # val_acc.append(sum(val_acc[len(val_acc) - 10:]) / 10)
# np.savetxt(val_acc, np.array(val_acc))
np.savetxt(accuracy_file, np.array(val_acc))

@@ -433,13 +436,12 @@ def train(train_loader, model, fc, criterion, optimizer, epoch):

end = time.time()
for i, (x, target) in enumerate(train_loader):
- target = target.cuda(async=True)
+ target = target.cuda()
x = x.cuda()
input_var = torch.autograd.Variable(x)
target_var = torch.autograd.Variable(target)

# compute output

loss, output = criterion(model, fc, input_var, target_var, ratio)

# measure accuracy and record loss
@@ -487,7 +489,7 @@ def validate(val_loader, model, fc, criterion, epoch):

end = time.time()
for i, (input, target) in enumerate(val_loader):
- target = target.cuda(async=True)
+ target = target.cuda()
input = input.cuda()
input_var = torch.autograd.Variable(input)
target_var = torch.autograd.Variable(target)
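For context on the `.cuda()` change in both train() and validate(): `async` became a reserved keyword in Python 3.7, so `target.cuda(async=True)` is a syntax error there. If an asynchronous host-to-device copy is still wanted, the modern spelling is shown below (a side note, not part of the commit):

```python
# non-blocking transfer on current PyTorch (needs pinned memory to actually
# overlap the copy); the plain .cuda() used in the commit is also fine
target = target.cuda(non_blocking=True)
```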
@@ -584,7 +586,7 @@ def update(self, val, n=1):


def adjust_learning_rate(optimizer, epoch):
"""Sets the learning rate to the initial LR divided by 5 at 60th, 120th and 160th epochs"""
"""Sets the learning rate"""
if not args.cos_lr:
if epoch in training_configurations[args.model]['changing_lr']:
for param_group in optimizer.param_groups:
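With `changing_lr` left empty (as in the new `shake_shake` entries), the step-decay branch above never fires, so these models are presumably meant to be trained with `--cos_lr`, as in the README commands. A generic cosine-annealing sketch of that branch (illustrative only; `initial_lr`, `epoch`, and `total_epochs` are assumed names, and the script's exact schedule may anneal per iteration rather than per epoch):

```python
import math

# cosine annealing from initial_lr down to 0 over total_epochs
lr = 0.5 * initial_lr * (1 + math.cos(math.pi * epoch / total_epochs))
for param_group in optimizer.param_groups:
    param_group['lr'] = lr
```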
