# In [2]:
from __future__ import print_function

import os
import pdb
import time
from copy import deepcopy

import chainer
import chainer.links as L
import numpy as np
from chainer import training
from chainer.cuda import cupy as cp
from chainer.datasets import get_cifar10
from chainer.training import extensions

import augmentation
import net
from linalg import maxpercentile, rank_maxpercentile, rank_minpercentile

def conv_distortion(databatch, device, test=False):
    """Converter for the CIFAR dataset: augment the inputs, keep the labels.

    Args:
        databatch: Sequence of ``(input, label)`` pairs.
        device: Device id. A non-negative value moves the inputs and labels
            to that GPU before augmentation; ``None`` or a negative value
            leaves them where they are.
        test: Forwarded unchanged to ``augmentation.distortion_batch``.

    Returns:
        Tuple ``(distorted_input, labels)`` where ``distorted_input`` is
        whatever ``augmentation.distortion_batch`` returns and ``labels`` is
        the array of the second items of ``databatch``.
    """
    inputdata = [datatuple[0] for datatuple in databatch]
    labels = np.array([datatuple[1] for datatuple in databatch])
    # Chainer updaters pass ``device=None`` when no device was configured;
    # ``None >= 0`` raises TypeError on Python 3, so guard against it.
    if device is not None and device >= 0:
        inputdata = chainer.cuda.to_gpu(inputdata, device)
        labels = chainer.cuda.to_gpu(labels, device)
    # NOTE(review): ``device`` is forwarded as-is (possibly None); confirm
    # that ``augmentation.distortion_batch`` accepts that.
    distorted_input = augmentation.distortion_batch(inputdata, device, test=test)
    return (distorted_input, labels)

def grads(optimizer, data, label, mins=None, ranges=None, xp=np,
          percentage=15, exponent=0.5):
    """Return the normalized weight gradients produced by one update step.

    The update is run on a deep copy of ``optimizer`` (and therefore of its
    target model). The ``W.grad`` of every link below the copied model's
    ``predictor`` is flattened, passed through ``maxpercentile`` and
    concatenated into a single vector, which is then scale-normalized with
    ``mins``/``ranges`` and power-normalized with ``exponent``.

    Args:
        optimizer: Optimizer whose ``target`` exposes a ``predictor``.
        data: Input forwarded to ``opt.update(model, data, label)``.
        label: Label forwarded to ``opt.update(model, data, label)``.
        mins: Array subtracted from the gradient vector. Required.
        ranges: Array the shifted gradients are divided by; entries equal
            to zero are replaced by 1 to avoid division by zero. Required.
        xp: Array module used for all array operations.
        percentage: Second argument of ``maxpercentile``.
            NOTE(review): its exact meaning is defined in ``linalg`` and is
            not visible here.
        exponent: Power applied to the magnitude of the normalized values;
            their sign is preserved.

    Returns:
        Array holding ``sign(n) * abs(n) ** exponent`` for the normalized
        gradient vector ``n``.

    Raises:
        TypeError: If ``mins`` or ``ranges`` is ``None``.
    """
    # Fail before the expensive deepcopy/update instead of at the
    # subtraction below, which would raise the same exception type.
    if mins is None or ranges is None:
        raise TypeError("grads() requires both 'mins' and 'ranges' arrays")

    opt = deepcopy(optimizer)
    model = opt.target
    # Named ``predictor`` so the imported ``net`` module is not shadowed.
    predictor = model.predictor
    model.cleargrads()
    opt.update(model, data, label)

    gradients = xp.concatenate([
        maxpercentile(xp.ravel(link.W.grad), percentage, xp)
        for link in predictor.links(skipself=True)
    ])
    gradients = gradients.astype(xp.float32)

    # scale norm + power norm:
    normalized = xp.divide((gradients - mins), xp.where(ranges != 0, ranges, 1))
    # Entries whose raw gradient is exactly zero stay zero instead of being
    # mapped to -mins/ranges.
    n = xp.where(gradients != 0, normalized, 0)
    return xp.sign(n) * xp.power(xp.abs(n), exponent)

def grad_gen_new(updater, iterator, mins=None, ranges=None, test=False, xp=np,
                 realbatchsize=25, percentage=15, exponent=0.125, n_classes=10):
    """Yield ``(gradients, true_labels)`` chunks computed from ``iterator``.

    For every batch drawn from ``iterator`` the batch is converted with
    ``updater.converter``, one random label is drawn per example, and
    ``grads`` is evaluated per example with that random label. The resulting
    rows are stacked and handed out in consecutive chunks of
    ``realbatchsize`` rows, each paired with the matching slice of the true
    labels returned by the converter.

    Args:
        updater: Updater providing ``converter``, ``device`` and the
            ``'main'`` optimizer.
        iterator: Iterable of batches.
        mins: Forwarded to ``grads``.
        ranges: Forwarded to ``grads``.
        test: Accepted for interface compatibility; not used here.
        xp: Array module used for random labels and stacking.
        realbatchsize: Number of rows per yielded chunk. Examples beyond the
            last full chunk of a batch are dropped.
        percentage: Forwarded to ``grads``.
        exponent: Forwarded to ``grads``.
        n_classes: Exclusive upper bound of the random labels.

    Yields:
        Tuple ``(g, o)``: ``g`` is a float32 array with one row of
        normalized gradients per example, ``o`` the corresponding true
        labels.
    """
    # Public accessor instead of the private ``_optimizers`` dict; the
    # optimizer does not depend on the batch, so look it up once.
    optimizer = updater.get_optimizer('main')
    for batch in iterator:
        batchsize = len(batch)
        in_arr, truelabels = updater.converter(batch, updater.device)
        random_labels = xp.random.randint(n_classes, size=(batchsize,))
        # One grads() call per example, each on a batch of size one.
        rows = [
            grads(optimizer,
                  xp.expand_dims(in_arr[j], 0),
                  xp.expand_dims(random_labels[j], 0),
                  mins, ranges, xp, percentage, exponent)
            for j in range(batchsize)
        ]
        gradients = xp.concatenate([xp.expand_dims(row, axis=0) for row in rows])
        gradients = gradients.astype(xp.float32)
        for k in range(batchsize // realbatchsize):
            start = k * realbatchsize
            stop = start + realbatchsize
            yield gradients[start:stop, :], truelabels[start:stop]

# ---- Run configuration -------------------------------------------------
batchsize = 50             # batch size of the test iterator of the original CNN
learnrate = 0.05           # SGD learning rate of the original CNN's optimizer
out = "result"             # output directory handed to the Trainer
epoch = 200                # the gradnet loop runs while the iterator epoch is below this
gpu = 0                    # device id; a negative value selects the CPU code path
original = 23047           # iteration of the original snapshot / statistics files
normalization = "std"      # only used as part of the gradnet snapshot directory name
percentage = 15            # passed to grad_gen_new
exponent = 0.5             # passed to grad_gen_new
sizes = [5, 100, 25]       # passed to net.gradnet as middle_sizes

# CIFAR-10 train and test splits.
train, test = get_cifar10()

# Original classifier whose weight gradients are used as features later on.
model_old = L.Classifier(net.cnn_cifar())

# Plain SGD with weight decay, attached to the original classifier.
optimizer_old = chainer.optimizers.SGD(learnrate)
optimizer_old.setup(model_old)
optimizer_old.add_hook(chainer.optimizer.WeightDecay(5e-4))

# First half of the training set: one example per batch, in order, one pass.
train_iter_old = chainer.iterators.SerialIterator(
    train[:len(train) // 2], 1, repeat=False, shuffle=False)
test_iter_old = chainer.iterators.SerialIterator(
    test, batchsize, repeat=False, shuffle=False)

# Updater and trainer wrapping the original optimizer; the snapshot below is
# deserialized into this trainer.
updater_old = training.StandardUpdater(
    train_iter_old, optimizer_old, converter=conv_distortion, device=gpu)
trainer_old = training.Trainer(updater_old, (1000, 'epoch'), out=out)

# Restore the saved state; ``strict=False`` is passed to the loader.
chainer.serializers.load_npz(
    './result_original/snapshot_iter_{}'.format(original),
    trainer_old,
    strict=False)

# Pick the array module that matches the configured device.
if gpu < 0:
    xp = np
else:
    # Make the specified GPU current and copy the original model onto it.
    chainer.cuda.get_device_from_id(gpu).use()
    model_old.to_gpu(gpu)
    xp = cp

# Saved statistics used for scale normalization in grads().
# NOTE(review): presumably the element-wise min/max of the gradient vectors
# of the original snapshot; confirm against the script that wrote them.
mins = xp.load("CNNgradmin_{}.npy".format(original))
maxes = xp.load("CNNgradmax_{}.npy".format(original))
ranges = maxes - mins

# Number of weights per link of the original predictor, and the cumulative
# offsets between consecutive links (the final total is dropped).
linksizes = [link.W.size for link in model_old.predictor.links(skipself=True)]
dividers = xp.cumsum(xp.array(linksizes))[:-1].tolist()

# Network trained on the gradient features; its input is split at
# ``dividers`` and its middle layer sizes come from ``sizes``.
model = net.gradnet(input_dividers=dividers, middle_sizes=sizes)

# SGD with the library's default hyperparameters.
optimizer = chainer.optimizers.SGD()
optimizer.setup(model)

if gpu >= 0:
    chainer.cuda.get_device_from_id(gpu).use()
    model.to_gpu(gpu)

# Second half of the training set: shuffled, repeating, 25 examples per batch.
train_iter_new = chainer.iterators.SerialIterator(
    train[len(train) // 2:], 25, repeat=True, shuffle=True)

# Generator of (gradient features, true labels) chunks for the loop below.
gg = grad_gen_new(
    updater_old,
    train_iter_new,
    mins,
    ranges,
    test=False,
    xp=xp,
    realbatchsize=25,
    percentage=percentage,
    exponent=exponent,
)

# The snapshot path is built from the three middle-layer sizes; unpack them
# here so the names used in the path exist (they were undefined before,
# which raised NameError at the first snapshot).
s1, s2, s3 = sizes
snapshot_dir = 'gradnet_{}_{}_{}_{}_{}'.format(original, normalization, s1, s2, s3)
# save_npz does not create missing directories; create it up front so a
# path problem shows up before any training time is spent.
if not os.path.isdir(snapshot_dir):
    os.makedirs(snapshot_dir)

t0 = time.time()
while train_iter_new.epoch < epoch:
    grads_train, target_train = next(gg)
    grads_train = chainer.Variable(grads_train)
    target_train = chainer.Variable(target_train)
    if gpu >= 0:
        grads_train.to_gpu(gpu)
        target_train.to_gpu(gpu)

    # Calculate the prediction of the network
    prediction_train = model(grads_train)
    # Calculate the loss with softmax_cross_entropy
    loss = chainer.functions.softmax_cross_entropy(prediction_train, target_train)
    # Calculate the gradients in the network
    model.cleargrads()
    loss.backward()
    # Update all the trainable parameters
    optimizer.update()

    # Progress output every 50 updates.
    if optimizer.t % 50 == 0:
        print(optimizer.t)

    # Snapshot and elapsed time (since the loop started) every 500 updates.
    # The step itself was already printed above, since 500 is a multiple of 50.
    if optimizer.t % 500 == 0:
        chainer.serializers.save_npz(
            '{}/snapshot_iter_{}'.format(snapshot_dir, optimizer.t), model)
        t1 = time.time()
        print("time:", t1 - t0)

# Output captured from the cell above (the run ended with KeyboardInterrupt):
# 50
# 100
#
# KeyboardInterrupt: