In [1]:
from __future__ import division

# One seed value shared by every random number generator seeded below.
seed_value= 0

import os
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED']=str(seed_value)
import random
random.seed(seed_value)  # Python's built-in `random` module
import numpy as np
np.random.seed(seed_value)  # NumPy's global generator
import tensorflow as tf
tf.set_random_seed(seed_value)  # TensorFlow seed (TF1-style API)
from keras import backend as K
import os
import keras
import pickle
import os.path
#import matplotlib.pyplot as plt
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.callbacks import LambdaCallback
from keras.callbacks import TensorBoard
from sklearn.preprocessing import LabelBinarizer
from keras import initializers
from keras.layers import Input
import keras.backend as K
from keras.models import Model
from keras import backend as K
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator


# Training configuration
batch_size = 128
num_classes = 10
epochs = 200


# CIFAR-10 as returned by cifar10.load_data(): a (train, test) pair,
# each of which is an (images, labels) pair.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split


# Convert both label arrays to one-hot encoding with num_classes columns.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Select the 'tf' image dimension ordering in the Keras backend.
K.set_image_dim_ordering('tf')

# Notebook-style echo of the active ordering and the training array shape.
K.image_dim_ordering(), x_train.shape


def build_model(setseed):
    """
    Builds and compiles the AlexNet-style Keras model for CIFAR-10
    :param setseed (int): seed passed to every glorot_uniform kernel and bias initializer
    :return: Keras functional Model taking 32x32x3 inputs and producing a
        num_classes-way softmax, compiled with the 'sgd' optimizer,
        categorical cross-entropy loss and the accuracy metric
    """
    def seeded():
        # A new initializer object per kernel/bias, each carrying the same seed.
        return initializers.glorot_uniform(seed=setseed)

    def conv3x3(filters, **extra):
        # 3x3 ReLU convolution with 'same' padding and seeded initializers.
        return Conv2D(filters, (3, 3), activation='relu', padding='same',
                      kernel_initializer=seeded(), bias_initializer=seeded(),
                      **extra)

    def dense(units, activation):
        # Fully connected layer with seeded initializers.
        return Dense(units, activation=activation,
                     kernel_initializer=seeded(), bias_initializer=seeded())

    # NOTE: the training loop in this file addresses the two Dropout layers by
    # position (layers[14] and layers[16]), so layers must not be added,
    # removed or reordered here.
    image = Input(shape=[32, 32, 3])

    # Stage 1: strided convolution -> 2x2 max-pool -> batch normalisation.
    net = conv3x3(48, strides=(2, 2))(image)
    net = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(net)
    net = BatchNormalization()(net)

    # Stage 2: convolution -> 3x3 max-pool (stride 2) -> batch normalisation.
    net = conv3x3(96)(net)
    net = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(net)
    net = BatchNormalization()(net)

    # Stage 3: three stacked convolutions -> 3x3 max-pool -> batch normalisation.
    net = conv3x3(192)(net)
    net = conv3x3(192)(net)
    net = conv3x3(256)(net)
    net = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(net)
    net = BatchNormalization()(net)

    # Classifier head: two tanh layers, each followed by 50% dropout.
    net = Flatten()(net)
    net = dense(512, 'tanh')(net)
    net = Dropout(0.5)(net)
    net = dense(256, 'tanh')(net)
    net = Dropout(0.5)(net)
    probs = dense(num_classes, 'softmax')(net)

    # Keras 2 keyword names; `input=` / `output=` are the legacy Keras 1 spelling.
    model = Model(inputs=image, outputs=probs)
    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    return model



# Containers for the three-model ensemble.  Every list below is indexed by
# model position (0, 1, 2).
all_model = [None] * 3
losses = [None] * 3

prediction = []

all_score = [0, 0, 0]
gr = []    # per model: gradient tensors of the total loss w.r.t. trainable weights
wr = []    # per model: trainable weight tensors
xwr = []   # per model: non-trainable weight tensors

# Build all three models first; model i is initialised with seed i + 2, and
# NumPy is re-seeded with 25 + i right before each build.
for i in range(3):
    np.random.seed(25 + i)
    model = build_model(i + 2)
    all_model[i] = model

# Then collect, per model, its weights, gradients, loss and output tensors.
for i, member in enumerate(all_model):
    trainable = list(member.trainable_weights)
    gr.append(member.optimizer.get_gradients(member.total_loss, trainable))
    wr.append(trainable)
    xwr.append(list(member.non_trainable_weights))

    losses[i] = member.total_loss
    prediction.append(member.output)


# `model` is still bound to the last model built above.
model.summary()

# Feed order expected by the backend functions: for each model its input
# data, per-sample weights and labels; the learning-phase flag sits right
# after the first model's triple.
input_tensors = []
for position, member in enumerate(all_model):
    input_tensors.extend([
        member.inputs[0],          # input data
        member.sample_weights[0],  # how much to weight each sample by
        member.targets[0],         # labels
    ])
    if position == 0:
        input_tensors.append(K.learning_phase())  # train or test mode

# Index of the model with the smallest loss.
minlos = K.argmin(losses)

# All gradient tensors of all models, flattened into one list.
grr = [tensor for per_model in gr for tensor in per_model]

# Evaluation-only function: returns the three losses, the argmin index and
# the three prediction tensors, without applying any update.
upd_test = K.function(
    inputs=input_tensors,
    outputs=list(losses) + [minlos] + list(prediction),
)





# Per-weight tensors selected at run time by `minlos`: the entry belonging to
# the lowest-loss model ("best") and the two remaining entries ("non0",
# "non1"), for gradients, trainable weights and non-trainable weights.
grad_best = []
grad_non0 = []
grad_non1 = []

weig_best = []
weig_non0 = []
weig_non1 = []

xweig_best = []
xweig_non0 = []
xweig_non1 = []


def _split_on_best(per_model_tensors, tensor_shape):
    """
    Separate the entry at index `minlos` from the other two entries.

    :param per_model_tensors: list with one tensor per model (three in total)
    :param tensor_shape: shape of a single one of those tensors
    :return: (best, first_other, second_other); the two others keep their
        original relative order
    """
    single = tuple(tensor_shape)
    stacked = tf.reshape(tf.concat(per_model_tensors, 0), (3,) + single)
    best = stacked[minlos]

    # The union of the ranges [0, minlos) and (minlos, end] is the non-best set.
    before = stacked[0:minlos]
    after = stacked[minlos+1:]
    others = tf.reshape(tf.concat([before, after], 0), (-1,))
    others = tf.reshape(others, (2,) + single)
    return best, others[0], others[1]


for i in range(len(gr[0])):
    # Gradients are reshaped using the shape of the matching weight tensor.
    weight_shape = wr[0][i].shape

    best, other0, other1 = _split_on_best([per_model[i] for per_model in gr], weight_shape)
    grad_best.append(best)
    grad_non0.append(other0)
    grad_non1.append(other1)

    best, other0, other1 = _split_on_best([per_model[i] for per_model in wr], weight_shape)
    weig_best.append(best)
    weig_non0.append(other0)
    weig_non1.append(other1)

    # Non-trainable weights are handled in the same loop for as long as the
    # index is valid for them.
    if i < len(xwr[0]):
        print(i)
        best, other0, other1 = _split_on_best(
            [per_model[i] for per_model in xwr], xwr[0][i].shape)
        xweig_best.append(best)
        xweig_non0.append(other0)
        xweig_non1.append(other1)

# Same best / non-best split as for the weights, applied to the three scalar
# losses.
los = tf.stack(list(losses))
los2 = tf.reshape(los, (3,))
losbest = los2[minlos]

# The union of the ranges [0, minlos) and (minlos, end] is the non-best set.
loswnbc = tf.reshape(tf.concat([los2[0:minlos], los2[minlos+1:]], 0), (-1,))
loswnbc2 = tf.reshape(loswnbc, (2,))
losss0, losss1 = loswnbc2[0], loswnbc2[1]


# Added element-wise to the squared differences before they are summed.
eps = 1.5


def _pull_term(best, nonbest, loss_gap):
    """
    L2-normalised (best - nonbest), scaled by the loss gap over the summed
    squared distance (with `eps` added to every element before the sum).
    """
    delta = best - nonbest
    return tf.keras.backend.l2_normalize(
        delta * loss_gap / tf.reduce_sum(tf.pow(delta, 2) + eps))


mn0 = [_pull_term(best, nonbest, losbest - losss0)
       for best, nonbest in zip(weig_best, weig_non0)]

mn1 = [_pull_term(best, nonbest, losbest - losss1)
       for best, nonbest in zip(weig_best, weig_non1)]


lr = 0.1

# Non-best candidates: own gradient step plus a tenth-strength pull term.
nCom0 = [non - lr * grad - lr / 10 * mn
         for mn, grad, non in zip(mn0, grad_non0, weig_non0)]

nCom1 = [non - lr * grad - lr / 10 * mn
         for mn, grad, non in zip(mn1, grad_non1, weig_non1)]

# Best candidate: plain gradient step.
xbest = [non - lr * nc for nc, non in zip(grad_best, weig_best)]

# Which variables receive which new values, in assignment order: model 2
# takes the "best" values, model 1 the first non-best, model 0 the second;
# non-trainable weights follow the same permutation without modification.
# NOTE(review): the new values are computed from the very variables being
# assigned within one function call -- confirm the backend orders the reads
# before the writes.
_assignment_plan = [
    (wr[2], xbest),
    (xwr[2], xweig_best),
    (wr[1], nCom0),
    (xwr[1], xweig_non0),
    (wr[0], nCom1),
    (xwr[0], xweig_non1),
]

upd2 = [
    tf.assign(param_i, v)
    for variables, values in _assignment_plan
    for param_i, v in zip(variables, values)
]


# Training function: same outputs as the evaluation function, plus the
# weight assignments above as updates.
upd_bb2 = K.function(
    inputs=input_tensors,
    outputs=[losses[0], losses[1], losses[2], minlos,
             prediction[0], prediction[1], prediction[2]],
    updates=upd2,
)

# Cast the images to float32 and scale them by 1/255 in place.
x_train, x_test = (images.astype('float32') for images in (x_train, x_test))
for images in (x_train, x_test):
    images /= 255


# Augmentation settings for the training stream.  Only the width/height
# shifts and the horizontal flip are switched on; the remaining options are
# spelled out explicitly with "off" values.
_augmentation = dict(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    zca_epsilon=1e-06,                    # epsilon for ZCA whitening
    rotation_range=0,                     # random rotation range in degrees
    width_shift_range=0.1,                # horizontal shift, fraction of total width
    height_shift_range=0.1,               # vertical shift, fraction of total height
    shear_range=0.,                       # range for random shear
    zoom_range=0.,                        # range for random zoom
    channel_shift_range=0.,               # range for random channel shifts
    fill_mode='nearest',                  # how points outside the boundaries are filled
    cval=0.,                              # value used for fill_mode = "constant"
    horizontal_flip=True,                 # randomly flip images horizontally
    vertical_flip=False,                  # randomly flip images vertically
    rescale=None,                         # rescaling factor applied before any other transformation
    preprocessing_function=None,          # function applied on each input
    data_format=None,                     # "channels_first" or "channels_last"
)
datagen = ImageDataGenerator(**_augmentation)

# Compute quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(x_train)

# The test stream uses a generator with default settings.
datagentest = ImageDataGenerator()

# alfa 0.1 beta 0.5 for nonbest, 0.1 alfa for best
from sklearn.metrics import accuracy_score

lossepoch=[]
lossepoch_test=[]
lossx=[]
acctra=[]
loss_test=[]
acc_test=[]
skip=[]

for f in range(200):
    tr1=[]
    tr2=[]
    res1=[]
    res2=[]
    print('Epoch', f)
    print ('train')
    batches = 0
    for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=batch_size):
        K.set_learning_phase(1)
        for i in range(len(all_model)):
            all_model[i].layers[14].rate= 0.5
            all_model[i].layers[16].rate= 0.5
        inputs = [x_batch, # X
                  np.ones(y_batch.shape[0]), # sample weights
                  y_batch, # y
                  1, # learning phase in TEST mode
                  x_batch, # X
                  np.ones(y_batch.shape[0]), # sample weights
                  y_batch, # y
                  x_batch, # X
                  np.ones(y_batch.shape[0]), # sample weights
                  y_batch, # y
                 ]
        ll = upd_bb2(inputs)
        yhat=ll[6]
        lossepoch.append(ll[2])
        tr1.append(ll[2])
        tr2.append(accuracy_score(np.argmax(y_batch,axis=1), np.argmax(yhat,axis=1)))
        skip.append(ll[3])
        batches += 1
        if batches > len(x_train) / batch_size:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break
    m=(len(x_train) / batch_size)-int((len(x_train) / batch_size))
    tr1[-1]*=m
    tr2[-1]*=m
    lossx.append(np.mean(tr1))
    acctra.append(np.mean(tr2))
    print ('train loss score is :'+str(np.mean(tr1)))
    print ('train acc score is :'+str(np.mean(tr2)))
    print ('test')
    batchesx = 0
    for x_batch, y_batch in datagentest.flow(x_test, y_test, batch_size=batch_size):
        K.set_learning_phase(0)
        for i in range(len(all_model)):
            all_model[i].layers[14].rate= 0
            all_model[i].layers[16].rate= 0
        inputs = [x_batch, # X
                  np.ones(y_batch.shape[0]), # sample weights
                  y_batch, # y
                  1, # learning phase in TEST mode
                  x_batch, # X
                  np.ones(y_batch.shape[0]), # sample weights
                  y_batch, # y
                  x_batch, # X
                  np.ones(y_batch.shape[0]), # sample weights
                  y_batch, # y
                 ]
        ll = upd_test(inputs)
        yhat=ll[6]
        lossepoch_test.append(ll[2])
        res1.append(ll[2])
        res2.append(accuracy_score(np.argmax(y_batch,axis=1), np.argmax(yhat,axis=1)))
        batchesx += 1
        if batchesx >= len(x_test) / batch_size:
            break
    m=(len(x_test) / batch_size)-int((len(x_test) / batch_size))
    res1[-1]*=m
    res2[-1]*=m
    loss_test.append(np.mean(res1))
    acc_test.append(np.mean(res2))
    print ('test loss score is :'+str(np.mean(res1)))
    print ('test acc score is :'+str(np.mean(res2)))

# Best value and the epoch index at which it occurred: test accuracy,
# test loss, training loss.
print (np.max(acc_test), np.argmax(acc_test))
print (np.min(loss_test), np.argmin(loss_test))
print (np.min(lossx), np.argmin(lossx))


# Dump every recorded history to its own CSV file, one value per line.
_csv_outputs = (
    ("alg4_1_lossepoch_reducesum.csv", lossepoch),
    ("alg4_1_lossepoch_test_reducesum.csv", lossepoch_test),
    ("alg4_1_loss_tra_reducesum.csv", lossx),
    ("alg4_1_skip_reducesum.csv", skip),
    ("alg4_1_acc_tra_reducesum.csv", acctra),
    ("alg4_1_loss_test_reducesum.csv", loss_test),
    ("alg4_1_acc_test_reducesum.csv", acc_test),
)
for _csv_name, _series in _csv_outputs:
    np.savetxt(_csv_name, _series, delimiter=",", fmt='%s')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.









Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 16, 16, 48)        1344      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 8, 8, 48)          0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 8, 8, 48)          192       
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 8, 8, 96)          41568     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 3, 3, 96)          0         
________________________________________________