In [1]:
import os, sys
import numpy as np
import tensorflow as tf ## Use Tensorflow Version 1
import tflib as lib
import tflib.plot
import tflib.cifar10_fed
import tflib.sn as sn

In [2]:
# Download CIFAR-10 (Python version) at
# https://www.cs.toronto.edu/~kriz/cifar.html and set DATA_DIR below to the
# directory that holds the extracted files.
DATA_DIR = 'Cifar10'
if not DATA_DIR:
    # The previous message pointed at 'gan_cifar.py', which is not this notebook.
    raise ValueError('Please set DATA_DIR to the path of the extracted CIFAR-10 data directory!')

In [3]:
# ---- Experiment configuration ----

# Problem size
INPUT_DIM = 3*32*32        # Number of pixels in CIFAR
Sample_size = 50000        # forwarded to the data loader as `sample_size`
nodes = 100                # number of nodes; one network copy is built per node

# Batching
BATCH_SIZE = 10            # Batch size (rows per node in the training placeholder)
TEST_BATCH_SIZE = 1000     # rows in the validation placeholder, summed over all nodes

# Optimisation schedule
ITERS = 10000              # outer training iterations
tau = 1                    # classifier updates per outer iteration
maximize_iters = 10        # maxVar ascent steps taken before each classifier update
test_iters = 100

# Scale of the synthetic per-node perturbations applied to the data
noise_std = 5.0

# Directory that receives the saved metric arrays; its name encodes the setup.
address = ''.join([
    'cifar_fedOT_inception',
    '_samplesize_', str(Sample_size),
    '_nodes_', str(nodes),
    'noise_constant_var_', str(noise_std),
])

if not os.path.exists(address):
    os.makedirs(address)

In [4]:
def alexnet(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False,num=1):
    """Build one AlexNet-style CIFAR classifier and return its un-normalised outputs.

    Layout: two [5x5 convolution -> ReLU -> 3x3 max-pool (stride 2) -> local
    response normalization] stages, then fully connected layers with 384 and
    192 hidden units, then a `num_classes`-way linear layer.

    Args:
        input_data: tensor that can be reshaped to [-1, 32, 32, 3].
        num_classes: width of the final linear layer.
        wd, update_collection, beta, training: accepted but not used here.
        reuse: forwarded unchanged to every `sn.conv2d` / `sn.linear` call.
        num: index appended as '_num_<num>' to every scope and op name built
            by this call.

    Returns:
        The output tensor of the last `sn.linear` layer (no softmax applied).
    """
    suffix = '_num_' + str(num)
    pool_window = [1, 3, 3, 1]
    pool_stride = [1, 2, 2, 1]

    images = tf.reshape(input_data, [-1, 32, 32, 3])

    # Stage 1: 3 -> 96 channels.
    stage1 = sn.conv2d(images, [5, 5, 3, 96], scope_name='conv1' + suffix,
                       spectral_norm=False, reuse=reuse)
    stage1 = tf.nn.relu(stage1, name='conv1_relu' + suffix)
    stage1 = tf.nn.max_pool(stage1, ksize=pool_window, strides=pool_stride,
                            padding='VALID', name='pool1' + suffix)
    stage1 = tf.nn.lrn(stage1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                       name='norm1' + suffix)

    # Stage 2: 96 -> 256 channels.
    stage2 = sn.conv2d(stage1, [5, 5, 96, 256], scope_name='conv2' + suffix,
                       spectral_norm=False, reuse=reuse)
    stage2 = tf.nn.relu(stage2, name='conv2_relu' + suffix)
    stage2 = tf.nn.max_pool(stage2, ksize=pool_window, strides=pool_stride,
                            padding='VALID', name='pool2' + suffix)
    stage2 = tf.nn.lrn(stage2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                       name='norm2' + suffix)

    # Flatten; the code hard-codes a 7x7x256 feature map at this point.
    flat = tf.reshape(stage2, [-1, 7*7*256])

    hidden1 = sn.linear(flat, 384, scope_name='linear1' + suffix,
                        spectral_norm=False, reuse=reuse)
    hidden1 = tf.nn.relu(hidden1, name='linear1_relu' + suffix)

    hidden2 = sn.linear(hidden1, 192, scope_name='linear2' + suffix,
                        spectral_norm=False, reuse=reuse)
    hidden2 = tf.nn.relu(hidden2, name='linear2_relu' + suffix)

    return sn.linear(hidden2, num_classes, scope_name='fc' + suffix,
                     spectral_norm=False, reuse=reuse)


In [7]:
# Weights of the quadratic penalties on the two sets of "max" variables, and the
# scale of the coupling terms in the min-max objective `train_loss_2` below.
LAMBDA_0 = 1.0
LAMBDA_1 = 10.0
adv_stepsize = 2.0

# Integer pixels of all nodes stacked along the batch axis: rows
# [i*BATCH_SIZE, (i+1)*BATCH_SIZE) belong to node i.
real_data_int = tf.placeholder(tf.int32, shape=[BATCH_SIZE*nodes, INPUT_DIM])
# Affine rescaling that sends pixel value 0 to -1 and 255 to +1.
real_data = 2*((tf.cast(real_data_int, tf.float32)/255.)-.5)
# Learned per-node input correction: x -> theta_1[i] * x + theta[i] (elementwise).
theta=tf.Variable(tf.zeros(shape=[nodes, INPUT_DIM],dtype=tf.float32),dtype=tf.float32,name='Theta')
theta_1=tf.Variable(tf.ones(shape=[nodes, INPUT_DIM],dtype=tf.float32),dtype=tf.float32,name='Theta_quad')
# Variables updated by gradient ascent; they weight the per-node first and
# second moments of the corrected data in `max_loss` / `max_loss_1`.
maxVar = tf.Variable(tf.zeros(shape=[nodes, INPUT_DIM],dtype=tf.float32),dtype=tf.float32,name='maxVar')
maxVar_1 = tf.Variable(tf.zeros(shape=[nodes, INPUT_DIM],dtype=tf.float32),dtype=tf.float32,name='maxVar_quad')
label = tf.placeholder(tf.int64, shape=[BATCH_SIZE*nodes])

# Apply each node's correction to that node's slice of the batch.
data_perturbed_list = [
    tf.multiply(real_data[i*BATCH_SIZE:(i+1)*BATCH_SIZE,:], theta_1[i,:]) + theta[i,:]
    for i in range(nodes)
]

data_perturbed = tf.stack(data_perturbed_list)  # [nodes, BATCH_SIZE, INPUT_DIM]
# Per-node batch means of the corrected data and of its square.
# `axis=` replaces the deprecated `reduction_indices=` spelling.
data_perturbed_max = tf.reduce_mean(data_perturbed, axis=1)
data_perturbed_pow_2_max = tf.reduce_mean(data_perturbed**2, axis=1)

# One network copy per node (distinct variable scopes via `num=i`). Each list entry
# is already rank 2, so the former tf.squeeze around the slice was a no-op.
NN_out_perturbed_list = [
    alexnet(node_batch, num=i) for i, node_batch in enumerate(data_perturbed_list)
]
NN_out_perturbed =  tf.stack(NN_out_perturbed_list)
NN_out_perturbed = tf.reshape(NN_out_perturbed,[BATCH_SIZE*nodes, 10])

train_acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(NN_out_perturbed,axis=1),label),dtype=tf.float32))

# Softmax cross-entropy, i.e. logsumexp(logits) - logits[label] per row. The
# previous form extracted logits[label] as the diagonal of an N x N gather;
# the built-in op computes the same quantity without that intermediate.
train_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=NN_out_perturbed))
# Coupling terms: per-node moments weighted by the max variables after
# subtracting their mean over nodes.
max_loss = tf.reduce_sum(tf.multiply(data_perturbed_max, maxVar - tf.reduce_mean(maxVar, axis=0)))
max_loss_1 = tf.reduce_sum(tf.multiply(data_perturbed_pow_2_max, maxVar_1 - tf.reduce_mean(maxVar_1, axis=0)))
# Min-max objective: minimised over the network/Theta variables and maximised
# over the maxVar variables by the optimizers defined in a later cell.
train_loss_2 = (train_loss - LAMBDA_0*tf.reduce_sum(maxVar**2) - LAMBDA_1*tf.reduce_sum(maxVar_1**2)
                + adv_stepsize*(max_loss+max_loss_1)  )

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Tensor("Reshape_201:0", shape=(1000, 10), dtype=float32)
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [9]:
valid_data_int = tf.placeholder(tf.int64, shape=[TEST_BATCH_SIZE, INPUT_DIM])
# Same affine pixel rescaling as the training input (0 -> -1, 255 -> +1).
valid_data = 2.*((tf.cast(valid_data_int, tf.float32)/255.)-.5)
valid_label = tf.placeholder(tf.int64, shape=[TEST_BATCH_SIZE])

# Each node evaluates an equal, contiguous share of the validation batch; the
# reshape below only works when the batch splits evenly, so fail early if not.
if TEST_BATCH_SIZE % nodes != 0:
    raise ValueError('TEST_BATCH_SIZE must be a multiple of nodes')
test_size = TEST_BATCH_SIZE // nodes

# Re-use (reuse=True) each node's network and its learned input correction.
valid_NN_out_list = [
    alexnet(tf.multiply(valid_data[i*test_size:(i+1)*test_size,:], theta_1[i,:]) + theta[i,:],
            num=i, reuse=True)
    for i in range(nodes)
]

valid_NN_out = tf.stack(valid_NN_out_list)
valid_NN_out = tf.reshape(valid_NN_out,[TEST_BATCH_SIZE,10])

valid_acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(valid_NN_out,axis=1),valid_label),dtype=tf.float32))
# Softmax cross-entropy (logsumexp(logits) - logits[label]). The former
# tf.log(tf.reduce_sum(tf.exp(...))) overflows for large logits and was
# inconsistent with the logsumexp-based training loss.
valid_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=valid_label, logits=valid_NN_out))

saver = tf.train.Saver()
params = tf.trainable_variables()


def _names_node(var_name, node):
    """Return True if `var_name` contains 'num_<node>' not followed by another digit.

    A plain substring test would make 'num_1' also match 'num_10' ... 'num_19'.
    """
    tag = 'num_' + str(node)
    start = var_name.find(tag)
    while start != -1:
        following = var_name[start + len(tag):start + len(tag) + 1]
        if not following.isdigit():
            return True
        start = var_name.find(tag, start + 1)
    return False


# nn_vars[i] holds exactly the trainable variables of node i's network.
nn_vars = [[var for var in params if _names_node(var.name, i)] for i in range(nodes)]
if any(len(node_vars) != len(nn_vars[0]) for node_vars in nn_vars):
    raise ValueError('every node must own the same number of network variables')
nn_params = [var for var in params if (('Theta' in var.name) or (('_num') in var.name))]
max_params=[var for var in params if 'maxVar' in var.name]

# Descent on the network and Theta variables ...
Classifier_train_op = tf.train.GradientDescentOptimizer(
        learning_rate=1e-4
    ).minimize(train_loss_2, var_list=nn_params)

# ... and ascent (descent on the negated objective) on the maxVar variables.
Max_train_op = tf.train.GradientDescentOptimizer(
        learning_rate=1e-3
    ).minimize(-train_loss_2, var_list=max_params)

# Parameter averaging: overwrite every node's i-th network variable with the
# mean of that variable over all nodes. The mean is built once per parameter
# (instead of once per (parameter, node) pair), and the control dependency makes
# every mean finish before any assign runs, so no assign can change a value
# that another mean still has to read.
_per_node_param_count = len(nn_vars[0])
_node_averages = [
    (1./nodes) * tf.add_n([nn_vars[k][i] for k in range(nodes)])
    for i in range(_per_node_param_count)
]
with tf.control_dependencies(_node_averages):
    assign_op = [nn_vars[j][i].assign(_node_averages[i])
                 for i in range(_per_node_param_count) for j in range(nodes)]

In [10]:

# Callables that start one pass over the training / validation data.
# NOTE(review): `k=nodes` presumably splits the data across nodes — confirm in tflib.cifar10_fed.
train_gen, dev_gen = lib.cifar10_fed.load(
    BATCH_SIZE,
    TEST_BATCH_SIZE,
    data_dir=DATA_DIR,
    k=nodes,
    sample_size=Sample_size,
)
def _to_channel_last_rows(images):
    """Read each row as a (3, 32, 32) array, move the size-3 axis last, flatten again."""
    return images.reshape((-1, 3, 32, 32)).transpose((0, 2, 3, 1)).reshape((-1, 3*32*32))


def _cycle_batches(epoch_factory):
    """Yield (images, targets) forever, calling `epoch_factory()` again after each pass.

    `epoch_factory()` must return an iterable whose items are themselves
    iterables of (images, targets) pairs. Images are reordered with
    `_to_channel_last_rows`; targets are passed through unchanged.

    Raises:
        ValueError: if a full pass yields no batch at all (the loop would
            otherwise spin forever without producing anything).
    """
    while True:
        produced_any = False
        for elements in epoch_factory():
            for (images, targets) in elements:
                produced_any = True
                yield _to_channel_last_rows(images), targets
        if not produced_any:
            raise ValueError('data generator produced no batches')


def inf_train_gen():
    """Endless stream of (images, targets) training batches taken from `train_gen`."""
    for batch in _cycle_batches(train_gen):
        yield batch


def inf_test_gen():
    """Endless stream of (images, targets) validation batches taken from `dev_gen`."""
    for batch in _cycle_batches(dev_gen):
        yield batch
            

In [None]:
# Metric histories; the *_perturbed_* and valid_* lists are saved to `address`.
train_loss_arr = []
train_acc_arr= []
train_loss_perturbed_arr = []
train_acc_perturbed_arr= []
valid_acc_arr = []
valid_acc_perturbed_arr = []

# Fixed synthetic per-node distribution shift: node k maps x -> x + x @ M_k + b_k.
# NOTE: with nodes=100 and INPUT_DIM=3072 the float64 array of mixing matrices
# below holds nodes*INPUT_DIM**2 values (roughly 7.5 GB).
np.random.seed(1234)
perturbation_add_train = noise_std*np.random.normal(size=[nodes,INPUT_DIM])
matrix_mult_train = (noise_std/(np.sqrt(INPUT_DIM)))*np.random.normal(size=[nodes,INPUT_DIM,INPUT_DIM])


def _perturb_node_rows(dest_rows, raw_pixels, mix_matrix, shift):
    """Write one node's perturbed data into `dest_rows` in place.

    Args:
        dest_rows: writable view into the aggregate buffer, one row per sample.
        raw_pixels: pixel array with the same number of rows, scaled here by
            2*(p/255 - 0.5).
        mix_matrix: (INPUT_DIM, INPUT_DIM) matrix; `x @ mix_matrix` is added to x.
        shift: (INPUT_DIM,) vector added to every row afterwards.
    """
    dest_rows[...] = 2.*(raw_pixels/255.-0.5)
    # The right-hand side is fully evaluated before the in-place add.
    dest_rows += np.matmul(dest_rows, mix_matrix)
    dest_rows += shift


with tf.Session() as session:

    # tf.initialize_all_variables() is deprecated in favour of this call.
    session.run(tf.global_variables_initializer())
    gen = inf_train_gen()
    gen_test = inf_test_gen()

    test_size = TEST_BATCH_SIZE // nodes
    # Separate buffers for training and validation. The validation code used to
    # rebind the training buffer's name, which only worked while both happened
    # to have the same number of rows.
    _train_agg = np.zeros([BATCH_SIZE*nodes,INPUT_DIM],dtype=np.float32)
    _valid_agg = np.zeros([test_size*nodes,INPUT_DIM],dtype=np.float32)
    _labels_agg = np.zeros([BATCH_SIZE*nodes],dtype=np.int64)

    for iteration in range(ITERS):

        # Assemble one perturbed mini-batch per node.
        for k in range(nodes):
            node_images, node_labels = next(gen)
            rows = slice(k*BATCH_SIZE, (k+1)*BATCH_SIZE)
            _perturb_node_rows(_train_agg[rows, :], node_images,
                               matrix_mult_train[k], perturbation_add_train[k])
            _labels_agg[rows] = node_labels

        # Back to the integer pixel scale expected by the int32 placeholder.
        _data = np.int32(127.5*_train_agg+127.5)
        _labels = _labels_agg
        train_feed = {real_data_int: _data, label: _labels}

        # `maximize_iters` ascent steps on the maxVar variables, then one
        # descent step on the classifier/Theta variables, repeated `tau` times.
        for _local_round in range(tau):
            for _ascent_step in range(maximize_iters):
                session.run(Max_train_op, feed_dict=train_feed)
            session.run(Classifier_train_op, feed_dict=train_feed)

        # Average the network parameters over nodes first, then measure.
        # Fetching assign_op together with the metrics left it undefined whether
        # they were computed from the pre- or post-averaging weights.
        session.run(assign_op)
        _train_loss_perturbed, _train_acc_perturbed = session.run([train_loss, train_acc],
                                                                  feed_dict=train_feed)

        train_loss_perturbed_arr.append(_train_loss_perturbed)
        train_acc_perturbed_arr.append(_train_acc_perturbed)

        # Write logs every 500 iters

        if iteration % 500 == 0:
            _raw_valid, _labels_valid = next(gen_test)

            # Node k's share of the validation batch gets node k's perturbation.
            for k in range(nodes):
                rows = slice(k*test_size, (k+1)*test_size)
                _perturb_node_rows(_valid_agg[rows, :], _raw_valid[rows, :],
                                   matrix_mult_train[k], perturbation_add_train[k])

            _data_valid = np.int32(127.5*_valid_agg+127.5)

            # NOTE(review): both accuracies were computed by two identical runs on
            # the same perturbed batch, so one run now feeds both histories. If
            # `valid_acc_arr` was meant to track unperturbed data, that still
            # needs a separate feed.
            _valid_acc = session.run(valid_acc, feed_dict={valid_data_int: _data_valid,valid_label:_labels_valid})
            _valid_acc_perturbed = _valid_acc

            valid_acc_arr.append(_valid_acc)
            valid_acc_perturbed_arr.append(_valid_acc_perturbed)

            np.save(address+'/train_loss_perturbed_arr',train_loss_perturbed_arr)
            np.save(address+'/train_acc_perturbed_arr',train_acc_perturbed_arr)
            np.save(address+'/valid_acc_arr',valid_acc_arr)
            np.save(address+'/valid_acc_perturbed_arr',valid_acc_perturbed_arr)


        if iteration % 50 == 0 or iteration<10:
            lib.plot.flush()

        lib.plot.tick()