In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Open an interactive TF session up front.
# NOTE(review): the training cell further down opens its own `tf.Session()` and
# rebinds `sess`, so this session does not appear to be used or closed anywhere
# in the visible cells -- confirm it is still needed.
sess = tf.InteractiveSession()

# NOTE(review): this fresh graph is never referenced again in the visible cells;
# no cell enters it, so the ops below are created on the default graph.
g = tf.Graph()

In [2]:
# Parameters
learning_rate = 1e-3  # step size handed to the Adam optimizer
datasetReviewNum = 50  # number of passes over the training set; sets the iteration count
batch_size = 100  # examples per minibatch, for training and for test evaluation
display_step = 50  # print training stats every `display_step` steps (test accuracy every 5x that)

# Synaptic loss regularizer: weight of the synapse penalty in the total cost
gamma = 0.1

# Network Parameters
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.5 # Dropout, probability to keep units

# Moving Average Parameters (decay rates of the exponential moving averages)
alpha = 0.95
alphaOmega = alpha  # decay for the average of the importance scores
alphaDelta = alpha  # decay for the average of the parameter changes
alphaParam = 0.5  # decay for the average of the parameters themselves

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])  # flattened input images, one row per example
y = tf.placeholder(tf.float32, [None, n_classes])  # labels; the training cell feeds one-hot rows
withSI = True  # when True, gamma * synapse loss is added to the classification loss

keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

In [3]:
def calcSynapseWeight(omega, delta, epsilon = 1e-8):
    """Scale the raw importance ``omega`` by the squared change ``delta``.

    Args:
        omega: unscaled importance, one entry per parameter.
        delta: scaling value, same length as ``omega``.
        epsilon: scalar added to the denominator to keep it away from zero
            when ``delta`` is zero.

    Returns:
        ``omega / (delta**2 + epsilon)``; the power and the division are
        applied elementwise.
    """
    denominator = delta**2 + epsilon
    return omega/denominator

In [4]:
def synapseLoss(synapseWeight, theta, thetaStar):
    """Weighted squared distance between the parameters and their reference value.

    Args:
        synapseWeight: per-parameter weight, same length as the parameters.
        theta: current parameter values.
        thetaStar: moving-average value of the parameters.

    Returns:
        ``synapseWeight * (theta - thetaStar)**2``, computed elementwise
        (no reduction is applied here).
    """
    drift = theta - thetaStar
    return synapseWeight*(drift**2)

In [5]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Conv2D wrapper: convolution, then bias add, then ReLU.

    ``strides`` is used for both middle entries of the stride vector (the
    outer two entries stay 1) and the convolution uses 'SAME' padding.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))


def maxpool2d(x, k=2):
    """MaxPool2D wrapper: k-by-k window moved with a matching stride of k ('SAME' padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')


# Create model
def conv_net(x, weights, biases, dropout):
    """Build the convolutional classifier and return its output-layer activations.

    Args:
        x: input batch; it is reshaped to [-1, 28, 28, 1] before the first layer.
        weights: dict providing the 'wc1', 'wc2', 'wd1' and 'out' weight tensors.
        biases: dict providing the 'bc1', 'bc2', 'bd1' and 'out' bias tensors.
        dropout: forwarded as the second positional argument of
            ``tf.nn.dropout`` (this notebook feeds a keep probability).

    Returns:
        The un-normalised class scores (no softmax is applied here).
    """
    # View every input row as a 28x28 single-channel picture
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two rounds of convolution followed by 2x2 max pooling (down-sampling)
    stage1 = maxpool2d(conv2d(images, weights['wc1'], biases['bc1']), k=2)
    stage2 = maxpool2d(conv2d(stage1, weights['wc2'], biases['bc2']), k=2)

    # Flatten to the row width the fully connected weight matrix expects
    dense_in_width = weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(stage2, [-1, dense_in_width])

    # Fully connected layer with ReLU, then dropout
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
    hidden = tf.nn.dropout(hidden, dropout)

    # Output layer, class prediction
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])

In [6]:
# Store layers weight & bias
# Weights start from a truncated normal (stddev 0.1); biases start at the constant 0.1.
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1)),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1)),
    # fully connected, 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024], stddev=0.1)),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.truncated_normal([1024, n_classes], stddev=0.1))
}

biases = {
    'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
    'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
    'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
}

# Number of trainable variables registered so far
# (presumably the eight weight/bias tensors above -- confirm nothing else was created earlier)
numParamMats = len(tf.trainable_variables())

# One non-trainable, zero-initialised tensor per trainable variable, with that
# variable's shape; these hold the per-parameter weights used by the synapse loss.
synapseWeight = [tf.Variable(tf.zeros(var.shape), trainable = False) for var in tf.trainable_variables()]
# Total number of scalar entries across all trainable variables, cast to float32
numParams = tf.cast(tf.add_n([tf.size(tf.trainable_variables()[i]) for i in range(numParamMats)]), tf.float32)
# Define moving average operations
with tf.name_scope("emaOp"):
    # Three exponential-moving-average helpers, one per decay constant; the
    # omega and delta averages use zero-debiasing, the parameter average does not.
    emaOmegaOp = tf.train.ExponentialMovingAverage(alphaOmega, zero_debias = True, name = 'movingAvgOmegaOp')
    emaDeltaOp = tf.train.ExponentialMovingAverage(alphaDelta, zero_debias = True, name = 'movingDeltaAvgOp')
    emaParamOp = tf.train.ExponentialMovingAverage(alphaParam, zero_debias = False, name = 'movingParamAvgOp')

In [7]:
# Op that updates the moving average kept for every trainable variable
with tf.name_scope("emaParamUpdate"):
    emaParamOp_assign = emaParamOp.apply(tf.trainable_variables())

# Define NN up to the loss
with tf.name_scope("NN"):
        pred = conv_net(x, weights, biases, keep_prob)

# Accuracy: fraction of rows whose arg-max prediction equals the arg-max of the label row
with tf.name_scope("eval"):
    with tf.control_dependencies([pred]):
        correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.name_scope("loss"):
    # Ops created in this scope are made to wait for the parameter moving-average update
    with tf.control_dependencies([emaParamOp_assign]):
        # Mean softmax cross-entropy over the batch
        classCost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        # Per-variable penalty: synapseWeight * (variable - moving average of that variable)^2
        synapseCost = [synapseLoss(synapseWeight[i], tf.trainable_variables()[i], emaParamOp.average(tf.trainable_variables()[i])) for i in range(numParamMats)]
        # Penalty summed over every entry of every variable (normalising by numParams is disabled)
        sc = tf.add_n([tf.reduce_sum(synapseCost[i]) for i in range(numParamMats)])#/numParams
        # The synapse penalty only enters the optimised cost when withSI is set
        if (withSI == True):
            cost = classCost + gamma*sc
        else:
            cost = classCost

# Define old variable param
with tf.name_scope("oldParam"):
    # Multiplying by 1 produces a separate tensor per variable instead of the
    # variable itself; these serve as the "before the update" values further down.
    # NOTE(review): relies on this multiply being evaluated before the optimizer step -- confirm.
    oldParams = [tf.trainable_variables()[i]*tf.constant(1.) for i in range(numParamMats)]

In [8]:
with tf.name_scope("optimize"):
    # Ops created here wait until the oldParams tensors have been evaluated
    with tf.control_dependencies(oldParams):
        # Gradients of the classification loss alone (the synapse penalty is not included)
        classGrad = tf.gradients(classCost, tf.trainable_variables(), name = 'MGRADIENT')
        # Adam step on the full cost
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.name_scope("emaUpdate"):
    # Everything below is ordered after the optimizer step
    with tf.control_dependencies([optimizer]):
        # Change of each variable: its current value minus the matching oldParams tensor
        deltaParams = [tf.trainable_variables()[i] - oldParams[i] for i in range(numParamMats)]
        # Importance score: -gradient * change, clipped below at 0
        score = [tf.maximum(-classGrad[i]*deltaParams[i], tf.constant(0.)) for i in range(numParamMats)]
        # Fold the new scores and changes into their moving averages
        emaOmegaOp_assign = emaOmegaOp.apply(score)
        emaDeltaOp_assign = emaDeltaOp.apply(deltaParams)

with tf.name_scope("assignSynapseWeight"):
    with tf.control_dependencies([emaOmegaOp_assign, emaDeltaOp_assign, emaParamOp_assign]):
        # Store calcSynapseWeight(averaged score, averaged change) into each synapseWeight variable.
        # Fetching this list is what the training loop uses to drive a full training step.
        assignSynapseWeightOp = [synapseWeight[i].assign(calcSynapseWeight(emaOmegaOp.average(score[i]), emaDeltaOp.average(deltaParams[i]))) for i in range(numParamMats)]        

with tf.name_scope("init"):
    # Initialiser for every global variable defined so far
    init = tf.global_variables_initializer()

In [None]:

    # Define NN up to the loss
    # grad = Calculate gradients with respect to this loss
    # define MA ops:
        # exponentialAverage(omega, grad*(oldParam-flattenedParam), alphaOmega)
        # exponentialAverage(delta, flattenedParam-oldParam, alphaDelta)
        # exponentialAverage(paramStar, flattenedParam, alphaParam)
    # assign MA output to vars.
    
    # flattenedParam = flatten(param) # make sure the ordering is the same as grad output
    # oldParam = flattenedParam
    # synWeight = calcSynapseWeight(omegaMA, deltaMA)
    # minimize (loss + synapseLoss(synWeight, param, thetaMA))
    
    # update MAs with the minimize output's control dependency
    
# TODO: learn how to copy parameters to be used as oldParams

In [None]:
# Launch the graph: train with the synapse-weight update op and report accuracy.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Total number of optimisation steps: datasetReviewNum passes over the training set
training_iters = int(datasetReviewNum* mnist.train.images.shape[0]/batch_size)


def _testAccuracy(session):
    """Return the mean of `accuracy` over consecutive full batches of the test set.

    The test images are evaluated `batch_size` at a time with dropout disabled
    (keep_prob fed as 1). A trailing partial batch, if the test set size is not
    a multiple of `batch_size`, is skipped, so every averaged batch has the
    same size.

    Args:
        session: the open tf.Session in which the trained variables live.

    Returns:
        np.mean of the per-batch accuracy values.
    """
    nTestBatch = mnist.test.images.shape[0] // batch_size
    acc_list = []
    for i in range(nTestBatch):
        fromIx = i*batch_size
        toIx = fromIx + batch_size
        acc_list.append(session.run(accuracy, feed_dict={x: mnist.test.images[fromIx:toIx],
                                                         y: mnist.test.labels[fromIx:toIx],
                                                         keep_prob: 1.}))
    return np.mean(acc_list)


writer = tf.summary.FileWriter('./logs/synapticIntelligence', graph=tf.get_default_graph())
try:
    # The context manager closes the session on exit; no explicit close() is needed.
    with tf.Session() as sess:
        sess.run(init)
        # Run exactly `training_iters` steps, numbered from 1.
        for step in range(1, training_iters + 1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Fetching assignSynapseWeightOp pulls in the optimizer step and the
            # moving-average updates through its control dependencies, so this
            # single run() call is one full training step.
            loss, classLoss, sLoss, acc, _ = sess.run([cost, classCost, sc, accuracy, assignSynapseWeightOp],
                                                      feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})

            if step % display_step == 0:
                # "sample time" is the number of passes over the training set so far
                print("sample time " + "{:.2f}".format((step*batch_size/ np.float32(mnist.train.images.shape[0]))) +
                      ", Minibatch classLoss= " + "{:.6f}".format(classLoss) +
                      ", sLoss= " + "{:.6f}".format(sLoss) +
                      ", Loss= " + "{:.6f}".format(loss) +
                      ", Tr Acc= " + "{:.5f}".format(acc))

            if step % (display_step*5) == 0:
                print("Testing Accuracy: {}".format(_testAccuracy(sess)))

        # Final evaluation on the test set once training is done
        print("Testing Accuracy: {}".format(_testAccuracy(sess)))
        print("Optimization Finished!")
finally:
    # Close the event-file writer even if training was interrupted
    writer.close()


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
sample time 0.09, Minibatch classLoss= 0.639127, sLoss= 0.133357, Loss= 0.652463, Tr Acc= 0.79000
sample time 0.18, Minibatch classLoss= 0.345801, sLoss= 0.077813, Loss= 0.353583, Tr Acc= 0.87000
sample time 0.27, Minibatch classLoss= 0.435721, sLoss= 0.038561, Loss= 0.439577, Tr Acc= 0.90000
sample time 0.36, Minibatch classLoss= 0.195080, sLoss= 0.064221, Loss= 0.201502, Tr Acc= 0.95000
sample time 0.45, Minibatch classLoss= 0.074921, sLoss= 0.080159, Loss= 0.082937, Tr Acc= 0.99000
Testing Accuracy: 0.966
sample time 0.55, Minibatch classLoss= 0.052160, sLoss= 0.051112, Loss= 0.057272, Tr Acc= 0.99000
sample time 0.64, Minibatch classLoss= 0.169466, sLoss= 0.066657, Loss= 0.176132, Tr Acc= 0.97000
sample time 0.73, Minibatch classLoss= 0.196654, sLoss= 0.065818, Loss= 0.203236, Tr Acc= 0.9300

sample time 7.18, Minibatch classLoss= 0.043086, sLoss= 0.011844, Loss= 0.044271, Tr Acc= 0.98000
sample time 7.27, Minibatch classLoss= 0.032998, sLoss= 0.010715, Loss= 0.034069, Tr Acc= 0.98000
Testing Accuracy: 0.9922
sample time 7.36, Minibatch classLoss= 0.024368, sLoss= 0.057123, Loss= 0.030081, Tr Acc= 0.98000
sample time 7.45, Minibatch classLoss= 0.012227, sLoss= 0.080488, Loss= 0.020276, Tr Acc= 0.99000
sample time 7.55, Minibatch classLoss= 0.002214, sLoss= 0.077079, Loss= 0.009922, Tr Acc= 1.00000
sample time 7.64, Minibatch classLoss= 0.011789, sLoss= 0.070022, Loss= 0.018791, Tr Acc= 0.99000
sample time 7.73, Minibatch classLoss= 0.005326, sLoss= 0.009936, Loss= 0.006319, Tr Acc= 1.00000
Testing Accuracy: 0.9901
sample time 7.82, Minibatch classLoss= 0.002301, sLoss= 0.051193, Loss= 0.007420, Tr Acc= 1.00000
sample time 7.91, Minibatch classLoss= 0.049376, sLoss= 0.023268, Loss= 0.051703, Tr Acc= 0.99000
sample time 8.00, Minibatch classLoss= 0.009110, sLoss= 0.054472, Lo

In [None]:
# Scratch check: take np.floor of a 3-element array and slice out the first element.
# NOTE(review): np.ndarray(shape=(3)) allocates the array without initialising it,
# so the displayed value is arbitrary -- this looks like leftover experimentation.
np.floor(np.ndarray(shape=(3)))[0:1]