In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.learn.python.learn.datasets.mnist import DataSet
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.framework import dtypes
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython import display
from copy import deepcopy
%matplotlib inline
from tensorflow.examples.tutorials.mnist import input_data
import logic 

In [2]:
#Activation function and its derivative
def sigma(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), assembled from elementary TensorFlow ops.

    x: tensor (or any value tf.negative accepts).
    Returns the element-wise sigmoid of x.
    """
    numerator = tf.constant(1.0)
    exp_neg_x = tf.exp(tf.negative(x))
    denominator = tf.add(tf.constant(1.0), exp_neg_x)
    return tf.div(numerator, denominator)

def sigmaprime(x):
    """Derivative of the logistic sigmoid: sigma(x) * (1 - sigma(x)).

    x: tensor (or any value sigma() accepts).
    Returns the element-wise derivative of sigma at x.
    """
    # Build the sigmoid once and reuse it; calling sigma(x) twice would add the
    # same exp/add/div chain to the graph a second time.
    s = sigma(x)
    return tf.multiply(s, tf.subtract(tf.constant(1.0), s))

In [3]:
#Build the 10 MNIST tasks: the original dataset plus 9 pixel-permuted copies
def permute_mnist(mnist, seed=None):
    """Return a deep copy of `mnist` whose image columns (pixels) are shuffled.

    A single random permutation of the column indices is drawn and applied to
    the train, validation and test images alike. The input object is left
    unmodified; everything other than the images is copied unchanged.

    mnist: object with `train`, `validation` and `test` attributes, each
           exposing a 2-D `images` array backed by an `_images` attribute.
    seed:  optional integer. When given, the permutation comes from a private
           np.random.RandomState(seed) so the result is reproducible. When
           None (the default) NumPy's global random state is used.
    """
    num_pixels = mnist.train.images.shape[1]
    rng = np.random if seed is None else np.random.RandomState(seed)
    # permutation() returns a new shuffled index array. Shuffling range(n) in
    # place only works where range() returns a list (Python 2).
    perm_inds = rng.permutation(num_pixels)
    mnist2 = deepcopy(mnist)
    for set_name in ("train", "validation", "test"):
        this_set = getattr(mnist2, set_name)  # split inside the copy, modified in place
        # Reorder every column in one indexing step instead of stacking
        # images[:, c] column by column and transposing the result.
        this_set._images = this_set.images[:, perm_inds]
    return mnist2

# Task 1 is the data as loaded; each of the 9 further tasks is a separately
# permuted copy of it.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
mnist_list = [mnist] + [permute_mnist(mnist) for j in range(9)]

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
#Define a 2-layer feedforward network with 100 hidden neurons.
#Backpropagation is written out by hand so that the input to each weight
#gradient can be multiplied by a per-layer matrix (F_0, F_1) first.
a_0 = tf.placeholder(tf.float32, [None, 784])  # input batch
y = tf.placeholder(tf.float32, [None, 10])     # target batch

middle = 100

#Each weight matrix has one extra input row for the constant-1 bias column
#that the forward pass appends to the layer input.
w_1 = tf.Variable(tf.truncated_normal([785, middle], stddev=0.1))
w_2 = tf.Variable(tf.truncated_normal([middle+1, 10], stddev=0.1))

#Copies of the weights written by updateW_old; the penalty terms below are
#proportional to (w_i - w_old_i).
w_old_1 = tf.Variable(tf.zeros([785, middle]))
w_old_2 = tf.Variable(tf.zeros([middle+1, 10]))



#Conceptors for used spaces (NumPy arrays, initially all zeros)
A_0 = np.zeros([785, 785])
A_1 = np.zeros([middle+1, middle+1])

#Conceptors for free spaces (initially the identity, so gradients start unprojected)
F_0 = tf.Variable(tf.eye(785))
F_1 = tf.Variable(tf.eye(middle+1))


#Forward Pass, ab_i is the state vector together with bias
ab_0 = tf.concat([a_0, tf.tile(tf.ones([1,1]), [tf.shape(a_0)[0], 1])], 1)
z_1 = tf.matmul(ab_0, w_1)
a_1 = sigma(z_1)
ab_1 = tf.concat([a_1, tf.tile(tf.ones([1,1]), [tf.shape(a_1)[0], 1])], 1)
z_2 = tf.matmul(ab_1, w_2)
a_2 = sigma(z_2)

diff = tf.subtract(a_2, y)


#Backward Pass
reg2 = tf.Variable(0.001)
reg1 = tf.Variable(0.001)

#Output layer: the layer input is multiplied by F_1 before forming the weight gradient
d_z_2 = tf.multiply(diff, sigmaprime(z_2))
d_w_2 = tf.matmul(tf.transpose(tf.matmul(ab_1,F_1)), d_z_2)

#Penalty on the weight change since the last updateW_old
inc_w_2 = tf.subtract(w_2, w_old_2)
reg_w_2 = tf.multiply(reg2, inc_w_2)
d_w_2 = tf.add(d_w_2, reg_w_2)


#Hidden layer: propagate through w_2, drop the bias column, then same scheme with F_0
d_ab_1 = tf.matmul(d_z_2, tf.transpose(w_2))
d_a_1 = d_ab_1[:, :-1]
d_z_1 = tf.multiply(d_a_1, sigmaprime(z_1))
d_w_1 = tf.matmul(tf.transpose(tf.matmul(ab_0,F_0)), d_z_1)

inc_w_1 = tf.subtract(w_1, w_old_1)
reg_w_1 = tf.multiply(reg1, inc_w_1)
d_w_1 = tf.add(d_w_1, reg_w_1)

eta = tf.constant(0.1)
#Both gradients must be fully evaluated before either weight matrix is
#overwritten. d_w_1 reads w_2 (through d_ab_1), and without an explicit
#ordering TensorFlow may run the w_2 assignment first, so d_w_1 could be
#computed from weights that were already updated in the same step.
with tf.control_dependencies([d_w_1, d_w_2]):
    step = [
        tf.assign(w_1,
                tf.subtract(w_1, tf.multiply(eta, d_w_1)))

      , tf.assign(w_2,
                tf.subtract(w_2, tf.multiply(eta, d_w_2)))
    ]

#Compute Classification Accuracy (acct_res is the COUNT of correct predictions in the fed batch)
acct_mat = tf.equal(tf.argmax(a_2, 1), tf.argmax(y, 1))
acct_res = tf.reduce_sum(tf.cast(acct_mat, tf.float32))

#Update the old weights, which are the weights before training a task
updateW_old = [tf.assign(w_old_1, w_1), tf.assign(w_old_2, w_2)]

In [5]:
#Open an interactive session and initialize all global variables currently in the graph
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [6]:
#Training the network sequentially on the 10 datasets
task_num = 10
avg_list = []
SA0_list = []
SA1_list = []
prev_list = [[] for x in range(task_num)]


def conceptor_from_states(states, aperture):
    """Compute a conceptor matrix from a batch of state vectors.

    states:   2-D array with one state vector per row.
    aperture: scalar alpha; the singular values s of the state correlation
              matrix are mapped to s / (s + alpha**-2).

    Returns (correlation, basis, singular_values, conceptor_sv, conceptor),
    where conceptor = basis . diag(conceptor_sv) . basis^T.
    """
    correlation = states.T.dot(states) / states.shape[0]
    basis, singular_values, _ = np.linalg.svd(correlation)
    # Element-wise form of diag(s) . inv(diag(s) + alpha**-2 * I); float64 is
    # used to keep the precision of the dense computation without a matrix inverse.
    sv64 = np.asarray(singular_values, dtype=np.float64)
    conceptor_sv = sv64 / (sv64 + float(aperture) ** (-2))
    conceptor = basis.dot(np.diag(conceptor_sv)).dot(basis.T)
    return correlation, basis, singular_values, conceptor_sv, conceptor


#Ops needed by every task are built once here. Creating them inside the task
#loop would add new nodes to the graph on every task, including a fresh
#constant holding each F matrix.
w_1_norm = tf.norm(w_1)
w_2_norm = tf.norm(w_2)
F0_feed = tf.placeholder(tf.float32, F_0.get_shape())
F1_feed = tf.placeholder(tf.float32, F_1.get_shape())
updateF = [tf.assign(F_0, F0_feed), tf.assign(F_1, F1_feed)]

#acct_res is a count of correct predictions, so accuracies are count / set size
test_size = mnist.test.labels.shape[0]

#Iterate over 10 tasks
for j in range(task_num):
    print("Training MNIST %d" % (j+1))

    #Update the parameters for 10000 times
    for i in range(10000):
        batch_xs, batch_ys = mnist_list[j].train.next_batch(50)

        sess.run(step, feed_dict = {a_0: batch_xs, y: batch_ys})

        #Print validation accuracy every 1000 steps
        if i % 1000 == 0:
            res = sess.run(acct_res, feed_dict =
                           {a_0:mnist_list[j].validation.images[:1000],
                            y : mnist_list[j].validation.labels[:1000]})
            print("Validation accuracy: %s" % (res/1000,))

    #Snapshot the weights reached on this task; the penalty terms in the
    #gradients are measured against this snapshot during the next task
    sess.run(updateW_old)

    print("w_1 norm %s" % (sess.run(w_1_norm),))
    print("w_2 norm %s" % (sess.run(w_2_norm),))

    #Print the accuracies on testing set of the task just trained on
    res = sess.run(acct_res, feed_dict =
                           {a_0: mnist_list[j].test.images,
                            y : mnist_list[j].test.labels})
    print("Accuracy on Current Dataset %s" % (res/test_size,))

    res_sum = 0

    print("Test on all Previous Datasets:")
    for i in range(j+1):
        res = sess.run(acct_res, feed_dict =
                               {a_0: mnist_list[i].test.images,
                                y : mnist_list[i].test.labels})
        acc_res = res/test_size
        print(acc_res)
        prev_list[i].append(acc_res)
        res_sum += acc_res
    #Mean over the j+1 tasks seen so far (the loop above always runs at least once)
    avg_res = res_sum/(j+1)
    print("Current Average Accuracy: %s" % (avg_res,))

    avg_list.append(avg_res)

    #Collecting activation vectors to compute conceptors on the input layer
    batch_xs, batch_ys = mnist_list[j].train.next_batch(500)
    a0_collection = batch_xs
    ab0_collection = sess.run(ab_0, feed_dict = {a_0: a0_collection})

    alpha = 4
    R_a0, U_a0, S_a0, S0, C0 = conceptor_from_states(ab0_collection, alpha)
    S_C0 = np.diag(S0)  # dense diagonal form, kept under its original name

    #Collecting activation vectors to compute conceptors on the hidden layer
    ab1_collection = sess.run(ab_1, feed_dict = {a_0: a0_collection})

    alpha1 = 4
    R_a1, U_a1, S_a1, S1, C1 = conceptor_from_states(ab1_collection, alpha1)
    S_C1 = np.diag(S1)  # dense diagonal form, kept under its original name

    #Update the conceptors for used spaces on each layer
    A_0 = logic.OR(C0, A_0)
    A_1 = logic.OR(C1, A_1)

    #Update the conceptors for free space on each layer
    F0 = logic.NOT(A_0)
    F1 = logic.NOT(A_1)

    #Load the new free-space matrices into the graph variables through the
    #pre-built assign ops. NOTE(review): assumes logic.NOT returns array-likes
    #that NumPy can convert to float32 -- confirm against the logic module.
    sess.run(updateF, feed_dict = {F0_feed: np.asarray(F0, dtype=np.float32),
                                   F1_feed: np.asarray(F1, dtype=np.float32)})

Training MNIST 1
Validation accuracy: 0.1
Validation accuracy: 0.56
Validation accuracy: 0.856
Validation accuracy: 0.954
Validation accuracy: 0.955
Validation accuracy: 0.961
Validation accuracy: 0.959
Validation accuracy: 0.968
Validation accuracy: 0.967
Validation accuracy: 0.964
w_1 norm 30.829035
w_2 norm 25.061602
Accuracy on Current Dataset 0.9707
Test on all Previous Datasets:
0.9707
Current Average Accuracy: 0.9707
Training MNIST 2
Validation accuracy: 0.081
Validation accuracy: 0.946
Validation accuracy: 0.959
Validation accuracy: 0.964
Validation accuracy: 0.968
Validation accuracy: 0.972
Validation accuracy: 0.973
Validation accuracy: 0.974
Validation accuracy: 0.97
Validation accuracy: 0.973
w_1 norm 41.850765
w_2 norm 31.380337
Accuracy on Current Dataset 0.9739
Test on all Previous Datasets:
0.9699
0.9739
Current Average Accuracy: 0.9719
Training MNIST 3
Validation accuracy: 0.13
Validation accuracy: 0.951
Validation accuracy: 0.958
Validation accuracy: 0.965
Validation 