In [5]:
import tensorflow as tf
import numpy as np
import time
import csv
import matplotlib.pyplot as plt

np.set_printoptions(precision=4, suppress=True)

In [6]:
seed=0
tf.set_random_seed(seed)

mnist = tf.keras.datasets.mnist
(x_train_data, y_train_cold),(x_test_data,y_test_cold) = mnist.load_data()
x_train_data, x_test_data = x_train_data / 255.0, x_test_data / 255.0

In [7]:
with tf.Session() as sess:
    y_train = sess.run(tf.one_hot(y_train_cold,10))
    y_test =  sess.run(tf.one_hot(y_test_cold,10))

In [8]:
x_train=[]
x_test=[]

n_train = len(x_train_data)
n_test = len(x_test_data)

for i in range(len(x_train_data)):
    x_train.append(np.ndarray.flatten(x_train_data[i]))
    
for i in range(len(x_test_data)):
    x_test.append(np.ndarray.flatten(x_test_data[i]))

In [9]:
x_train = np.reshape(x_train,[n_train,784])
x_test = np.reshape(x_test,[n_test, 784])

In [10]:
sigma=0.001
batch_size=1000

In [39]:
def build_graph(n_hl1, ii, update_rule):
    g=tf.Graph()
    with g.as_default():        
        init=tf.global_variables_initializer()
        
        x = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='x')
        y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
        
        xavier = tf.contrib.layers.xavier_initializer(seed=0)

        w1 = tf.Variable(xavier(shape = ([n_hl1,784])))
        b1 = tf.Variable(xavier(shape = ([n_hl1])))

        w2 = tf.Variable(xavier([10,n_hl1]), dtype=tf.float32)
        b2 = tf.Variable(xavier([10]), dtype=tf.float32)
        
        lr = tf.Variable(xavier([1] , dtype=tf.float32), name='lr')
        
        h1 = tf.matmul(x, tf.transpose(w1))
        x1 = tf.nn.relu(tf.add(h1,b1))
        h2 =  tf.matmul(x1, tf.transpose(w2))
        pred = tf.nn.softmax(tf.add(h2, b2))

        error=tf.reduce_mean(np.square(pred-y),1)
        
        if(update_rule=='ip' or update_rule=='np'):
            if(update_rule=='ip'):
                s_in = tf.random_normal(shape = [batch_size,784], mean=0, stddev=sigma, dtype=tf.float32, name='s_in')
                s_1 = tf.zeros([1], dtype=tf.float32, name='s_1')
                s_2 = tf.zeros([1], dtype=tf.float32, name='s_2')
            else:
                s_in = tf.zeros([1], dtype=tf.float32, name='s_in')
                s_1 = tf.random_normal(shape = [batch_size,n_hl1], mean=0, stddev=sigma, dtype=tf.float32, name='s_1')
                s_2 = tf.random_normal(shape = [batch_size,10], mean=0, stddev=sigma, dtype=tf.float32, name='s_2')
                
            h1_star = tf.matmul(tf.add(x,s_in), tf.transpose(w1))
            x1_star = tf.nn.relu(tf.add(tf.add(h1_star, b1), s_1))
            h2_star = tf.matmul(x1_star, tf.transpose(w2))
            pred_star = tf.nn.softmax(tf.add(tf.add(h2_star, b2), s_2))

            error_star=tf.reduce_mean(np.square(pred_star-y),1)
            var = sigma ** 2
            k = -lr * (error_star - error) / var
        
            if(update_rule=='ip'):
                del_h1=h1_star-h1
                del_h2=h2_star-h2
    
            elif(update_rule=='np'):
                del_h1=s_1
                del_h2=s_2
            
            delta_w1=tf.einsum('ki,kj->kij', del_h1, x)
            delta_w2=tf.einsum('ki,kj->kij', del_h2, h1)
                
            delta_w1=tf.einsum('kij,k->kij',delta_w1,k)
            delta_w2=tf.einsum('kij,k->kij',delta_w2,k)
            delta_b1=tf.einsum('ki,k->ki',del_h1,k)
            delta_b2=tf.einsum('ki,k->ki',del_h2,k)
    
            delta_w1=tf.reduce_mean(delta_w1,0)
            delta_w2=tf.reduce_mean(delta_w2,0)
            delta_b1=tf.reduce_mean(delta_b1,0)
            delta_b2=tf.reduce_mean(delta_b2,0)
            
            norm_delta_w2=tf.linalg.norm(delta_w2)
            tf.identity(norm_delta_w2, 'norm_delta_w2')
            
            update_w1=tf.assign(w1, tf.add(w1,delta_w1), name='update_w1')
            update_w2=tf.assign(w2, tf.add(w2,delta_w2), name='update_w2')
            update_b1=tf.assign(b1, tf.add(b1,delta_b1), name='update_b1')
            update_b2=tf.assign(b2, tf.add(b2,delta_b2), name='update_b2')
        
        change_lr=tf.assign(lr, tf.reshape(learning_rates[ii], [1]), name='change_lr')
        norm_w2=tf.linalg.norm(w2)
        tf.identity(norm_w2,'norm_w2')
        
        optimizer=tf.train.GradientDescentOptimizer(learning_rate=lr[0], name='optimizer').minimize(error)

        is_correct = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
        accuracy = 100*tf.reduce_mean(tf.cast(is_correct, tf.float32))
        tf.identity(accuracy, 'accuracy')
    
    return g

In [23]:
def find_acc(which_acc):
    acc=[]
    if(which_acc=='test'):
        acc.append(sess.run('accuracy:0', feed_dict={'x:0':x_test, 'y:0':y_test}))
        
    elif(which_acc=='train'):
        n=int(n_train/n_test)
        for i in range(n):
            acc.append(sess.run('accuracy:0', feed_dict={'x:0':x_train[i*n_test:(i+1)*n_test], 'y:0':y_train[i*n_test:(i+1)*n_test]}))
    else:
        print('wrong accuracy requested!!')
    return np.mean(acc)

In [19]:
def print_acc():    
    train_acc.append(find_acc('train'))
    test_acc.append(find_acc('test'))
    print('epoch : ', i+1, '  test_acc : ', test_acc[-1])
    sec=int(time.time()-startii)
    print(int(sec/60),'m ', int(sec%60),'s')
    
    return test_acc[-1] 

In [31]:
def train_network():
    for j in range(int(n_train/batch_size)):
        ind = np.random.randint(0, n_train, size=(batch_size))
        
        if(update_rule=='sgd'):
            ops=['optimizer']
            
        elif(update_rule=='ip' or 'np'):
            updates= ['update_w1:0', 'update_w2:0', 'update_b1:0', 'update_b2:0']
            squiggles= ['s_in:0', 's_1:0', 's_2:0']
            ops=[squiggles, updates]

        else:
            ops=[]

        sess.run(ops, feed_dict = {'x:0':x_train[ind], 'y:0':y_train[ind]})
    
    return

In [40]:
def write_in_file():
    row=test_acc
    with open('Desktop/code/ip/test_acc.csv', 'a') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(row)

    row=train_acc
    with open('Desktop/code/ip/train_acc.csv', 'a') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(row)

    row=w2_norm
    with open('Desktop/code/ip/norm_w1.csv', 'a') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(row)

    row=delta_w2_norm
    with open('Desktop/code/ip/delta_norm_w1.csv', 'a') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(row)
    
    csvFile.close()
    return

In [43]:
n_epochs=2000
interval=5

hl_sizes = [100, 300,1000, 3000]
learning_rates = np.arange(0.04,0.01,-0.005).astype(np.float32)
test_acc=[]
train_acc=[]
delta_w2_norm=[]
update_rule='ip'

In [44]:
start=time.time()

for ii in range(len(hl_sizes)):
    startii=time.time()
    test_acc=[]
    w2_norm=[]
    graph=build_graph(hl_sizes[ii], ii, update_rule)
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run('change_lr:0')
        print('TRAINING : ', hl_sizes[ii],' hidden units', '\nlearning rate : ', sess.run('lr:0'), '\n')
        for i in range(n_epochs):
            if i%interval==0:
                if(print_acc()>98):
                    break
                    
            train_network()
                
        sess.close()
        write_in_file()

sec=int(time.time()-start)
print('TOTAL TIME : ', int(sec/60),'m ', int(sec%60),'s')

TRAINING :  100  hidden units 
learning rate :  [0.04] 

epoch :  1   test_acc :  4.69
0 m  0 s
epoch :  6   test_acc :  41.61
0 m  4 s
epoch :  11   test_acc :  58.48
0 m  8 s
epoch :  16   test_acc :  68.72
0 m  12 s
epoch :  21   test_acc :  75.28
0 m  16 s
epoch :  26   test_acc :  83.91
0 m  20 s
epoch :  31   test_acc :  85.77
0 m  24 s
epoch :  36   test_acc :  87.11
0 m  28 s
epoch :  41   test_acc :  87.98
0 m  32 s


KeyboardInterrupt: 

In [None]:
fig, ax1 = plt.subplots(figsize=(8, 5.5))
xx=np.arange(0,n_epochs, interval)
ax1.set_xlabel('epochs')
ax1.set_ylabel('accuracy')
ax1.plot(xx, test_acc, color='tab:red', label='test accuracy')
ax1.plot(xx, train_acc, color='tab:blue', label='training accuracy')
ax1.legend()

axes = plt.gca()

ax1.set_facecolor("#ffffb3")

ax1.annotate(str(round(test_acc[-1],1)),xy=(xx[-1]-0.5,test_acc[-1]+0.7), color='tab:red')
ax1.annotate(str(round(train_acc[-1],1)),xy=(xx[-1]-0.5,train_acc[-1]-2.2), color='tab:blue')

plt.grid(True, color="#93a1a1", alpha=0.3)
plt.show()
plt.savefig('plot1.svg', dpi=500)