In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from keras.datasets import mnist

Using TensorFlow backend.


### Load Data

In [2]:
def make_oh(y, n_classes=None):
    """One-hot encode a sequence of integer class labels.

    Parameters
    ----------
    y : array-like of shape (N,)
        Integer class labels. Values are truncated to ``int``.
    n_classes : int, optional
        Number of columns in the encoding. When omitted it is ``max(y) + 1``
        (or 0 for empty input). Counting distinct labels instead would make
        the matrix too narrow whenever a label is missing from ``y``
        (e.g. ``[0, 2]``), which then fails with an IndexError.

    Returns
    -------
    numpy.ndarray of shape (N, n_classes)
        Float array with a single 1 per row, in the column given by the label.
    """
    labels = np.asarray(y).reshape(-1).astype(int)
    N = labels.shape[0]

    if n_classes is None:
        # Highest label decides the width; empty input yields a (0, 0) array.
        n_classes = int(labels.max()) + 1 if N else 0

    y_oh = np.zeros((N, n_classes))
    # Vectorised replacement for the per-row Python loop.
    y_oh[np.arange(N), labels] = 1

    return y_oh

In [3]:
# Load the MNIST train/test splits through the Keras dataset helper.
# NOTE(review): the images are fed to the networks below exactly as loaded;
# no rescaling/normalisation step is visible in this notebook — confirm that
# is intended.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
# One-hot encode the integer labels; the networks' `labels` placeholder and
# the softmax cross-entropy loss consume one row of class indicators per image.
y_train_2 = make_oh(y_train)
y_test_2 = make_oh(y_test)

### Mexican Hat Network

In [5]:
# Graph inputs: a batch of 28x28 images and the matching one-hot labels
# (10 classes). The leading `None` leaves the batch size free.
x = tf.placeholder(dtype = tf.float32, shape = (None,28,28), name = 'x_input')
y = tf.placeholder(dtype = tf.float32, shape = (None,10), name = 'labels')

# Scalar wavelet parameters, one (dilation, translation) pair per activation
# layer. Every dilation starts at 0.2 and every translation at 0.0.
# tf.get_variable registers them by name in the current graph.
d_1 = tf.get_variable(name = 'dilation_1',dtype = tf.float32, initializer=tf.constant(0.2, dtype = tf.float32))
t_1 = tf.get_variable(name = 'translation_1', dtype = tf.float32, initializer=tf.constant(0.0, dtype = tf.float32))

d_2 = tf.get_variable(name = 'dilation_2',dtype = tf.float32, initializer=tf.constant(0.2, dtype = tf.float32))
t_2 = tf.get_variable(name = 'translation_2', dtype = tf.float32, initializer=tf.constant(0.0, dtype = tf.float32))

# The third pair is only referenced from a commented-out line in the network
# cell, so no active code in this notebook uses it.
d_3 = tf.get_variable(name = 'dilation_3',dtype = tf.float32, initializer=tf.constant(0.2, dtype = tf.float32))
t_3 = tf.get_variable(name = 'translation_3', dtype = tf.float32, initializer=tf.constant(0.0, dtype = tf.float32))

In [6]:
def wavelet_act(x,d,t):
    """Wavelet-style activation u * exp(-u**2 / 2) with u = d * (x - t).

    Args:
        x: input tensor.
        d: dilation factor applied after the shift.
        t: translation subtracted from ``x`` before scaling.

    Returns:
        Tensor holding the element-wise activation.
    """
    scaled = tf.multiply(d, (x - t))
    envelope = tf.exp(-0.5*scaled**2)
    return scaled*envelope

In [7]:
def mexican_hat_wt(x,d,t):
    """Mexican hat activation (1 - 2*pi^2*d^2*u^2) * exp(-pi^2*d^2*u^2), u = x - t.

    Args:
        x: input tensor.
        d: dilation; enters the formula squared, multiplying the shifted input.
        t: translation subtracted from ``x``.

    Returns:
        Tensor holding the element-wise activation.
    """
    shifted = x - t
    tf_pi = tf.constant(np.pi,dtype=tf.float32)

    # Name the squared factors once; both terms multiply them in the same
    # left-to-right order as the one-line form of the formula.
    pi_sq = tf_pi**2
    d_sq = d**2
    shifted_sq = shifted**2

    polynomial = 1-2*pi_sq*d_sq*shifted_sq
    envelope = tf.exp(-pi_sq*d_sq*shifted_sq)
    return polynomial*envelope

In [8]:
# Mexican hat network: two 3x3 conv layers, each followed by the learnable
# Mexican hat activation, then max-pooling, a ReLU dense layer and a linear
# 10-way output layer.
#check_wt = mexican_hat_wt(x,d_1,t_1)

# Add a trailing channel axis so the image batch is 4-D for conv2d.
x_new = tf.expand_dims(x,axis = -1)

# activation=None keeps the conv output linear; the non-linearity is applied
# explicitly with the layer's own (dilation, translation) pair.
z_1 = tf.layers.conv2d(x_new,32,[3,3],activation=None)
a_1 = mexican_hat_wt(z_1,d_1,t_1)

z_2 = tf.layers.conv2d(a_1,64,[3,3],activation=None)
a_2 = mexican_hat_wt(z_2,d_2,t_2)

# 2x2 pooling window with stride 2.
max_pool_1 = tf.layers.max_pooling2d(a_2,[2,2],[2,2])

flat = tf.layers.flatten(max_pool_1)

# The dense hidden layer uses ReLU; the wavelet variant below is disabled.
fc_a_1 = tf.layers.Dense(32,activation='relu')(flat)
#fc_a_1 = mexican_hat_wt(fc_z_1,d_3,t_3)


# Unnormalised class scores; the loss cell applies the softmax itself.
logits = tf.layers.Dense(10)(fc_a_1)
# Predicted class index per example.
y_hat = tf.argmax(tf.nn.softmax(logits),axis = 1)

# Fraction of the fed batch whose prediction matches the one-hot label.
acc = tf.reduce_mean(tf.cast(tf.equal(y_hat,tf.argmax(y,axis = 1)),dtype = tf.float32))

In [9]:
# softmax_cross_entropy_with_logits_v2 returns one loss value per example.
# Reduce it to the batch mean so the optimizer minimises an explicit scalar
# whose scale does not depend on the batch size, instead of relying on the
# gradient computation to sum a vector-valued loss implicitly.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels = y, logits=logits))
optimizer = tf.train.AdamOptimizer()

# One Adam update of every trainable variable that the loss depends on.
tr_step = optimizer.minimize(loss)

### Mexican Hat Training

In [19]:
# Number of full passes over the training set.
epochs = 20
# Number of equal chunks np.split cuts the training arrays into, i.e. the
# number of batches per epoch (not the batch size).
split_size = 100
# Number of test examples sampled for each accuracy report.
val_size = 1000

In [20]:
# Train the Mexican hat network, reporting accuracy on the current training
# batch and on a random sample of `val_size` test images.

# Loop-invariant, so build it once instead of once per batch. It asks
# TensorFlow to list tensor allocations if a run hits an out-of-memory error.
run_options = tf.RunOptions(report_tensor_allocations_upon_oom = True)

with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        print('Epoch: {}'.format(i+1))
        # np.split yields `split_size` equal, consecutive chunks; images and
        # labels are split identically so the pairs stay aligned.
        batches = zip(np.split(x_train,split_size),np.split(y_train_2,split_size))
        for j,(batch_x,batch_y) in enumerate(batches):
            this_acc,_ = sess.run([acc,tr_step],feed_dict = {x:batch_x,y:batch_y}, options=run_options)

            # Report on every 100th batch (counting from the first) and on
            # the last batch of the epoch.
            if j%100 == 0 or j == split_size-1:

                # Random test indices, drawn with replacement.
                idxs = np.random.randint(0,len(y_test_2),size=val_size)

                x_this_test = x_test[idxs]
                y_this_test = y_test_2[idxs]

                this_test_acc = sess.run(acc, feed_dict = {x:x_this_test,y:y_this_test}, options=run_options)
                print('It {:d}:\tTrain Acc {:02.2f}%\n\tTest Acc {:02.2f}%\n'.format(j+1,this_acc*100,this_test_acc*100))

    #check_values = np.linspace(-10,10,num = 784)
    #check_values = np.reshape(check_values,(1,784))

    #edo = sess.run(check_wt,feed_dict = {x:check_values})

Epoch: 1
It 1:	Train Acc 11.00%
	Test Acc 8.80%

It 100:	Train Acc 45.83%
	Test Acc 40.70%

Epoch: 2
It 1:	Train Acc 44.33%
	Test Acc 42.10%

It 100:	Train Acc 77.67%
	Test Acc 74.50%

Epoch: 3
It 1:	Train Acc 77.33%
	Test Acc 75.20%

It 100:	Train Acc 90.33%
	Test Acc 88.80%

Epoch: 4
It 1:	Train Acc 89.17%
	Test Acc 87.20%

It 100:	Train Acc 95.50%
	Test Acc 94.60%

Epoch: 5
It 1:	Train Acc 95.17%
	Test Acc 94.40%

It 100:	Train Acc 96.83%
	Test Acc 96.90%

Epoch: 6
It 1:	Train Acc 96.50%
	Test Acc 96.80%

It 100:	Train Acc 97.67%
	Test Acc 97.20%

Epoch: 7
It 1:	Train Acc 97.00%
	Test Acc 97.70%

It 100:	Train Acc 98.00%
	Test Acc 98.10%

Epoch: 8
It 1:	Train Acc 97.67%
	Test Acc 96.90%

It 100:	Train Acc 98.67%
	Test Acc 98.00%

Epoch: 9
It 1:	Train Acc 98.17%
	Test Acc 98.10%

It 100:	Train Acc 98.50%
	Test Acc 98.00%

Epoch: 10
It 1:	Train Acc 98.50%
	Test Acc 98.00%

It 100:	Train Acc 98.50%
	Test Acc 98.10%

Epoch: 11
It 1:	Train Acc 98.67%
	Test Acc 98.40%

It 100:	Train Acc 9

In [None]:
# Plot the Mexican hat activation over [-10, 10].
# `check_values` and `edo` were previously assigned only in commented-out
# lines of the training cell, so this cell raised NameError on a fresh run.
# They are computed here instead. The trained dilation/translation are not
# available once the training session has closed, so the curve uses the
# initial parameter values (dilation 0.2, translation 0.0).
check_values = np.linspace(-10,10,num = 784)
check_wt = mexican_hat_wt(tf.constant(check_values, dtype = tf.float32),
                          tf.constant(0.2, dtype = tf.float32),
                          tf.constant(0.0, dtype = tf.float32))

with tf.Session() as sess:
    edo = sess.run(check_wt)

plt.plot(np.reshape(check_values,(784,)),np.reshape(edo,(784,)))
plt.xlabel('input')
plt.ylabel('activation');

### ReLU Network

In [5]:
# Inputs for the ReLU baseline: a batch of 28x28 images and one-hot labels.
# These rebind `x` and `y`, replacing the Mexican hat network's placeholders.
# NOTE(review): the execution counter restarts at 5 here, which suggests this
# section was run in a fresh kernel — confirm before running both sections
# in a single session.
x = tf.placeholder(tf.float32, shape = (None,28,28))
y = tf.placeholder(tf.float32, shape = (None,10), name = 'labels')

In [6]:
# ReLU baseline: the same layer sizes as the Mexican hat network, with ReLU
# used as the activation of both conv layers.

# Add a trailing channel axis so the image batch is 4-D for conv2d.
x_new = tf.expand_dims(x,axis = -1)

a_1 = tf.layers.conv2d(x_new,32,[3,3],activation='relu')
a_2 = tf.layers.conv2d(a_1,64,[3,3],activation='relu')

# 2x2 pooling window with stride 2.
max_pool_1 = tf.layers.max_pooling2d(a_2,[2,2],[2,2])

flat = tf.layers.flatten(max_pool_1)
fc_a_1 = tf.layers.Dense(32,activation='relu')(flat)

# Unnormalised class scores; the loss cell applies the softmax itself.
logits = tf.layers.Dense(10)(fc_a_1)
# Predicted class index per example.
y_hat = tf.argmax(tf.nn.softmax(logits),axis = 1)

# Fraction of the fed batch whose prediction matches the one-hot label.
acc = tf.reduce_mean(tf.cast(tf.equal(y_hat,tf.argmax(y,axis = 1)),dtype = tf.float32))

In [7]:
# softmax_cross_entropy_with_logits_v2 returns one loss value per example.
# Reduce it to the batch mean so the optimizer minimises an explicit scalar
# whose scale does not depend on the batch size, instead of relying on the
# gradient computation to sum a vector-valued loss implicitly.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels = y, logits=logits))
optimizer = tf.train.AdamOptimizer()

# One Adam update of every trainable variable that the loss depends on.
tr_step = optimizer.minimize(loss)

### ReLU Training

In [8]:
# Number of full passes over the training set.
epochs = 50
# Number of equal chunks np.split cuts the training arrays into, i.e. the
# number of batches per epoch (not the batch size).
split_size = 50
# Number of test examples sampled for each accuracy report.
val_size = 1000

In [9]:
# Train the ReLU baseline, reporting accuracy on the current training batch
# and on a random sample of `val_size` test images.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(epochs):
        print('Epoch: {}'.format(i+1))

        # Images and labels are split identically so the pairs stay aligned.
        x_batches = np.split(x_train,split_size)
        y_batches = np.split(y_train_2,split_size)

        for j,(batch_x,batch_y) in enumerate(zip(x_batches,y_batches)):
            train_feed = {x:batch_x,y:batch_y}
            this_acc,_ = sess.run([acc,tr_step],feed_dict = train_feed)

            # Only every 100th batch (counting from the first) and the last
            # batch of the epoch produce a report.
            is_report_step = (j%100 == 0) or (j == split_size-1)
            if not is_report_step:
                continue

            # Random test indices, drawn with replacement.
            idxs = np.random.randint(0,len(y_test_2),size=val_size)
            x_this_test = x_test[idxs]
            y_this_test = y_test_2[idxs]

            test_feed = {x:x_this_test,y:y_this_test}
            this_test_acc = sess.run(acc, feed_dict = test_feed)
            print('It {:d}:\tTrain Acc {:02.2f}%\n\tTest Acc {:02.2f}%\n'.format(j+1,this_acc*100,this_test_acc*100))

Epoch: 1
It 1:	Train Acc 15.00%
	Test Acc 10.00%

It 50:	Train Acc 42.25%
	Test Acc 41.30%

Epoch: 2
It 1:	Train Acc 39.92%
	Test Acc 41.00%

It 50:	Train Acc 76.00%
	Test Acc 74.60%

Epoch: 3
It 1:	Train Acc 73.67%
	Test Acc 76.00%

It 50:	Train Acc 94.67%
	Test Acc 91.80%

Epoch: 4
It 1:	Train Acc 92.17%
	Test Acc 91.70%

It 50:	Train Acc 95.92%
	Test Acc 92.80%

Epoch: 5
It 1:	Train Acc 93.92%
	Test Acc 94.70%

It 50:	Train Acc 96.67%
	Test Acc 95.30%

Epoch: 6
It 1:	Train Acc 95.00%
	Test Acc 94.90%

It 50:	Train Acc 97.42%
	Test Acc 95.50%

Epoch: 7
It 1:	Train Acc 96.25%
	Test Acc 95.80%

It 50:	Train Acc 97.58%
	Test Acc 95.20%

Epoch: 8
It 1:	Train Acc 96.67%
	Test Acc 96.10%

It 50:	Train Acc 97.67%
	Test Acc 94.90%

Epoch: 9
It 1:	Train Acc 96.83%
	Test Acc 96.40%

It 50:	Train Acc 97.75%
	Test Acc 96.70%

Epoch: 10
It 1:	Train Acc 97.00%
	Test Acc 95.90%

It 50:	Train Acc 98.08%
	Test Acc 96.70%

Epoch: 11
It 1:	Train Acc 97.17%
	Test Acc 96.50%

It 50:	Train Acc 98.25%
	Tes