## pure tf for benchmarking

In [3]:
'''
# if working on laptop on local docker:
nvidia-docker run -p 8710:8888 -v ~/dl_cas/:/notebooks -p 8711:6006 -v /cluster/home/sick/:/notebooks/local -it oduerr/tf_docker:gpu_r
'''

'\n# if working on laptop on local docker:\ndocker run -p 4242:8888 -v ~/dl_cas/:/notebooks -p 6006:6006 -it oduerr/tf_docker:tf1_py3\n'

## Imports

In [4]:
# python module imports needed in customized functions:
import numpy as np
import tensorflow as tf
import sys


In [5]:
# additional imports of python modules

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import time
import pandas as pd
#tf.set_random_seed(1)
#np.random.seed(1)
import sys
tf.__version__, sys.version_info



('1.4.1',
 sys.version_info(major=3, minor=5, micro=2, releaselevel='final', serial=0))

In [6]:
def my_fc_bn(Ylogits, offset, scope):
    with tf.variable_scope(scope) as v_scope:
        mean, variance = tf.nn.moments(Ylogits, [0])
        m = mean
        v = variance
        bnepsilon = 1e-8 #A small float number to avoid dividing by 0
        Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
        return Ybn

In [7]:
def convertToOneHot(vector, num_classes=None):
    result = np.zeros((len(vector), num_classes), dtype='int32')
    result[np.arange(len(vector)), vector] = 1
    return result

# Data read-in 

### Load small external MNIST data set when for working local on windows

In [8]:

# upload mnist_4000.pkl.gz which we have used in the DL course to home
# To be compatible with python3 and python2
try:
    import cPickle as pickle
except ImportError:
    import pickle
import gzip

with gzip.open('mnist_4000.pkl.gz', 'rb') as f:
    if sys.version_info.major > 2:
        (X,y) = pickle.load(f, encoding='latin1')
    else:
        (X,y) = pickle.load(f)
PIXELS = len(X[0,0,0,:])

# if images are not flatten (like in mnist) we need first to flatten them
# now flatten images for fc ladder

X = X.reshape([4000, 784])
#X = X/255 # is already normalized

print("small data before split X.shape", X.shape)
print("small data before  y.shape", y.shape) 

x_train = X[0:3000]
y_train = y[0:3000]
x_test = X[3000:4000]
y_test = y[3000:4000]


print("small data x_train.shape:", x_train.shape)
print("small data y_train.shape:",y_train.shape)
print("small data x_test.shape:",x_test.shape)
print("small data y_test.shape:",y_test.shape)

num_class= len(np.unique(y))
print("num_class:",num_class)



small data before split X.shape (4000, 784)
small data before  y.shape (4000,)
small data x_train.shape: (3000, 784)
small data y_train.shape: (3000,)
small data x_test.shape: (1000, 784)
small data y_test.shape: (1000,)
num_class: 10


### Or load full MNIST dataset directly from internet

In [9]:
'''
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# if images are not flatten (like in mnist) we need first to flatten them
# now flatten images for fc ladder

x_train = x_train.reshape(-1,784)
x_test = x_test.reshape(-1,784)

print("large data x_train.shape:", x_train.shape)
print("large data y_train.shape:",y_train.shape)
print("large data x_test.shape:",x_test.shape)
print("large data x_test.shape:",y_test.shape)
'''

'\nfrom keras.datasets import mnist\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\n# if images are not flatten (like in mnist) we need first to flatten them\n# now flatten images for fc ladder\n\nx_train = x_train.reshape(-1,784)\nx_test = x_test.reshape(-1,784)\n\nprint("large data x_train.shape:", x_train.shape)\nprint("large data y_train.shape:",y_train.shape)\nprint("large data x_test.shape:",x_test.shape)\nprint("large data x_test.shape:",y_test.shape)\n'

In [10]:
x_train[:200].shape

(200, 784)

In [11]:
np.mean(x_train[:,200])

-0.0042952602

In [12]:
np.std(x_train[:,200])

0.83271211

# Lets construct a fc NN (784->500->50->10) without noise and unsupervised task to get a benchmark for the loss and accuracy

In [13]:
# reset the default graph
tf.reset_default_graph()

In [14]:
# define placeholder which we need later to feed in our data:
# be sure that input data is normalized
x = tf.placeholder(tf.float32, shape=[None, 784], name='x_data')
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_data')

In [15]:
# our benchmark model hast 3 hidden layers
# x:h0:784 -> h1:500 -> h2:50 -> h3:10 (softmax)

W1 = tf.Variable(tf.random_uniform(shape=[784, 500],minval=-0.05, maxval=0.05))  
Blt1 = tf.Variable(tf.zeros([500]))
B1 = tf.Variable(tf.zeros([500]))
W2 = tf.Variable(tf.random_uniform(shape=[500, 50],minval=-0.05, maxval=0.05))
Blt2 = tf.Variable(tf.zeros([50]))
B2 = tf.Variable(tf.zeros([50]))
W3 = tf.Variable(tf.random_uniform(shape=[50, 10],minval=-0.05, maxval=0.05))
Blt3 = tf.Variable(tf.zeros([10]))
B3 = tf.Variable(tf.zeros([10]))


In [16]:
# define the model architecture as encoder in ladder: 

#x = my_norm(x, "initial_z_trafo")

hn_lt_1 = tf.matmul(x, W1) + Blt1 
hn_bn_1 = my_fc_bn(Ylogits=hn_lt_1, offset=B1, scope="bn")
hn_nlt_1 = tf.nn.relu(hn_bn_1)

hn_lt_2 = tf.matmul(hn_nlt_1, W2) + Blt2
hn_bn_2 = my_fc_bn(Ylogits=hn_lt_2, offset=B2, scope="bn")
hn_nlt_2 = tf.nn.relu(hn_bn_2)

hn_lt_3 = tf.matmul(hn_nlt_2, W3) + Blt3
hn_bn_3 = hn_lt_3 #my_fc_bn(Ylogits=hn_lt_3, offset=B3, scope="bn")
out = tf.nn.softmax(hn_bn_3)  # TODO : IS THIS ERROR PRONE? 

In [17]:
########################################################################
# supervised loss
############################################################################

# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), 
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN

with tf.name_scope("loss_supervised"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=hn_bn_3, labels=y_true)
    # loss from supervised learning:
    loss_supervised = tf.reduce_mean(cross_entropy) 

In [18]:
# train Optimizer
# if using Adam it is important that the definigion of the train step has 
# has to appear before the intialization step (since inside adam optimizer variables
# are defined which then have to be intialized as well)
with tf.name_scope("train_step"):
    train_op = tf.train.AdamOptimizer(0.001).minimize(loss_supervised)
    #train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss_supervised)

In [19]:
with tf.name_scope("initialize"):
    init_op = tf.global_variables_initializer() 

In [20]:
'''
## test dimensions
#init_op = tf.global_variables_initializer() 
# run the graph
sess = tf.Session()
sess.run(init_op) #initialization on the concrete realization of the graph
'''


'\n## test dimensions\n#init_op = tf.global_variables_initializer() \n# run the graph\nsess = tf.Session()\nsess.run(init_op) #initialization on the concrete realization of the graph\n'

In [21]:

'''
loss_, _, out_= sess.run(
    feed_dict={x:x_train[0:5], y_true:convertToOneHot(y_train[0:5], 10)},
    fetches=(loss_supervised, train_op, out)) 
'''

'\nloss_, _, out_= sess.run(\n    feed_dict={x:x_train[0:5], y_true:convertToOneHot(y_train[0:5], 10)},\n    fetches=(loss_supervised, train_op, out)) \n'

In [22]:
#We want to visualize the development of the following variables in tensorboard:
for v in tf.trainable_variables():
    print("Adding ", v.name)
    tf.summary.histogram(v.name, v)

Adding  Variable:0
INFO:tensorflow:Summary name Variable:0 is illegal; using Variable_0 instead.
Adding  Variable_1:0
INFO:tensorflow:Summary name Variable_1:0 is illegal; using Variable_1_0 instead.
Adding  Variable_2:0
INFO:tensorflow:Summary name Variable_2:0 is illegal; using Variable_2_0 instead.
Adding  Variable_3:0
INFO:tensorflow:Summary name Variable_3:0 is illegal; using Variable_3_0 instead.
Adding  Variable_4:0
INFO:tensorflow:Summary name Variable_4:0 is illegal; using Variable_4_0 instead.
Adding  Variable_5:0
INFO:tensorflow:Summary name Variable_5:0 is illegal; using Variable_5_0 instead.
Adding  Variable_6:0
INFO:tensorflow:Summary name Variable_6:0 is illegal; using Variable_6_0 instead.
Adding  Variable_7:0
INFO:tensorflow:Summary name Variable_7:0 is illegal; using Variable_7_0 instead.
Adding  Variable_8:0
INFO:tensorflow:Summary name Variable_8:0 is illegal; using Variable_8_0 instead.


In [23]:
# We want to visualize the development of the loss in tensorboard

tf.summary.scalar("loss_supervised", loss_supervised)

<tf.Tensor 'loss_supervised_1:0' shape=() dtype=string>

In [24]:
! rm -rf /tmp/ladder

In [25]:
! mkdir /tmp/ladder

In [26]:
! ls /tmp/ladder

In [27]:
# collect all summaries for tensorboard and define the directory for saved summary files 

merged_summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter("/tmp/ladder", tf.get_default_graph())

In [30]:
sess = tf.Session() 
sess.run(init_op)

In [31]:
# check the shape of the feeds:
#x = tf.placeholder(tf.float32, shape=[None, 784], name='x_data')
#y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_data')
print("x_train.shape:", x_train.shape)  
print("convertToOneHot(y_train, 10).shape:", convertToOneHot(y_train, 10).shape)  #

x_train.shape: (3000, 784)
convertToOneHot(y_train, 10).shape: (3000, 10)


In [32]:
idx = np.random.permutation(len(x_train)) #Easy minibatch of size 128
print(idx)
i=23
print(idx[i*128:(i*128)+128])
np.random.permutation(idx[i*128:(i*128)+128])

[1938  546   96 ..., 2130 1083  509]
[1782 1812  746 2242 2597  282 1597 2474 2914  716 1410 2924  276 2111 2386
 1436 1923 1445 1240  804 1623 1824 2737 1053 1055  788 2400 1570  106 1748
 2373 2985  620 1557 2858  739  328 1501 2056  852 2900 2453 1935   68 1309
 1296 2039  209  438 2079  500 2392  401 2130 1083  509]


array([2400,  620, 2474, 1296,  282,  739,  716,  276,  328, 2924, 1240,
       1812, 1597, 2111, 2056,  788, 1782, 2242,  106, 2900, 2985, 2597,
        804, 1748, 1824, 2130, 2858, 1501, 1570, 2453, 2039,  401, 1923,
         68,  209, 1935,  509, 1083, 2079, 2914, 1445, 1309, 2373, 1410,
       2386, 1436,  438, 1053, 2392, 1055, 1623, 2737,  500,  746,  852,
       1557])

In [33]:
vals = []
for i in range(240):
    idx = np.random.permutation(len(x_train))[0:128] #Easy minibatch of size 128
    #print(idx[0])
    loss_, _, res_ = sess.run((loss_supervised, train_op, out), 
                              feed_dict={x:x_train[idx], y_true:convertToOneHot(y_train[idx], 10)})
    if (i % 23 == 0):#50
        acc = np.average(np.argmax(res_, axis = 1) == y_train[idx])
        # Get the results for the validation results 
        loss_v, res_val, summary_ = sess.run([loss_supervised, out, merged_summary_op], 
                                                          feed_dict={x:x_test, 
                                                                     y_true:convertToOneHot(y_test, 10)})
        summary_writer.add_summary(summary_, i)
        acc_v = np.average(np.argmax(res_val, axis = 1) == y_test)
        vals.append([loss_, acc, loss_v, acc_v])
        print("{} Training: loss {} acc {} Validation: loss {} acc {}".format(i, loss_, acc, loss_v, acc_v))
 

0 Training: loss 2.299053192138672 acc 0.1640625 Validation: loss 2.1674246788024902 acc 0.422
23 Training: loss 1.3796892166137695 acc 0.8984375 Validation: loss 1.438981294631958 acc 0.851
46 Training: loss 0.9846053719520569 acc 0.9453125 Validation: loss 1.0292503833770752 acc 0.904
69 Training: loss 0.6669174432754517 acc 0.9609375 Validation: loss 0.7443663477897644 acc 0.911
92 Training: loss 0.49896496534347534 acc 0.9296875 Validation: loss 0.5589761734008789 acc 0.922
115 Training: loss 0.32968568801879883 acc 0.96875 Validation: loss 0.4491449296474457 acc 0.926
138 Training: loss 0.1967659443616867 acc 1.0 Validation: loss 0.381673127412796 acc 0.935
161 Training: loss 0.2063540518283844 acc 0.984375 Validation: loss 0.32997456192970276 acc 0.942
184 Training: loss 0.11490478366613388 acc 0.9921875 Validation: loss 0.29464325308799744 acc 0.942
207 Training: loss 0.08265043795108795 acc 1.0 Validation: loss 0.27058395743370056 acc 0.946
230 Training: loss 0.0720877647399902

In [None]:
vals = []
idx = np.random.permutation(len(x_train))
for j in range(0,10):
    for i in range(23):
        idx1 = np.random.permutation(idx[i*128:(i*128)+128])
        loss_, _, res_ = sess.run((loss_supervised, train_op, out), 
                                  feed_dict={x:x_train[idx1], y_true:convertToOneHot(y_train[idx1], 10)})
        if (i % 1 == 0):#50
            acc = np.average(np.argmax(res_, axis = 1) == y_train[idx1])
            # Get the results for the validation results 
            loss_v, res_val, summary_ = sess.run([loss_supervised, out, merged_summary_op], 
                                                              feed_dict={x:x_test, 
                                                                         y_true:convertToOneHot(y_test, 10)})
            summary_writer.add_summary(summary_, i)
            acc_v = np.average(np.argmax(res_val, axis = 1) == y_test)
            vals.append([loss_, acc, loss_v, acc_v])
            print("{} Training: loss {} acc {} Validation: loss {} acc {}".format(i, loss_, acc, loss_v, acc_v))


In [None]:
# compare to directory in tf.summary.FileWriter
#! tensorboard --logdir /tmp/ladder/
# check docker call and go to http://srv-lab-t-697:8711