## pure tf for benchmarking

In [1]:
'''
# if working on laptop on local docker:
docker run -p 4242:8888 -v ~/dl_cas/:/notebooks -p 6006:6006 -it oduerr/tf_docker:tf1_py3
'''

'\n# if working on laptop on local docker:\ndocker run -p 4242:8888 -v ~/dl_cas/:/notebooks -p 6006:6006 -it oduerr/tf_docker:tf1_py3\n'

## Imports

In [2]:
# python module imports needed in customized functions:
import numpy as np
import tensorflow as tf
import sys


In [3]:
# additional imports of python modules

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import time
import pandas as pd
#tf.set_random_seed(1)
#np.random.seed(1)
import sys
tf.__version__, sys.version_info



('1.0.0',
 sys.version_info(major=3, minor=4, micro=3, releaselevel='final', serial=0))

In [4]:
def my_fc_bn(Ylogits, offset, scope):
    with tf.variable_scope(scope) as v_scope:
        mean, variance = tf.nn.moments(Ylogits, [0])
        m = mean
        v = variance
        bnepsilon = 1e-8 #A small float number to avoid dividing by 0
        Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
        return Ybn

In [5]:
def convertToOneHot(vector, num_classes=None):
    result = np.zeros((len(vector), num_classes), dtype='int32')
    result[np.arange(len(vector)), vector] = 1
    return result

# Data read-in 

### Load small external MNIST data set when for working local on windows

In [6]:

# upload mnist_4000.pkl.gz which we have used in the DL course to home
# To be compatible with python3 and python2
try:
    import cPickle as pickle
except ImportError:
    import pickle
import gzip

with gzip.open('mnist_4000.pkl.gz', 'rb') as f:
    if sys.version_info.major > 2:
        (X,y) = pickle.load(f, encoding='latin1')
    else:
        (X,y) = pickle.load(f)
PIXELS = len(X[0,0,0,:])

# if images are not flatten (like in mnist) we need first to flatten them
# now flatten images for fc ladder

X = X.reshape([4000, 784])
#X = X/255 # is already normalized

print("small data before split X.shape", X.shape)
print("small data before  y.shape", y.shape) 

x_train = X[0:3000]
y_train = y[0:3000]
x_test = X[3000:4000]
y_test = y[3000:4000]


print("small data x_train.shape:", x_train.shape)
print("small data y_train.shape:",y_train.shape)
print("small data x_test.shape:",x_test.shape)
print("small data y_test.shape:",y_test.shape)

num_class= len(np.unique(y))
print("num_class:",num_class)



small data before split X.shape (4000, 784)
small data before  y.shape (4000,)
small data x_train.shape: (3000, 784)
small data y_train.shape: (3000,)
small data x_test.shape: (1000, 784)
small data y_test.shape: (1000,)
num_class: 10


### Or load full MNIST dataset directly from internet

In [7]:
'''
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# if images are not flatten (like in mnist) we need first to flatten them
# now flatten images for fc ladder

x_train = x_train.reshape(-1,784)
x_test = x_test.reshape(-1,784)

print("large data x_train.shape:", x_train.shape)
print("large data y_train.shape:",y_train.shape)
print("large data x_test.shape:",x_test.shape)
print("large data x_test.shape:",y_test.shape)
'''

'\nfrom keras.datasets import mnist\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\n# if images are not flatten (like in mnist) we need first to flatten them\n# now flatten images for fc ladder\n\nx_train = x_train.reshape(-1,784)\nx_test = x_test.reshape(-1,784)\n\nprint("large data x_train.shape:", x_train.shape)\nprint("large data y_train.shape:",y_train.shape)\nprint("large data x_test.shape:",x_test.shape)\nprint("large data x_test.shape:",y_test.shape)\n'

In [8]:
x_train[:200].shape

(200, 784)

In [9]:
np.mean(x_train[:,200])

-0.0042952602

In [10]:
np.std(x_train[:,200])

0.83271211

# Lets construct a fc NN (784->500->50->10) without noise and unsupervised task to get a benchmark for the loss and accuracy

In [11]:
# reset the default graph
tf.reset_default_graph()

In [12]:
# define placeholder which we need later to feed in our data:
# be sure that input data is normalized
x = tf.placeholder(tf.float32, shape=[None, 784], name='x_data')
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_data')

In [13]:
# our benchmark model hast 3 hidden layers
# x:h0:784 -> h1:500 -> h2:50 -> h3:10 (softmax)

W1 = tf.Variable(tf.random_uniform(shape=[784, 500],minval=-0.05, maxval=0.05))  
Blt1 = tf.Variable(tf.zeros([500]))
B1 = tf.Variable(tf.zeros([500]))
W2 = tf.Variable(tf.random_uniform(shape=[500, 50],minval=-0.05, maxval=0.05))
Blt2 = tf.Variable(tf.zeros([50]))
B2 = tf.Variable(tf.zeros([50]))
W3 = tf.Variable(tf.random_uniform(shape=[50, 10],minval=-0.05, maxval=0.05))
Blt3 = tf.Variable(tf.zeros([10]))
B3 = tf.Variable(tf.zeros([10]))


In [14]:
# define the model architecture as encoder in ladder: 

#x = my_norm(x, "initial_z_trafo")

hn_lt_1 = tf.matmul(x, W1) + Blt1 
hn_bn_1 = my_fc_bn(Ylogits=hn_lt_1, offset=B1, scope="bn")
hn_nlt_1 = tf.nn.relu(hn_bn_1)

hn_lt_2 = tf.matmul(hn_nlt_1, W2) + Blt2
hn_bn_2 = my_fc_bn(Ylogits=hn_lt_2, offset=B2, scope="bn")
hn_nlt_2 = tf.nn.relu(hn_bn_2)

hn_lt_3 = tf.matmul(hn_nlt_2, W3) + Blt3
hn_bn_3 = hn_lt_3 #my_fc_bn(Ylogits=hn_lt_3, offset=B3, scope="bn")
out = tf.nn.softmax(hn_bn_3)  # TODO : IS THIS ERROR PRONE? 

In [15]:
########################################################################
# supervised loss
############################################################################

# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), 
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
# problems with log(0) which is NaN

with tf.name_scope("loss_supervised"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=hn_bn_3, labels=y_true)
    # loss from supervised learning:
    loss_supervised = tf.reduce_mean(cross_entropy) 

In [16]:
with tf.name_scope("initialize"):
    init_op = tf.global_variables_initializer() 

In [17]:
# train by using  SGD Optimizer
with tf.name_scope("train_step"):
    #train_op = tf.train.AdamOptimizer(0.001).minimize(loss_supervised)
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss_supervised)

In [18]:
'''
## test dimensions
#init_op = tf.global_variables_initializer() 
# run the graph
sess = tf.Session()
sess.run(init_op) #initialization on the concrete realization of the graph
'''


'\n## test dimensions\n#init_op = tf.global_variables_initializer() \n# run the graph\nsess = tf.Session()\nsess.run(init_op) #initialization on the concrete realization of the graph\n'

In [19]:

'''
loss_, _, out_= sess.run(
    feed_dict={x:x_train[0:5], y_true:convertToOneHot(y_train[0:5], 10)},
    fetches=(loss_supervised, train_op, out)) 
'''

'\nloss_, _, out_= sess.run(\n    feed_dict={x:x_train[0:5], y_true:convertToOneHot(y_train[0:5], 10)},\n    fetches=(loss_supervised, train_op, out)) \n'

In [20]:
#We want to visualize the development of the following variables in tensorboard:
for v in tf.trainable_variables():
    print("Adding ", v.name)
    tf.summary.histogram(v.name, v)

Adding  Variable:0
INFO:tensorflow:Summary name Variable:0 is illegal; using Variable_0 instead.
Adding  Variable_1:0
INFO:tensorflow:Summary name Variable_1:0 is illegal; using Variable_1_0 instead.
Adding  Variable_2:0
INFO:tensorflow:Summary name Variable_2:0 is illegal; using Variable_2_0 instead.
Adding  Variable_3:0
INFO:tensorflow:Summary name Variable_3:0 is illegal; using Variable_3_0 instead.
Adding  Variable_4:0
INFO:tensorflow:Summary name Variable_4:0 is illegal; using Variable_4_0 instead.
Adding  Variable_5:0
INFO:tensorflow:Summary name Variable_5:0 is illegal; using Variable_5_0 instead.
Adding  Variable_6:0
INFO:tensorflow:Summary name Variable_6:0 is illegal; using Variable_6_0 instead.
Adding  Variable_7:0
INFO:tensorflow:Summary name Variable_7:0 is illegal; using Variable_7_0 instead.
Adding  Variable_8:0
INFO:tensorflow:Summary name Variable_8:0 is illegal; using Variable_8_0 instead.


In [21]:
# We want to visualize the development of the loss in tensorboard

tf.summary.scalar("loss_supervised", loss_supervised)

<tf.Tensor 'loss_supervised_1:0' shape=() dtype=string>

In [22]:
! rm -rf /tmp/ladder

In [23]:
! mkdir /tmp/ladder

In [24]:
! ls /tmp/ladder

In [25]:
# collect all summaries for tensorboard and define the directory for saved summary files 

merged_summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter("/tmp/ladder", tf.get_default_graph())

In [26]:
sess = tf.Session() 
sess.run(init_op)

In [27]:
# check the shape of the feeds:
#x = tf.placeholder(tf.float32, shape=[None, 784], name='x_data')
#y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_data')
print("x_train.shape:", x_train.shape)  
print("convertToOneHot(y_train, 10).shape:", convertToOneHot(y_train, 10).shape)  #

x_train.shape: (3000, 784)
convertToOneHot(y_train, 10).shape: (3000, 10)


In [28]:
vals = []
for i in range(100):
    idx = np.random.permutation(len(x_train))[0:128] #Easy minibatch of size 128
    loss_, _, res_ = sess.run((loss_supervised, train_op, out), 
                              feed_dict={x:x_train[idx], y_true:convertToOneHot(y_train[idx], 10)})
    if (i % 1 == 0):#50
        acc = np.average(np.argmax(res_, axis = 1) == y_train[idx])
        # Get the results for the validation results 
        loss_v, res_val, summary_ = sess.run([loss_supervised, out, merged_summary_op], 
                                                          feed_dict={x:x_test, 
                                                                     y_true:convertToOneHot(y_test, 10)})
        summary_writer.add_summary(summary_, i)
        acc_v = np.average(np.argmax(res_val, axis = 1) == y_test)
        vals.append([loss_, acc, loss_v, acc_v])
        print("{} Training: loss {} acc {} Validation: loss {} acc {}".format(i, loss_, acc, loss_v, acc_v))
 

0 Training: loss 2.3339920043945312 acc 0.0703125 Validation: loss 2.3193774223327637 acc 0.08
1 Training: loss 2.3128790855407715 acc 0.1015625 Validation: loss 2.3134565353393555 acc 0.086
2 Training: loss 2.313842535018921 acc 0.0859375 Validation: loss 2.3077828884124756 acc 0.092
3 Training: loss 2.328007221221924 acc 0.0625 Validation: loss 2.3025190830230713 acc 0.098
4 Training: loss 2.3062968254089355 acc 0.09375 Validation: loss 2.297375440597534 acc 0.104
5 Training: loss 2.2976977825164795 acc 0.1015625 Validation: loss 2.2919349670410156 acc 0.118
6 Training: loss 2.3095879554748535 acc 0.078125 Validation: loss 2.2874443531036377 acc 0.126
7 Training: loss 2.2870492935180664 acc 0.125 Validation: loss 2.282801628112793 acc 0.134
8 Training: loss 2.2657928466796875 acc 0.1796875 Validation: loss 2.277101516723633 acc 0.152
9 Training: loss 2.2916653156280518 acc 0.125 Validation: loss 2.2719051837921143 acc 0.166
10 Training: loss 2.277186155319214 acc 0.1328125 Validation

In [29]:
# compare to directory in tf.summary.FileWriter
#! tensorboard --logdir /tmp/ladder/
# check docker call and go to http://srv-lab-t-697:8711