In [None]:
'''
# if working on laptop on local docker:
nvidia-docker run -p 4242:8888 -v ~/dl_cas/:/notebooks -p 6006:6006 -it oduerr/tf_docker:gpu_r
'''

## Imports

In [1]:
# python module imports needed in customized functions:
import numpy as np
import tensorflow as tf
import sys

In [2]:
tf.__version__

'1.4.1'

In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as imgplot

In [3]:
!pip install keras



In [4]:
import keras
#from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Input
#from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
from keras.layers.normalization import BatchNormalization

Using TensorFlow backend.


In [None]:

import pandas as pd
# Optional seeding, currently disabled: uncomment both lines to seed
# TensorFlow and NumPy before any model is built.
#tf.set_random_seed(1)
#np.random.seed(1)
import sys
# Cell output: the TensorFlow version together with the interpreter version.
tf.__version__, sys.version_info



# Data read-in 

### Load the small external MNIST data set when working locally on Windows

In [6]:

# Upload mnist_4000.pkl.gz (the subset used in the DL course) to the home
# directory first. The import fallback keeps the cell usable on Python 2 and 3.
try:
    import cPickle as pickle
except ImportError:
    import pickle
import gzip

# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open archives that come from a trusted source.
with gzip.open('./../mnist_4000.pkl.gz', 'rb') as f:
    if sys.version_info.major > 2:
        # presumably the archive was pickled under Python 2; latin1 lets
        # Python 3 decode its byte strings -- TODO confirm
        (X, y) = pickle.load(f, encoding='latin1')
    else:
        (X, y) = pickle.load(f)

# Length of the last axis of one image (X is indexed with four axes here).
PIXELS = len(X[0,0,0,:])

# The fully connected network needs one flat vector per image. The sample
# count and vector length are derived from the data instead of being
# hard-coded as 4000 x 784, so a differently sized archive does not crash here.
X = X.reshape(len(X), -1)
#X = X/255 # is already normalized

print("small data before split X.shape", X.shape)
print("small data before  y.shape", y.shape) 

# The first N_TRAIN samples are used for training, the remainder for testing.
N_TRAIN = 3000
x_train = X[:N_TRAIN]
y_train = y[:N_TRAIN]
x_test = X[N_TRAIN:]
y_test = y[N_TRAIN:]

print("small data x_train.shape:", x_train.shape)
print("small data y_train.shape:",y_train.shape)
print("small data x_test.shape:",x_test.shape)
print("small data y_test.shape:",y_test.shape)

# Number of distinct labels found in the whole data set.
num_class= len(np.unique(y))
print("num_class:",num_class)



small data before split X.shape (4000, 784)
small data before  y.shape (4000,)
small data x_train.shape: (3000, 784)
small data y_train.shape: (3000,)
small data x_test.shape: (1000, 784)
small data y_test.shape: (1000,)
num_class: 10


### Or load the full MNIST dataset directly from the internet

In [7]:
# Disabled alternative loader: the code is wrapped in a string literal, so
# running this cell only echoes the text. Remove the quotes to use it.
# NOTE(review): mnist.load_data() presumably returns raw 0-255 pixel values,
# unlike the small set, which is marked as already normalized -- scale the
# data before training if this path is enabled. TODO confirm.
'''
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# if images are not flatten (like in mnist) we need first to flatten them
# now flatten images for fc ladder

x_train = x_train.reshape(-1,784)
x_test = x_test.reshape(-1,784)

print("large data x_train.shape:", x_train.shape)
print("large data y_train.shape:",y_train.shape)
print("large data x_test.shape:",x_test.shape)
print("large data y_test.shape:",y_test.shape)
'''

'\nfrom keras.datasets import mnist\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\n\n# if images are not flatten (like in mnist) we need first to flatten them\n# now flatten images for fc ladder\n\nx_train = x_train.reshape(-1,784)\nx_test = x_test.reshape(-1,784)\n\nprint("large data x_train.shape:", x_train.shape)\nprint("large data y_train.shape:",y_train.shape)\nprint("large data x_test.shape:",x_test.shape)\nprint("large data x_test.shape:",y_test.shape)\n'

In [8]:
x_train[:200].shape

(200, 784)

In [9]:
# Mean of feature column 200 across all training images (quick look at the input scaling).
np.mean(x_train[:,200])

-0.0042952602

In [10]:
# Standard deviation of the same feature column across all training images.
np.std(x_train[:,200])

0.83271211

# Let's construct a fully connected NN (784->500->50->10) without noise and without the unsupervised task to get a benchmark for the loss and accuracy

In [11]:
### ReLU baseline
# `name` tags this run; it is reused for the TensorBoard log directory.
name = 'relu'

# Fully connected stack 784 -> 500 -> 50 -> 10. Each hidden Dense layer is
# followed by a separate ReLU activation, so a BatchNormalization layer could
# be slotted in between them (currently not used).
model = Sequential([
    Dense(500,
          batch_input_shape=(None, 784),
          kernel_initializer='random_uniform',
          bias_initializer='zeros'),
    Activation('relu'),
    Dense(50),
    Activation('relu'),
    Dense(10, activation='softmax'),
])

# Softmax output trained with categorical cross-entropy on one-hot targets;
# 'rmsprop' is a drop-in alternative to Adam here.
model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_1 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 50)                25050     
_________________________________________________________________
activation_2 (Activation)    (None, 50)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                510       
Total params: 418,060
Trainable params: 418,060
Non-trainable params: 0
_________________________________________________________________


# Training

In [13]:
def convertToOneHot(vector, num_classes=None):
    """Convert a 1-D sequence of integer class labels into a one-hot matrix.

    Args:
        vector: 1-D sequence or array of non-negative integer labels.
        num_classes: Width of the result. When None, it is inferred as
            ``max(vector) + 1`` (0 for an empty input).

    Returns:
        An int32 array of shape ``(len(vector), num_classes)`` with a single 1
        per row, placed in the column given by that row's label.

    Raises:
        IndexError: If a label is not smaller than ``num_classes``.
    """
    labels = np.asarray(vector)
    if num_classes is None:
        # The previous default (None) was passed straight to np.zeros and
        # raised a TypeError; derive the class count from the labels instead.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    result = np.zeros((len(labels), num_classes), dtype='int32')
    if labels.size:
        # Fancy indexing sets exactly one entry per row: (row i, column labels[i]).
        result[np.arange(len(labels)), labels] = 1
    return result

In [15]:
# Each run logs into its own sub-directory, selected by the run tag `name`.
tb_log_dir = 'tensorboard/mnist_small/' + name + '/'

# Write the graph and, once per epoch (histogram_freq=1), the histograms.
tensorboard = keras.callbacks.TensorBoard(log_dir=tb_log_dir,
                                          histogram_freq=1,
                                          write_graph=True)

In [16]:
# Train for 100 epochs on the training split and evaluate on the held-out
# split after every epoch; the returned History object is kept in `history`.
# `epochs` is the Keras 2 name of the former `nb_epoch` argument, and
# validation_data is passed as the documented (inputs, targets) tuple.
history = model.fit(x_train,
                    convertToOneHot(y_train, 10),
                    epochs=100,
                    batch_size=128,
                    callbacks=[tensorboard],
                    validation_data=(x_test, convertToOneHot(y_test, 10)),
                    verbose=2)





Train on 3000 samples, validate on 1000 samples
Epoch 1/100
 - 2s - loss: 0.8515 - acc: 0.7530 - val_loss: 0.3355 - val_acc: 0.9120
Epoch 2/100
 - 1s - loss: 0.2079 - acc: 0.9433 - val_loss: 0.2614 - val_acc: 0.9260
Epoch 3/100
 - 1s - loss: 0.0978 - acc: 0.9770 - val_loss: 0.2507 - val_acc: 0.9370
Epoch 4/100
 - 1s - loss: 0.0506 - acc: 0.9937 - val_loss: 0.2465 - val_acc: 0.9390
Epoch 5/100
 - 1s - loss: 0.0287 - acc: 0.9977 - val_loss: 0.2454 - val_acc: 0.9420
Epoch 6/100
 - 1s - loss: 0.0184 - acc: 0.9990 - val_loss: 0.2447 - val_acc: 0.9370
Epoch 7/100
 - 1s - loss: 0.0116 - acc: 1.0000 - val_loss: 0.2502 - val_acc: 0.9400
Epoch 8/100
 - 1s - loss: 0.0081 - acc: 1.0000 - val_loss: 0.2506 - val_acc: 0.9430
Epoch 9/100
 - 1s - loss: 0.0062 - acc: 1.0000 - val_loss: 0.2525 - val_acc: 0.9410
Epoch 10/100
 - 1s - loss: 0.0049 - acc: 1.0000 - val_loss: 0.2561 - val_acc: 0.9410
Epoch 11/100
 - 1s - loss: 0.0040 - acc: 1.0000 - val_loss: 0.2585 - val_acc: 0.9410
Epoch 12/100
 - 1s - loss:

Epoch 94/100
 - 1s - loss: 5.1418e-05 - acc: 1.0000 - val_loss: 0.3371 - val_acc: 0.9430
Epoch 95/100
 - 1s - loss: 5.0197e-05 - acc: 1.0000 - val_loss: 0.3376 - val_acc: 0.9440
Epoch 96/100
 - 1s - loss: 4.8948e-05 - acc: 1.0000 - val_loss: 0.3380 - val_acc: 0.9430
Epoch 97/100
 - 1s - loss: 4.7864e-05 - acc: 1.0000 - val_loss: 0.3385 - val_acc: 0.9430
Epoch 98/100
 - 1s - loss: 4.6705e-05 - acc: 1.0000 - val_loss: 0.3388 - val_acc: 0.9430
Epoch 99/100
 - 1s - loss: 4.5595e-05 - acc: 1.0000 - val_loss: 0.3393 - val_acc: 0.9440
Epoch 100/100
 - 1s - loss: 4.4550e-05 - acc: 1.0000 - val_loss: 0.3403 - val_acc: 0.9440
