## MNIST exercise (handwritten digit recognition tutorial) ##
**Goal: Introduction to Keras, convolution feature maps, and features**

**Exercise:**
    
1. Review the steps of the code in this notebook
2. Look for the `keras.models.Sequential()` statement 
   and fill in the parameters for the first Conv2D layer: <br>
tf.keras.layers.Conv2D(
    filters,
    kernel_size,
    strides=(1, 1),
    activation=..... 
    )
<br>  use 16 for number of filters
<br>  use 3x3 for kernel size
<br>  use relu for activation

3. Run the notebook and observe the images of the filter weights and activations (at the end)
4. Try changing the filter size for the first convolution layer to something large (like 9x9 or 16x16)
5. Compare the resulting filters and activations with those from the 3x3 filter size

<br>
Question to consider: to recognize the 10 digits, what is the minimum number of filters needed?



In [None]:

# ----------- IMPORT STATEMENTS ---------------
# Setup cell: imports every library the notebook uses, fixes the NumPy random
# seed and loads the TensorBoard notebook extension.
import numpy as np
np.random.seed(1)  # for reproducibility
# NOTE(review): the call above seeds NumPy's global generator only; nothing in
# this cell seeds TensorFlow, so weight initialisation may still differ between
# runs -- confirm whether that matters for the exercise.
 
from tensorflow import keras

# `if 1:` is always true; it simply groups the short-name imports that the later
# model-building cells rely on (Convolution2D, MaxPooling2D, Flatten, Dense).
if 1:
 from tensorflow.keras.models import Sequential        #the standard stack of layers models
 from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten   #core layer  functions
 from tensorflow.keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D     #convolution layer functions
 from tensorflow.keras import optimizers                             #For training 

# Load the TensorBoard notebook extension
%load_ext tensorboard

import tensorflow as tf
import datetime, os

#---------------------------------------------
# Progress marker so the reader can see that the setup cell finished.
print('import done')

In [None]:
# Fetch the MNIST digit images and labels through the Keras dataset helper.
# load_data() hands back a (train, test) pair, each one an (images, labels) tuple.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
X_train, Y_train = mnist_train
X_test, Y_test = mnist_test

# Quick sanity check: show the dimensions of the training image array.
print(f"data loaded:{X_train.shape}")

In [None]:
# Keep only the first 1000 training examples: a smaller subset is enough to get
# good results in this exercise.
N_TRAIN_SAMPLES = 1000
X_train = X_train[:N_TRAIN_SAMPLES]
Y_train = Y_train[:N_TRAIN_SAMPLES]

# Review the dimensions of the reduced arrays (images first, then labels).
print('train shapes: \n')
for subset in (X_train, Y_train):
    print(subset.shape)
print('img load done')


In [None]:
# --------- Reshape input data ------------
#  Keras convolution layers expect a batch of 3-D images, i.e. a 4-D array, so a
#  trailing channel axis of length 1 is added to both image stacks.
#  The target shape is built from the first three dimensions only, which makes
#  this cell safe to re-run: an array that already carries the channel axis
#  keeps its shape instead of growing a fifth dimension (which would no longer
#  match the model's input_shape).
X_train = X_train.reshape(X_train.shape[:3] + (1,))
X_test  = X_test.reshape(X_test.shape[:3] + (1,))

print('added dimension')

In [None]:
# Convert the images to float32 and rescale them into the 0-1 range by dividing
# by 255 (open question kept from the original notebook: should we scale at all?).
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-hot encode the labels: every class index becomes a row of a 10-column
# class matrix.
NUM_CLASSES = 10
Y_train, Y_test = (
    keras.utils.to_categorical(labels, NUM_CLASSES) for labels in (Y_train, Y_test)
)

# ------------- End loading and preparing data --------------
# Confirm the final dimensions of all four arrays.
for prepared in (X_train, Y_train, X_test, Y_test):
    print(prepared.shape)



In [None]:
# --------------Set up Model ---------------------
# Create an empty Sequential model and add its first convolution layer.
#
# EXERCISE: the underscores below are deliberate placeholders to be filled in:
#   ___           -> number of filters
#   (___, ___)    -> kernel size (rows, cols)
#   '_____'       -> name of the activation function
# The layer is declared for 28x28 single-channel inputs (input_shape=(28,28,1))
# with the channel axis last.
# NOTE(review): in IPython `___` is also an output-history variable, so running
# this cell with the blanks left in may fail with a confusing error rather than
# a NameError -- confirm in your environment.
mymodel = keras.models.Sequential()
mymodel.add(keras.layers.Convolution2D(___, 
                                       (___, ___),
                                       strides=1,  
                                       data_format="channels_last",
                                       activation='_____', 
                                       input_shape=(28,28,1))) 
#  <<<<<<<<<------ EXERCISE: fill in the blanks in the call above
 
print('modeldef and first conv layer done')


In [None]:
#-----------------Now add more Convolution layers
# Feature-extraction stage: a second 3x3 convolution with 16 filters, a 2x2
# max-pool that slides by 2, and a flatten step that reorganizes the
# 2D-by-filters output into one vector per image.
feature_layers = [
    Convolution2D(16, (3, 3), strides=1, data_format="channels_last", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, data_format="channels_last"),
    Flatten(),
]
for layer in feature_layers:
    mymodel.add(layer)
print('added more layers')

#----------------Now add final classification layers
# Hidden layer of 32 units (no firm rule for the size) followed by a 10-way
# softmax output layer.
classifier_layers = [
    Dense(32, activation='relu'),
    Dense(10, activation='softmax'),
]
for layer in classifier_layers:
    mymodel.add(layer)
print('assemble model done')

In [None]:
# --------- Now assemble (ie compile TensorFlow commands) and run -----
# Training configuration: categorical cross-entropy loss, the 'sgd' optimizer,
# and accuracy reported as the metric.
compile_options = {
    'loss': 'categorical_crossentropy',
    'optimizer': 'sgd',
    'metrics': ['accuracy'],
}
mymodel.compile(**compile_options)
print('compiled')

In [None]:
# Show the summary of the assembled model so its layers can be reviewed
# before training.
mymodel.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early-stopping callback: watch the validation loss (lower is better) with a
# patience of 5 epochs before stopping.
myES_function = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
)

#------------ Now Run Training
# The test split doubles as the validation set here; validation_split=0.20 is
# the alternative that was left disabled in the original cell.
fit_history = mymodel.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    batch_size=32,
    epochs=20,
    verbose=1,
    callbacks=[myES_function],
)

In [None]:
# Shell escape: run the nvidia-smi command and show its output in the notebook.
!nvidia-smi
# Last expression of the cell, so the list of physical devices reported by
# TensorFlow is displayed as the cell output.
tf.config.list_physical_devices()

In [None]:
import matplotlib.pyplot as plt      #These provide matlab type of plotting functions
import matplotlib.image as mpimg
%matplotlib inline                   

# list all data in history and print out performance
print(fit_history.history.keys())

# summarize history for accuracy
plt.figure()
plt.axis([0 ,10, 0, 1])
plt.plot(fit_history.history['accuracy'])
plt.plot(fit_history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val_test'], loc='upper left')
plt.show()

In [None]:
#

In [None]:
# Display the first training image as a 28x28 grayscale picture.
import matplotlib.pyplot as plt      #These provide matlab type of plotting functions
import matplotlib.image as mpimg

# Reshaping the first sample to (28, 28) drops its length-1 channel axis.
tmpimg = X_train[0].reshape((28, 28))
plt.imshow(tmpimg, cmap='gray')

In [None]:
# ------------ GET WEIGHTS From Convolution Layer and make mosaic image
# Draws every filter of the first convolution layer as a small grayscale tile.
Wlist   = mymodel.layers[0].get_weights()
# Wlist[0] is the kernel array; presumably laid out as
# (rows, cols, input channels, filters) -- the original squeeze/print relied on
# the same layout. Only the single input-channel axis is removed, so a kernel
# size of 1 or a single filter does not collapse further axes by accident.
W3D     = np.squeeze(Wlist[0], axis=2)
print("W3D shape Wlist[0]:"+str(W3D.shape))
W3Dchan = W3D.transpose(2, 0, 1)   # filters become the first dimension

#plot mosaic of filters
ncol      = 4
n_filters = W3Dchan.shape[0]
# The grid size follows the real number of filters (not a hard-coded 16) and is
# converted to int: np.ceil returns a float, and plt.subplot requires integer
# row/column counts.
nrow      = int(np.ceil(n_filters / ncol))
plt.figure()
for i in range(n_filters):
    plt.subplot(nrow, ncol, i + 1)
    plt.imshow(W3Dchan[i], 'gray')
    plt.axis('off')

plt.show()
print('done plotting weights mosaic')

In [None]:
#

In [None]:
#  ---------------- NOW Visualize the activations for the first training example --------
#   1. gather activations from the model layers
# -------------------------------------------------------------------------
import numpy as np

# Build a companion model that takes mymodel's input and returns the output of
# every one of its layers.
layer_outputs = [lyr.output for lyr in mymodel.layers]
my_model_actvtns = keras.models.Model(
    inputs=mymodel.input,
    outputs=layer_outputs,
)

# A batch holding just the first training image; slicing with 0:1 keeps the
# leading batch axis, giving the 4D input the model expects.
x = X_train[0:1]
my_actvtns_output = my_model_actvtns.predict(x)   # one activation array per layer

print(f"{len(my_actvtns_output)} layers with output activations")



In [None]:
#

In [None]:
# 2.  Now output a mosaic of layer 1
# Draws every feature map produced by one layer for the single input image.
layer_index   = 0   # <<---- try a different layer output (must be an image-like layer)
# Take sample 0 of the batch instead of squeezing every length-1 axis, so a
# layer with a single feature map still keeps its (rows, cols, maps) layout.
layeroutput3D = my_actvtns_output[layer_index][0]
if layeroutput3D.ndim != 3:
    # Flatten/Dense outputs are vectors and cannot be shown as image tiles.
    raise ValueError(
        'layer ' + str(layer_index) + ' output has shape ' + str(layeroutput3D.shape)
        + '; choose a convolution or pooling layer to plot a mosaic'
    )

ncol   = 4
n_maps = layeroutput3D.shape[2]
# The grid size follows the real number of feature maps (not a hard-coded 16)
# and is converted to int: np.ceil returns a float, and plt.subplot requires
# integer row/column counts.
nrow   = int(np.ceil(n_maps / ncol))
plt.figure()
for i in range(n_maps):
    plt.subplot(nrow, ncol, i + 1)
    plt.imshow(layeroutput3D[:, :, i], 'gray')
    plt.axis('off')
#plt.savefig("test.png", bbox_inches='tight')
plt.show()
print('done plotting layer1 activation output mosaic')
