<a href="https://colab.research.google.com/github/davidabelin/capstone_project_udacity_mle/blob/main/digits_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Handwritten Arithmetic

In [None]:
#@title Imports

#%matplotlib inline
import matplotlib.pyplot as plt
#import matplotlib.image as Image
#import seaborn as sbn
 
#import zipfile
#import math
import random as rd
import numpy as np
import pandas as pd
#import os, signal
from IPython.display import clear_output
 
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
#from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, InputLayer, Input
from tensorflow.keras.optimizers import RMSprop, Adam

In [None]:
#@ title VARIABLES
### Raw MNIST arrives as two tuples of numpy arrays,
### each of the form (example images, labels)
traintuple, testuple = tf.keras.datasets.mnist.load_data()

### How many double-digit images to synthesize per noise level
N_train = 100000    # used for training
N_val = 5000        # used while training
N_test = 10000      # used for evaluation

### Hyperparameters shared by every model
batch_size = 500
validation_batch_size = 100
epochs = 5
learning_rate = 0.005
input_layer = layers.Input(shape=(28, 56, 1))

image_size = (28, 56)  # (rows, columns) of a generated double-digit image
model_kinds = ["miniNN", "CNN", "DNN", "comboNN", "ocNN"]
noise_levels = ["no", "low", "high", "var"]
### One (model kind, noise level) key per model; noise level varies slowest
training_keys = [(k, nc) for nc in noise_levels for k in model_kinds]
### Human-readable description of each noise level
noise2condition = dict(zip(noise_levels,
                           ["No noise", "Low noise", "High noise", "Variable noise"]))

# Construct the Input Data

In [None]:
#@ title Define Functions
def scale_array(arr):
    '''Linearly rescale an array so that its values span [0, 1].
        Params: arr: non-empty array-like of numbers
        Returns: array of the same shape with the smallest value mapped
                 to 0 and the largest to 1. When every element is equal
                 there is no range to stretch, so an all-zero float array
                 is returned instead of the NaNs a 0/0 division would give.
    '''
    arr = np.asarray(arr)
    lowest = np.min(arr)
    span = np.max(arr) - lowest
    shifted = arr - lowest
    if span == 0:
        # Constant input: (arr - min) is already all zeros; avoid 0/0.
        return np.zeros_like(shifted, dtype=float)
    return shifted / span
print("Loaded function scale_array(arr)")

def get_noise(image, noise_condition="var"):
    ''' Overlay normally distributed noise on an image and rescale it.
        Params: image: numpy array with values (0,1)
                noise_condition: one of "no", "low", "high", "var"
        Returns: noisy image array re-normalized to (0,1)
    '''
    # Multiples of the image's own standard deviation. The "var" entry is
    # drawn afresh on every call, whichever condition is requested.
    std_multiple = dict(no=0, low=0.3, high=1.3, var=rd.random())
    spread = np.std(image) * std_multiple[noise_condition]
    # The noise is centred on the image mean and has the image's shape.
    overlay = np.random.normal(loc=np.mean(image),
                               scale=spread,
                               size=image.shape)
    # Adding the overlay pushes values outside (0,1), hence the rescale.
    return scale_array(overlay + image)
print("Loaded function get_noise(image, noise_condition)")

def doubleDigits(datatuple, nc="no"):
    ''' Compose one two-digit image from two randomly drawn digit images.
        Params: datatuple: (images, answers); images[i] is a 2-D digit
                    image and answers[i] is its digit label
                nc: noise condition handed to get_noise()
        Returns: (image, label): image is an array of shape image_size
                 after get_noise() has added noise and rescaled it;
                 label is the int 10*left_digit + right_digit
    '''
    digit_images, digit_labels = datatuple

    # Two independent draws from the same raw data set: the first becomes
    # the tens digit (left), the second the ones digit (right).
    pool_size = len(digit_labels)
    tens_at = rd.randrange(0, pool_size)
    ones_at = rd.randrange(0, pool_size)
    label = digit_labels[tens_at]*10 + digit_labels[ones_at]

    # Each digit is scaled on its own because the raw images
    # have different distributions of pixel values.
    tens_img = scale_array(digit_images[tens_at])
    ones_img = scale_array(digit_images[ones_at])

    # Blank canvas; both digits are pulled toward the middle.
    canvas = np.zeros(image_size)
    full = image_size[1]
    half = full//2
    # Left digit: drop its first 4 columns and paste starting at column 8.
    canvas[:, 8:half+4] = tens_img[:, 4:half]
    # Right digit: drop its last 4 columns and paste ending 8 columns
    # before the right edge; += because the two pastes overlap.
    canvas[:, half-4:full-8] += ones_img[:, 0:half-4]
    # Overlapping bright pixels may have summed above 1.
    canvas = canvas.clip(0, 1)
    # get_noise() overlays the noise for condition nc and rescales.
    return get_noise(canvas, nc), int(label)
print("Loaded function doubleDigits(datatuple, nc=no)")

def getDoubleDigits(datatuple, how_many=1, nc="no"):
    ''' Generate a batch of two-digit images, with or without noise.
        Params: datatuple: (images, answers) as accepted by doubleDigits()
                how_many: number of double-digit images to generate
                nc: noise condition applied to every image
        Returns: (xx, yy): xx has shape (how_many, image_size[0],
                 image_size[1]); yy is the matching array of int labels
    '''
    n_rows, n_cols = image_size[0], image_size[1]
    labels = np.zeros((how_many,), dtype=int)
    batch = np.zeros((how_many, n_rows, n_cols))
    for slot in range(how_many):
        picture, label = doubleDigits(datatuple, nc)
        labels[slot] = label
        batch[slot] = picture
    return (batch, labels)
print("Loaded function getDoubleDigits(datatuple,how_many=1,nc=no)")

In [None]:
def plotLearningCurves(history):
    '''Plot training and validation accuracy against epoch index.
        Params: history: mapping with per-epoch sequences under the
                keys 'acc' and 'val_acc'
    '''
    train_curve, val_curve = history['acc'], history['val_acc']
    # Both curves share the x axis defined by the training curve's length.
    epoch_axis = range(len(train_curve))
    for curve in (train_curve, val_curve):
        plt.plot(epoch_axis, curve)
    plt.title('Training and validation accuracy')
print("Loaded function plotLearningCurves(history)")

def show_layers(model, output_model):
    '''Show one random test image, the model's guess for it, and the
    feature maps produced for it.
        Params: model: model whose predict() gives the guess and whose
                    layers (all but the first) supply the plot titles
                output_model: model whose predict() yields one output per
                    plotted layer -- presumably built from model's layer
                    outputs; verify against the caller
        Reads the module-level x_test and y_test arrays.
    '''
    # Draw one example from the test arrays.
    pick = rd.randrange(y_test.shape[0])
    truth = y_test[pick]
    batch = x_test[pick]
    batch = batch.reshape((1,) + batch.shape)    # prepend a batch axis of length 1
    prediction = model.predict(batch)
    print("Answer:",truth, "\tGuess:",np.argmax(prediction))
    plt.figure(figsize=(3,3))
    plt.imshow(batch[0,:,:,0], cmap="binary_r")

    activations = output_model.predict( batch )
    titles = [layer.name for layer in model.layers[1:]]
    for title, activation in zip(titles, activations):
        # Only 4-D outputs (1, rows, columns, n_features) are drawn.
        if len(activation.shape) != 4:
            continue
        shown = min(activation.shape[-1], 10)   # at most 10 maps per layer
        height = activation.shape[1]
        width = activation.shape[2]
        strip = np.zeros((height, width * shown))

        for k in range(shown):
            left = k * width
            tile = activation[0, :, :, k]
            tile *= 255.0
            strip[:, left:left + width] = tile
            # Overwrite the tile's first two columns with a white and
            # a black line to separate neighbouring maps.
            strip[:, left] = 255.
            strip[:, left + 1] = 0.

        zoom = 2.
        plt.figure(figsize=(zoom * shown, zoom))
        plt.title(title)
        plt.grid(False)
        plt.imshow(strip, cmap='gray')
print("Loaded function show_layers(model, output_model)")

In [None]:
def guessing(model,n=1,return_image=False):
    '''Let a model guess the labels of n randomly drawn test images.
        Reads the module-level x_test and y_test arrays.
        Params: model: object whose predict() returns one score per class
                n: number of random test images to try
                return_image: also return the last image that was tried
        Returns: (answers, guesses, pA, pG), four lists of length n:
                 true labels, predicted labels, the score the model gave
                 the true label, and the score it gave its own guess.
                 With return_image=True a fifth element is appended: the
                 last image batch, or None when n is 0.
    '''
    answers, guesses, pA, pG = [],[],[],[]
    # Stays None when the loop never runs, so return_image=True with n=0
    # returns cleanly instead of hitting an unbound local.
    img = None
    for count in range(n):
        rindex = rd.randrange(y_test.shape[0])
        ans = y_test[rindex]
        img = x_test[rindex]
        img = img.reshape((1,) + img.shape)    # prepend a batch axis of length 1
        guess_set = model.predict(img).flatten()
        guess = np.argmax(guess_set)
        answers.append(ans)
        guesses.append(guess)
        pG.append(guess_set[guess])
        pA.append(guess_set[ans])

        print("Answer",ans,"\tGuess",guess, "\tp(A)",round(pA[count],2),"\tp(G)",round(pG[count],2))
        if count%10 == 0:
            # Every tenth round, schedule the cell output to be wiped
            # once the next line is printed.
            print ('Processing...',count,"...")
            clear_output(wait=True)

    if return_image:
        return answers, guesses, pA, pG, img
    return answers, guesses, pA, pG
print("Loaded function guessing(model,n=1,return_image=False)")

def get_guesses(m,n=1):
    '''Run guessing() and tabulate its results.
        Params: m: the model to query
                n: number of random test images to try
        Returns: DataFrame with one row per try and the columns
                 'Answer', 'Guess', 'P(A)', 'P(G)'
    '''
    # guessing() takes the model first and the count second, and returns
    # four parallel lists that must be unpacked in a single statement.
    answers, guesses, p_answer, p_guess = guessing(m, n)
    results = pd.DataFrame({'Answer': answers,
                            'Guess': guesses,
                            'P(A)': p_answer,
                            'P(G)': p_guess},
                           columns=['Answer','Guess','P(A)','P(G)'])
    return results
print("Loaded function get_guesses(m,n=1)")


In [None]:
### Check a random sampling
nc = rd.choice(noise_levels)
x, y = getDoubleDigits(traintuple, nc=nc, how_many=9)
print('Answers:',y,"\nNoise:",nc) 

# sharex / sharey are keyword-only in current matplotlib; passing them
# positionally raises a TypeError.
f,a = plt.subplots(3, 3, sharex=True, sharey=True)
f.set_size_inches(15,7)
a=a.reshape(9,)
for i in range(9):  # panels 0 to 8
    a[i].imshow(x[i])

In [None]:
######### GENERATE DD TRAINING DATA FOR ALL NOISE CONDITIONS ###########
train_data = {} # {noise level : tuple of training data (x,y)}
val_data = {}   # {noise level : val data (x,y)}
test_data = {}  # {noise level : test data (x,y)}

for nc in noise_levels:
    ####### One training, validation and test set per noise level
    print ("Noise level:", noise2condition[nc])
    x_train, y_train = getDoubleDigits(traintuple, N_train, nc=nc)
    print("Made",N_train,"new double-digit images to train on.")
    # Validation and test sets are both drawn from the MNIST test images
    x_val, y_val = getDoubleDigits(testuple, N_val, nc=nc)
    print("Made",N_val,"new double-digit images to validate on.")
    x_test, y_test = getDoubleDigits(testuple, N_test, nc=nc)
    print("Made",N_test,"new double-digit images to test on.")

    ######  Append a single (black-and-white) channel axis
    x_train = np.expand_dims(x_train, axis=-1).astype("float32")
    x_val = np.expand_dims(x_val, axis=-1).astype("float32")
    x_test = np.expand_dims(x_test, axis=-1).astype("float32")

    train_data[nc] = (x_train, y_train)
    val_data[nc] = (x_val, y_val)
    test_data[nc] = (x_test, y_test)

# Build models

In [None]:
#########################
#@title ORIGINAL Build a model for each noise condition

def original_buildModel(key):
    '''Build one uncompiled model (the earlier set of architectures).
        params: key: a tuple of "model kind" and "noise level". The kind
                is matched by substring, in the order miniNN, CNN, DNN,
                comboNN, ocNN; the noise level is only used in the
                model's name.
        returns: a model of the specified kind named "<kind>_<noise>",
                 or None (after printing a message) if no kind matches
    '''
    (model_kind, noise) = key
    ## Same input for all layers
    # A local input tensor: one channel, image_size rows x columns.
    input_layer = layers.Input(shape=(image_size[0], image_size[1], 1))
        
    # NOTE: each test below is a substring test ("in"), not an equality test.
    if "miniNN" in model_kind:
        ########### Build miniNN as "baseline" model 
        # Flattened pixels feed the 100-way softmax directly.
        x = layers.Flatten()(input_layer)
        output_layer = layers.Dense(100, activation='softmax')(x)
        miniNN = Model(input_layer, output_layer, name="miniNN_"+noise)
        print ("built", miniNN.name)
        return miniNN

    elif "CNN" in model_kind:
        ########### Build CNN
        # Two conv/pool stages (average pooling first, max pooling second),
        # then the softmax classifier.
        x = layers.Conv2D(20, 2, padding='same', activation='relu')(input_layer)
        x = layers.AveragePooling2D(2)(x)
        x = layers.Conv2D(30, 3, activation='relu')(x) 
        x = layers.MaxPooling2D(2)(x)
        x = layers.Flatten()(x)
        output_layer = layers.Dense(100, activation='softmax')(x)
        CNN = Model(input_layer, output_layer, name="CNN_"+noise)
        print ("built", CNN.name)
        return CNN
    
    elif "DNN" in model_kind:
        ########### Build DNN
        # Plain stack: 1000 -> 500 dense units, with dropout after each.
        x = layers.Flatten()(input_layer)
        x = layers.Dense(1000, activation='relu')(x)
        x = layers.Dropout(0.3)(x)
        x = layers.Dense(500, activation='relu')(x) 
        x = layers.Dropout(0.1)(x)
        output_layer = layers.Dense(100, activation='softmax')(x)
        DNN = Model(input_layer, output_layer, name="DNN_"+noise)
        print ("built", DNN.name)
        return DNN

    elif "comboNN" in model_kind:
        ########### Build comboNN, hybrid CNN/DNN: 
        # One conv/pool stage followed by one dense layer with dropout.
        x = layers.Conv2D(20, 3, activation='relu')(input_layer)
        x = layers.AveragePooling2D(2)(x)
        x = layers.Flatten()(x)
        x = layers.Dense(200, activation='relu')(x)
        x = layers.Dropout(0.2)(x)
        output_layer = layers.Dense(100, activation='softmax')(x)
        comboNN = Model(input_layer, output_layer, name="comboNN_"+noise)
        print ("built", comboNN.name)
        return comboNN

    elif "ocNN" in model_kind:
        ########### Build "overcomplicated" model ocNN ########### 
        # Dense trunk x, split into two dense branches x1 and x2.
        x = layers.Flatten()(input_layer)
        x = layers.Dense(1000, activation='relu')(x)
        x = layers.Dropout(0.2)(x)

        x1 = layers.Dense(600, activation='relu')(x) 
        x1 = layers.Dropout(0.3)(x1)

        x2 = layers.Dense(200, activation='relu')(x) 
        x2 = layers.Dropout(0.1)(x2)

        # Convolutional trunk y on the same input, split into branches y1 and y2.
        y = layers.Conv2D(20, 2, padding='same', activation='relu')(input_layer)
        y = layers.MaxPooling2D(2)(y)

        y1 = layers.Conv2D(20, 3, activation='relu')(y) 
        y1 = layers.AveragePooling2D(2)(y1)
        y1 = layers.Flatten()(y1)
        y1 = layers.Dropout(0.2)(y1)

        y2 = layers.Conv2D(20, 2, activation='relu')(y) 
        y2 = layers.Conv2D(20, 2, activation='relu')(y2)
        y2 = layers.MaxPooling2D(2)(y2)
        y2 = layers.Flatten()(y2)
        y2 = layers.Dropout(0.1)(y2)

        # Pair each dense branch with one convolutional branch.
        z1 = layers.Concatenate()([x1,y1])
        z1 = layers.Dropout(0.3)(z1)

        z2 = layers.Concatenate()([x2,y2])
        z2 = layers.Dropout(0.1)(z2)

        # Merge both pairs and finish with one more dense layer.
        z = layers.Concatenate()([z1,z2])
        z = layers.Dense(1000,activation="relu")(z)
        z = layers.Dropout(0.3)(z)

        # Despite the name, this layer applies softmax, so it outputs
        # probabilities rather than raw logits.
        logits_layer = layers.Dense(100, activation='softmax')(z) 
        ocNN = Model(input_layer, logits_layer, name="ocNN_"+noise)
        print ("built", ocNN.name)
        return ocNN
    
    else:
        # No kind matched: report it and return None instead of raising.
        print ("Could not find that kind of model.")
        return None

In [None]:
def buildModel(key):
    '''Construct one uncompiled Keras model.
        params: key: a tuple of "model kind" and "noise level". The kind
                is matched by substring, in the order miniNN, CNN, DNN,
                comboNN, ocNN; the noise level is only used in the
                model's name.
        returns: a model of the specified kind named "<kind>_<noise>",
                 or None (after printing a message) if no kind matches
    '''
    (model_kind, noise) = key
    ## Every architecture reads the same single-channel image input
    input_layer = layers.Input(shape=(image_size[0], image_size[1], 1))

    def classify(features, prefix):
        # Shared tail: 100-way softmax, wrap as a named Model, announce it.
        probabilities = layers.Dense(100, activation='softmax')(features)
        net = Model(input_layer, probabilities, name=prefix+noise)
        print ("built", net.name)
        return net

    ########### miniNN: "baseline", flattened pixels straight to softmax
    if "miniNN" in model_kind:
        flat = layers.Flatten()(input_layer)
        return classify(flat, "miniNN_")

    ########### CNN: two conv/pool stages
    if "CNN" in model_kind:
        maps = layers.Conv2D(20, 3, activation='relu')(input_layer)
        maps = layers.MaxPooling2D(2)(maps)
        maps = layers.Conv2D(30, 2, activation='relu')(maps)
        maps = layers.AveragePooling2D(2)(maps)
        maps = layers.Flatten()(maps)
        return classify(maps, "CNN_")

    ########### DNN: two parallel dense branches, merged
    if "DNN" in model_kind:
        flat = layers.Flatten()(input_layer)
        branch_a = layers.Dense(400, activation='relu')(flat)
        branch_a = layers.Dropout(0.2)(branch_a)

        branch_b = layers.Dense(400, activation='relu')(flat)
        branch_b = layers.Dropout(0.2)(branch_b)

        merged = layers.Concatenate()([branch_a, branch_b])
        merged = layers.Dense(400, activation='relu')(merged)
        merged = layers.Dropout(0.2)(merged)
        return classify(merged, "DNN_")

    ########### comboNN: hybrid, one conv/pool stage then one dense layer
    if "comboNN" in model_kind:
        hybrid = layers.Conv2D(20, 3, activation='relu')(input_layer)
        hybrid = layers.AveragePooling2D(2)(hybrid)
        hybrid = layers.Flatten()(hybrid)
        hybrid = layers.Dense(500, activation='relu')(hybrid)
        hybrid = layers.Dropout(0.2)(hybrid)
        return classify(hybrid, "comboNN_")

    ########### ocNN: the "overcomplicated" model
    if "ocNN" in model_kind:
        # Dense trunk, split into a wide and a narrow branch.
        dense_trunk = layers.Flatten()(input_layer)
        dense_trunk = layers.Dense(1000, activation='relu')(dense_trunk)
        dense_trunk = layers.Dropout(0.3)(dense_trunk)

        dense_wide = layers.Dense(600, activation='relu')(dense_trunk)
        dense_wide = layers.Dropout(0.2)(dense_wide)

        dense_narrow = layers.Dense(200, activation='relu')(dense_trunk)
        dense_narrow = layers.Dropout(0.1)(dense_narrow)

        # Convolutional trunk on the same input, also split in two.
        conv_trunk = layers.Conv2D(20, 2, padding='same', activation='relu')(input_layer)
        conv_trunk = layers.MaxPooling2D(2)(conv_trunk)

        conv_single = layers.Conv2D(20, 3, activation='relu')(conv_trunk)
        conv_single = layers.AveragePooling2D(2)(conv_single)
        conv_single = layers.Flatten()(conv_single)
        conv_single = layers.Dropout(0.2)(conv_single)

        conv_double = layers.Conv2D(20, 2, activation='relu')(conv_trunk)
        conv_double = layers.Conv2D(20, 2, activation='relu')(conv_double)
        conv_double = layers.MaxPooling2D(2)(conv_double)
        conv_double = layers.Flatten()(conv_double)
        conv_double = layers.Dropout(0.1)(conv_double)

        # Pair each dense branch with one convolutional branch.
        pair_one = layers.Concatenate()([dense_wide, conv_single])
        pair_one = layers.Dropout(0.3)(pair_one)

        pair_two = layers.Concatenate()([dense_narrow, conv_double])
        pair_two = layers.Dropout(0.1)(pair_two)

        # Merge both pairs and finish with one more dense layer.
        fused = layers.Concatenate()([pair_one, pair_two])
        fused = layers.Dense(1000,activation="relu")(fused)
        fused = layers.Dropout(0.2)(fused)
        return classify(fused, "ocNN_")

    # No kind matched: report it and return None instead of raising.
    print ("Could not find that kind of model.")
    return None

In [None]:
# Build a single DNN (no-noise variant) and draw its layer graph
# with tensor shapes, as a quick visual check of the architecture.
DNN = buildModel(key=("DNN", "no"))
tf.keras.utils.plot_model(model=DNN, show_shapes=True)

In [None]:
#@title Compile and Train functions
## Same training loop for all models, with same compiler too
def train(model, traintuple, valtuple, epochs=epochs):
    '''Fit a model on one data set while validating on another.
        Params: model: a compiled model exposing fit()
                traintuple: the training data as a tuple of x,y
                valtuple: the validation data as a tuple of x,y
                epochs: number of passes over the training data
        Returns: a dictionary of metric values after each epoch of training
    '''
    features, targets = traintuple
    # Unpacked only to insist on an (x, y) pair; fit() gets the tuple itself.
    _val_features, _val_targets = valtuple

    fit_record = model.fit(x=features,
                           y=targets,
                           batch_size=batch_size,
                           validation_data=valtuple,
                           epochs=epochs,
                           verbose=1)
    return fit_record.history
print("loaded function train(model, traintuple, testuple, epochs=4)")

def compile_model(model):
    '''Compile a model in place with the settings shared by all models.
        Params: model: a Keras model whose labels are integer class ids
        Uses sparse categorical cross-entropy as the loss, Adam with the
        module-level learning_rate as the optimizer, and reports 'acc'.
    '''
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(  loss="sparse_categorical_crossentropy",
                    optimizer=Adam(learning_rate=learning_rate),
                    metrics=['acc'])
    print ("Compiled model", model.name)
print("loaded function compile_model(model)")

In [None]:
#################### Build every model once and file it under its training key
get_model = {}  # (model kind, noise level) --> untrained model
for key in training_keys:
    model = buildModel(key)
    get_model[key] = model

In [None]:
####################### Compile and train each model on its own noise level
stats = {}  # training key --> per-epoch metric history
for key in training_keys:
    model, nc = get_model[key], key[1]
    compile_model(model)
    train_stats = train(model, train_data[nc], val_data[nc], epochs=epochs)
    stats[key] = train_stats

## Evaluation and Results

In [None]:
## Plot the training accuracy for every trained model ####
# training_keys lists every model kind for the first noise level, then for
# the next, and so on, so a grid with one row per noise level and one
# column per model kind is filled in reading order.
nrows = len(noise_levels)
ncols = len(model_kinds)
fig = plt.figure(figsize=(15,15)) 
# squeeze=False always yields a 2-D array of axes, even for a 1x1 grid.
axarr = fig.subplots(nrows, ncols, sharex=True, sharey=True, squeeze=False)
axarr = axarr.ravel()
for i, tk in enumerate(training_keys):
    ax = axarr[i]
    ax.set_title(tk)
    acc = stats[tk]['acc']
    val_acc = stats[tk]['val_acc']
    # Span the epochs actually recorded rather than a fixed 0..4.
    ax.set_xlim(0, max(len(acc) - 1, 1))
    ax.set_ylim(0,1)
    ax.plot(range(len(acc)), acc, label="train")
    ax.plot(range(len(val_acc)), val_acc, label="validation")
    if i == 0:
        # One legend is enough; all panels use the same two curves.
        ax.legend()

_______________________

In [None]:
############################# Evaluation: every model against every noise level
results = {}   # (model name, test noise level) --> accuracy rounded to 4 places
evalkeys = []
for key in training_keys:
    model = get_model[key]
    print (">>>>>>>>>>>>>>> Key:", key)
    for nl in noise_levels:
        evalkey = (model.name, nl)
        evalkeys.append(evalkey)
        x, y = test_data[nl]
        evaluated = model.evaluate(x=x, y=y, verbose=0, batch_size=50)
        # Index 1 is the first compiled metric; index 0 is the loss.
        accuracy = round(evaluated[1], 4)
        results[evalkey] = accuracy
        condition = noise2condition[nl]
        print(model.name,"tested on",condition,":",accuracy)

In [None]:
# Tabulate the evaluation results: one row per trained model, one accuracy
# column per test noise level.
# The rows are collected as records and the frame is built once. Writing
# cells through chained indexing (df.iloc[row][col] = value) assigns to a
# temporary row object and is not guaranteed to reach the frame.
records = []
for model_kind, train_condition in training_keys:
    record = {"model": model_kind,
              "trained_on": noise2condition[train_condition]}
    for test_condition in noise_levels:
        # results is keyed by (model name, test noise level), where the
        # model name is "<kind>_<training noise level>".
        record[test_condition] = results[(model_kind+"_"+train_condition, test_condition)]
    records.append(record)
results_df = pd.DataFrame(records, columns=["model", "trained_on"] + noise_levels)
# A single multi-key sort gives a defined order: by training condition,
# then by model kind within each condition.
results_df = results_df.sort_values(["trained_on", "model"])

In [None]:
## One bar chart per model kind: accuracy on each test noise level,
## grouped by the noise level the model was trained on ####
f = plt.figure(figsize=(18,18) )
for position, m in enumerate(model_kinds, start=1):
    modax = f.add_subplot(3, 2, position)
    modax.set(ylim=(0, 1))

    of_this_kind = results_df["model"] == m
    r_df = results_df.loc[of_this_kind].sort_values('model')
    r_df.plot("trained_on", ["no","low","high","var"], kind='bar', ax=modax)

    plt.title("Model "+m)
f.show()

In [None]:
# One bar chart per training noise level: every model kind's accuracy
# on each test noise level.
f = plt.figure(figsize=(18,18))
ax = [f.add_subplot(2, 2, slot) for slot in range(1, 5)]
for i, level in enumerate(noise_levels):
    nl = noise2condition[level]
    trained_here = results_df["trained_on"] == nl
    r_df = results_df.loc[trained_here].sort_values('model')
    r_df.plot("model", ["no","low","high","var"], kind='bar',
              ax=ax[i],
              title="Trained on "+nl,
              ylabel="Evaluation (accuracy)")