# Street View House Numbers - Evaluation on Test dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import svhn
import graphics
import keras_utils
from keras.utils import np_utils
import keras
from keras import backend as K

%matplotlib inline

# width of the one-hot digit-position encoding passed to the detector
max_digits = 7
# target size handed to svhn.createImageData when resizing test images
# NOTE(review): presumably (height, width) -- confirm against svhn.createImageData
image_size = (54,128)

# print the keras version used
import keras
print("Keras version : {}".format(keras.__version__))

## Load test dataset
Here we load the sample points as individual prediction targets. We will not flatten the data like we did during training, since we have to predict the complete sequence of digits in an image together.

In [None]:
# load the data file (takes time)
# rawdata is later sampled by generateTestData, which reads each entry's
# 'length' and 'labels' fields
rawdata = svhn.read_process_h5('../inputs/test/digitStruct.mat')

In [None]:
# NOTE(review): `im` is not defined in any cell visible here, so this looks
# like a leftover scratch cell; the same centering is performed inside
# standardize(). TODO confirm whether this cell is still needed.
x = im - np.mean(im, axis=(2,0,1), keepdims=True)

In [None]:
# extract resized images, counts and label sequences for each sample
def generateTestData(data, n=100):
    """Draw n distinct random samples and build their evaluation arrays.

    data -- collection of datapoints; each one is passed to
            svhn.createImageData and must provide 'length' and 'labels'
    n    -- number of datapoints to draw without replacement

    Returns a tuple (images, counts, label sequences), each wrapped with
    np.array. Label value 10 is remapped to 0 (presumably the dataset's
    encoding of the digit zero); every other label is cast to int.
    """
    chosen = np.random.choice(data, size=n, replace=False)

    images = []
    counts = []
    raw_sequences = []
    for point in chosen:
        # the second return value of createImageData is not needed here
        resized, _ = svhn.createImageData(point, image_size, '../inputs/test/')
        images.append(resized)
        counts.append(point['length'])
        raw_sequences.append(point['labels'])

    sequences = []
    for raw in raw_sequences:
        sequences.append([0 if label == 10 else int(label) for label in raw])
    return np.array(images), np.array(counts), np.array(sequences)

def standardize(img):
    """Return img shifted to zero mean and scaled to unit standard deviation.

    img -- array with at least three axes; the statistics are taken jointly
           over axes 0, 1 and 2, so for a 3-D image they are global scalars.

    The input is not modified. A constant image (zero standard deviation)
    yields an all-zero result instead of NaNs.
    """
    centered = img - np.mean(img, axis=(2,0,1), keepdims=True)
    spread = np.std(centered, axis=(2,0,1), keepdims=True)
    # dividing by a zero spread would turn every pixel into NaN; the centered
    # values are already all zero in that case, so divide by 1 instead
    safe_spread = np.where(spread == 0, 1, spread)
    return centered / safe_spread
    
# change to 13068 to test on full test dataset
# Ximg keeps the resized images as returned by generateTestData; ycount and
# ylabel are the true digit counts and label sequences for the same samples
Ximg, ycount, ylabel = generateTestData(rawdata, 5)
# standardized copy of every image; this is what the vision model consumes
Xs = np.array([standardize(x) for x in Ximg])

## Load models
Our training graph is composed of three sub-models: vision, counter and label detector. That arrangement was suitable for training on flattened data, but we need to organize the models differently to generate sequence predictions.
Extract the models from the parent graph.

In [None]:
# rebuild the training graph from its YAML description; the context manager
# closes the file even if reading or deserialization raises
# NOTE(review): YAML model deserialization should only be used on trusted
# checkpoint files
with open('../checkpoints/model.yaml','r') as model_yaml:
    model = keras.models.model_from_yaml(model_yaml.read())
model.load_weights('../checkpoints/model.hdf5')

# enumerate the layers of main graph
for i, layer in enumerate(model.layers):
    print("layer {} : {}".format(i, layer.name))

In [None]:
# pull the three sub-models out of the training graph by layer position
# (positions 1, 3 and 4 of model.layers)
vision, counter, detector = model.layers[1], model.layers[3], model.layers[4]

## Prediction
For prediction, we first compute the intermediate output `h` from the vision model. We then pass `h` to the counter to predict the number of digits in each image. Finally, the detector is called once per sample, with all digit indices for that sample in a single batch.

The crucial part here is that we want to calculate the intermediate output h only once to save on computations.

In [None]:
# run the vision model once over all standardized images; the resulting
# features h are reused by the counter and detector cells below
h = vision.predict(Xs)

In [None]:
# index of the highest-scoring counter output for every sample
ycount_ = np.argmax(counter.predict(h), axis=1)

In [None]:
# predicted digit sequence for every sample, in the same order as ycount_
ylabel_ = []
for features, count in zip(h, ycount_):
    if count == 0:
        # no digit positions to query: record an empty sequence and avoid
        # calling the detector with an empty batch
        ylabel_.append(np.zeros(0, dtype=np.intp))
        continue

    # one hot encoding of every digit position 0..count-1
    positions = np_utils.to_categorical(np.arange(count), max_digits)
    # repeat this sample's features once per position so both detector
    # inputs have `count` rows
    hs = np.tile(features, (count, 1))

    # predict labels for the sample: highest-scoring class at each position
    ylabel_.append(np.argmax(detector.predict([hs, positions]), 1))

## Evaluation
We can evaluate the performance of both counter and label detector to get a better insight on the fit. In the end, we will evaluate the performance of the whole system. We will consider a predicted sequence to be correct only if all labels have been identified successfully, as any wrong classification can result in a totally different house number.

### Counter Metrics

In [None]:
from sklearn.metrics import classification_report
# per-class report of predicted digit counts against the true counts
print(classification_report(ycount, ycount_))

### Detector Metrics
The detector should not be penalized for the counter's mistakes: if the counter predicts the wrong count, the extra or missing digits are not the detector's failure. So for each sequence we evaluate the detector only on the first k digits, where k is the minimum of the true count and the predicted count.

In [None]:
# number of leading digits that exist in both the true and the predicted
# sequence of each sample
ycmin = np.minimum(ycount, ycount_)

# extract labels from ylabel and ylabel_ using ycmin
# The per-sample slices have differing lengths, so they are handed to
# np.concatenate as a plain list rather than being packed into an array first.
ylabel_det = np.concatenate([ylabelrow[0:ycminc] for ylabelrow,ycminc in zip(ylabel, ycmin)])

ylabel_det_ = np.concatenate([ylabelrow[0:ycminc] for ylabelrow,ycminc in zip(ylabel_, ycmin)])

In [None]:
# per-class report of predicted digit labels against the true labels,
# restricted to the positions selected above
print(classification_report(ylabel_det, ylabel_det_))

### Overall Sequence Prediction Accuracy

In [None]:
def matchSequence(seq, seq_):
    """Compare two collections of sequences pairwise.

    Returns a list with one boolean per pair, True only when both sequences
    of the pair have the same shape and the same elements. Pairing stops at
    the shorter of the two collections.
    """
    matches = []
    for expected, predicted in zip(seq, seq_):
        matches.append(np.array_equal(expected, predicted))
    return matches
# a sample counts as correct only when its whole predicted sequence matches
seqmatch = matchSequence(ylabel, ylabel_)
print("Sequence prediction accuracy : {}".format(np.average(seqmatch)))

## Display samples
Show some prediction results. The value in brackets is the true value.

In [None]:
# show the sampled images together with true and predicted counts/labels
graphics.displaySamples(
    Ximg,
    ycounttrue=ycount,
    ycountpred=ycount_,
    ylabels=ylabel,
    ylabelspred=ylabel_
)

## Visualize Convolutions
We can visualize the convolutions in vision model to see how the image is being processed. Change **convlayer** to see the convolutions generated by the desired layer.

In [None]:
# layer selector handed to showCNNConv together with the vision model
convlayer = 1
# NOTE(review): the raw image Ximg[2] is passed here, whereas predictions were
# made on the standardized Xs -- confirm whether showCNNConv standardizes
# internally. Index 2 requires at least three sampled images.
graphics.showCNNConv(vision, convlayer, Ximg[2])

In [None]:
# same visualisation for the next layer selector
convlayer = 2
# NOTE(review): passes the raw image Ximg[2] rather than its standardized
# counterpart in Xs -- confirm this is what showCNNConv expects.
graphics.showCNNConv(vision, convlayer, Ximg[2])