# Model evaluation

In this notebook we evaluate all trained models. For each model we compute the modified MSE on the test set. Additionally, we compute the intersection over union (IoU) for each of the three filters (glasses, beard, hat) on the test set.

In [1]:
from Models.mobilenet import MobileNetTuned
from Models.mobilenet_v2 import MobileNetV2Tuned
from Models.resnet_50 import ResNetTuned
from Loss.losses import mse_visible_lists
from Filters.glasses import GlassesFilter
from Filters.hat import HatFilter
from Filters.beard import BeardFilter
from Data.data_generator import DataGenerator

import numpy as np
from tqdm.auto import trange
import matplotlib.pyplot as plt

## Load data and filters

We again use a custom data generator to go through the test set. Here, however, we use a batch size of 1, immediately calculate all metrics for each input/output pair, and stack those metrics together.

In [2]:
# Test-set generator. batch_size=1 means each item holds a single sample, so
# metrics can be computed per image; shuffle=False presumably keeps the sample
# order fixed between runs (confirm in DataGenerator).
test_generator = DataGenerator(data_path="Data/test", batch_size=1, shuffle=False)
# One instance per filter; evaluate_model calls compute_iou(img, y_true, y_pred)
# on each of them for every test sample.
glasses_filter = GlassesFilter()
hat_filter = HatFilter()
beard_filter = BeardFilter()

## Helper functions

These functions are used to go through the test set and compute the metrics.

In [3]:
def bootstrap_ci(x, n_samples=1000, seed=7):
    """
    Compute a bootstrap 95% confidence interval for the mean of sequence x.

    The sequence is resampled with replacement ``n_samples`` times (every
    resample has the same length as ``x``). The resample means are sorted and
    the interval bounds are read at the 2.5% and 97.5% positions.

    Parameters
    ----------
    x : 1-D sequence of numbers
        Observations whose mean is being estimated.
    n_samples : int, default 1000
        Number of bootstrap resamples; must be at least 1.
    seed : int, default 7
        Seed of the private random generator, for reproducible intervals.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval. ``(nan, nan)`` is returned
        when ``x`` is empty.

    Raises
    ------
    ValueError
        If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    # Convert once instead of on every resampling iteration.
    data = np.asarray(x)
    n = len(data)
    if n == 0:
        # Nothing to resample; the mean of an empty sequence is undefined.
        return np.nan, np.nan

    # A private legacy generator yields exactly the same stream as
    # np.random.seed(seed) followed by np.random.choice, but it does not
    # reseed the process-wide NumPy RNG as a side effect.
    rng = np.random.RandomState(seed)
    means = sorted(
        np.mean(rng.choice(data, size=n, replace=True)) for _ in range(n_samples)
    )

    # Clamp at 0: for small n_samples the "- 1" would otherwise produce index
    # -1 and silently pick the largest mean as the lower bound.
    lower_index = max(int(0.025 * n_samples) - 1, 0)
    upper_index = max(int(0.975 * n_samples) - 1, 0)

    return means[lower_index], means[upper_index]

def evaluate_model(model):
    """
    Evaluate `model` on every item of `test_generator` and print a report.

    For each test item the keypoints predicted by `model.predict` are rounded
    to integers, then scored with `mse_visible_lists` and with `compute_iou`
    of the glasses, hat and beard filters. The mean of each metric and its
    95% bootstrap confidence interval are printed; nothing is returned.
    """
    mse, glasses_iou, hat_iou, beard_iou = [], [], [], []
    # Pair every filter with the list collecting its per-sample IoU values.
    iou_targets = (
        (glasses_filter, glasses_iou),
        (hat_filter, hat_iou),
        (beard_filter, beard_iou),
    )

    for sample_idx in trange(len(test_generator)):
        # The generator uses batch_size=1, so index 0 selects the only sample.
        batch_images, batch_keypoints = test_generator[sample_idx]
        keypoints_true = batch_keypoints[0]
        raw_prediction = model.predict(batch_images)[0]
        keypoints_pred = [int(np.round(coord)) for coord in raw_prediction]
        image = batch_images[0]

        # Per-sample metrics
        mse.append(mse_visible_lists(keypoints_true, keypoints_pred))
        for face_filter, scores in iou_targets:
            scores.append(face_filter.compute_iou(image, keypoints_true, keypoints_pred))

    # Mean and bootstrap confidence interval of every metric
    mse_avg, mse_ci = np.mean(mse), bootstrap_ci(mse)
    g_iou_avg, g_iou_ci = np.mean(glasses_iou), bootstrap_ci(glasses_iou)
    h_iou_avg, h_iou_ci = np.mean(hat_iou), bootstrap_ci(hat_iou)
    b_iou_avg, b_iou_ci = np.mean(beard_iou), bootstrap_ci(beard_iou)

    # Report the results
    print(f"MSE on test set: {mse_avg}, 95% CI: [{mse_ci[0]}, {mse_ci[1]}]")
    print(f"Average IoU for glasses filter on test set: {g_iou_avg}, 95% CI: [{g_iou_ci[0]}, {g_iou_ci[1]}]")
    print(f"Average IoU for hat filter on test set: {h_iou_avg}, 95% CI: [{h_iou_ci[0]}, {h_iou_ci[1]}]")
    print(f"Average IoU for beard filter on test set: {b_iou_avg}, 95% CI: [{b_iou_ci[0]}, {b_iou_ci[1]}]")

## MobileNet

### Trained using regular MSE

In [4]:
# MobileNet-based model with default settings; the file name and section
# heading indicate the run trained with regular MSE.
model = MobileNetTuned()
# Presumably restores the trained model/weights from the .h5 file (confirm in
# MobileNetTuned.load_model); the saved output shows it prints a model summary.
model.load_model("mobilenet_mse.h5")

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.cast (TFOpLambda)        (None, 224, 224, 3)       0         
                                                                 
 tf.math.truediv (TFOpLambda  (None, 224, 224, 3)      0         
 )                                                               
                                                                 
 tf.math.subtract (TFOpLambd  (None, 224, 224, 3)      0         
 a)                                                              
                                                                 
 mobilenet_1.00_224 (Functio  (None, 1024)             3228864   
 nal)                                                            
                                                             

In [5]:
# Score the model loaded above on the test set; prints MSE and per-filter IoU
# means with their 95% bootstrap confidence intervals.
evaluate_model(model)

  0%|          | 0/998 [00:00<?, ?it/s]



MSE on test set: 44.63298819861946, 95% CI: [31.884936539746157, 60.57659763972389]
Average IoU for glasses filter on test set: 0.1676986552442015, 95% CI: [0.1584109929506459, 0.1769321939303506]
Average IoU for hat filter on test set: 0.7109316550111402, 95% CI: [0.6981115330829488, 0.7247179350739077]
Average IoU for beard filter on test set: 0.7186824116930834, 95% CI: [0.7081407018116884, 0.7303637279997734]


### With convolution instead of pooling

In [6]:
# pooling=False selects the variant that, per the section heading, uses
# convolution instead of pooling (the saved summary shows a 7x7x1024 backbone
# output rather than a pooled 1024 vector).
model = MobileNetTuned(pooling=False)
# Presumably restores the trained model/weights from the .h5 file (confirm in
# MobileNetTuned.load_model).
model.load_model("mobilenet_conv.h5")

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.cast_1 (TFOpLambda)      (None, 224, 224, 3)       0         
                                                                 
 tf.math.truediv_1 (TFOpLamb  (None, 224, 224, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_1 (TFOpLam  (None, 224, 224, 3)      0         
 bda)                                                            
                                                                 
 mobilenet_1.00_224 (Functio  (None, 7, 7, 1024)       3228864   
 nal)                                                            
                                                           

In [7]:
# Score the convolution variant loaded above on the test set; prints MSE and
# per-filter IoU means with their 95% bootstrap confidence intervals.
evaluate_model(model)

  0%|          | 0/998 [00:00<?, ?it/s]

MSE on test set: 59.42852371409485, 95% CI: [43.59569138276553, 79.21348252059676]
Average IoU for glasses filter on test set: 0.13691223209851386, 95% CI: [0.12956610904211133, 0.14446022959006796]
Average IoU for hat filter on test set: 0.6824863555441798, 95% CI: [0.6689179682421569, 0.6967425374700925]
Average IoU for beard filter on test set: 0.6722476522288183, 95% CI: [0.6597724413081689, 0.6855835565537732]


### Trained using custom MSE

In [4]:
# MobileNet-based model with default settings; the file name and section
# heading indicate the run trained with the custom MSE loss.
# NOTE(review): the saved execution count of this cell repeats an earlier one,
# so its output appears to come from a separate kernel run. Re-run the
# notebook top to bottom to confirm the reported numbers.
model = MobileNetTuned()
# custom_loss=True is presumably required so the custom loss can be resolved
# when loading the saved model (confirm in MobileNetTuned.load_model).
model.load_model("mobilenet_custom.h5", custom_loss=True)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.cast (TFOpLambda)        (None, 224, 224, 3)       0         
                                                                 
 tf.math.truediv (TFOpLambda  (None, 224, 224, 3)      0         
 )                                                               
                                                                 
 tf.math.subtract (TFOpLambd  (None, 224, 224, 3)      0         
 a)                                                              
                                                                 
 mobilenet_1.00_224 (Functio  (None, 1024)             3228864   
 nal)                                                            
                                                             

In [5]:
# Score the custom-MSE MobileNet loaded above on the test set; prints MSE and
# per-filter IoU means with their 95% bootstrap confidence intervals.
evaluate_model(model)

  0%|          | 0/998 [00:00<?, ?it/s]



MSE on test set: 46.77042974838566, 95% CI: [31.583166332665332, 66.22923625027832]
Average IoU for glasses filter on test set: 0.17038754065561798, 95% CI: [0.1620838295126825, 0.17891035368005917]
Average IoU for hat filter on test set: 0.7161300969659689, 95% CI: [0.7020551381849107, 0.7287948342147859]
Average IoU for beard filter on test set: 0.7185885386834898, 95% CI: [0.7071729689000632, 0.7309489150791576]


## MobileNetV2

### Trained using regular MSE

In [7]:
# MobileNetV2-based model with default settings; the file name and section
# heading indicate the run trained with regular MSE.
model = MobileNetV2Tuned()
# Presumably restores the trained model/weights from the .h5 file (confirm in
# MobileNetV2Tuned.load_model).
model.load_model("mobilenet_v2_mse.h5")

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.cast_1 (TFOpLambda)      (None, 224, 224, 3)       0         
                                                                 
 tf.math.truediv_1 (TFOpLamb  (None, 224, 224, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_1 (TFOpLam  (None, 224, 224, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 1280)             2257984   
 ional)                                                          
                                                           

In [8]:
# Score the MobileNetV2 model loaded above on the test set; prints MSE and
# per-filter IoU means with their 95% bootstrap confidence intervals.
evaluate_model(model)

  0%|          | 0/998 [00:00<?, ?it/s]

MSE on test set: 46.80416388332219, 95% CI: [29.28662881318192, 67.61812513916722]
Average IoU for glasses filter on test set: 0.18350554472516992, 95% CI: [0.17424985169420606, 0.19339024240791167]
Average IoU for hat filter on test set: 0.7110306654738637, 95% CI: [0.6977899027759671, 0.7239345374458567]
Average IoU for beard filter on test set: 0.7269443656077204, 95% CI: [0.7148583220519837, 0.7394011427763167]


### Trained using custom MSE

In [10]:
# MobileNetV2-based model with default settings; the file name and section
# heading indicate the run trained with the custom MSE loss.
model = MobileNetV2Tuned()
# custom_loss=True is presumably required so the custom loss can be resolved
# when loading the saved model (confirm in MobileNetV2Tuned.load_model).
model.load_model("mobilenet_v2_custom.h5", custom_loss=True)

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.cast_3 (TFOpLambda)      (None, 224, 224, 3)       0         
                                                                 
 tf.math.truediv_3 (TFOpLamb  (None, 224, 224, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_3 (TFOpLam  (None, 224, 224, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 1280)             2257984   
 ional)                                                          
                                                           

In [11]:
# Score the custom-MSE MobileNetV2 loaded above on the test set; prints MSE
# and per-filter IoU means with their 95% bootstrap confidence intervals.
evaluate_model(model)

  0%|          | 0/998 [00:00<?, ?it/s]

MSE on test set: 56.7558450233801, 95% CI: [38.0109107103095, 81.88721888220886]
Average IoU for glasses filter on test set: 0.15723284021677073, 95% CI: [0.1482166793919309, 0.1663343685993454]
Average IoU for hat filter on test set: 0.681165595935777, 95% CI: [0.6680139672170918, 0.6940991975256476]
Average IoU for beard filter on test set: 0.7049192311150386, 95% CI: [0.6935870577801084, 0.7169976533844906]


## ResNet

In [12]:
# ResNet-based model with default settings (the saved summary shows a resnet50
# backbone followed by an 18-unit dense output layer).
model = ResNetTuned()
# Presumably restores the trained model/weights from the .h5 file (confirm in
# ResNetTuned.load_model).
model.load_model("resnet.h5")

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 tf.cast_4 (TFOpLambda)      (None, 224, 224, 3)       0         
                                                                 
 tf.__operators__.getitem (S  (None, 224, 224, 3)      0         
 licingOpLambda)                                                 
                                                                 
 tf.nn.bias_add (TFOpLambda)  (None, 224, 224, 3)      0         
                                                                 
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 dense_4 (Dense)             (None, 18)                36882     
                                                           

In [13]:
# Score the ResNet model loaded above on the test set; prints MSE and
# per-filter IoU means with their 95% bootstrap confidence intervals.
evaluate_model(model)

  0%|          | 0/998 [00:00<?, ?it/s]

MSE on test set: 38.44600311734581, 95% CI: [20.84992206635493, 61.30483188599421]
Average IoU for glasses filter on test set: 0.1966153283192146, 95% CI: [0.1870413837762483, 0.2059936337153714]
Average IoU for hat filter on test set: 0.7428874039649773, 95% CI: [0.730350794795514, 0.7554606397706264]
Average IoU for beard filter on test set: 0.7447226874801864, 95% CI: [0.7333440000780822, 0.7564942979478888]
