# Evaluate a trained model

## Setup

```
jupyter nbextension enable --py widgetsnbextension
```

In [None]:
%reload_ext autoreload
%autoreload 2

import os
from pathlib import Path
import pickle
import shutil
import sys

import azureml.core
from azureml.core import Workspace
from azureml.core.dataset import Dataset
import glob2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import tensorflow as tf
from tensorflow.keras.models import load_model
from tqdm.notebook import tqdm
from IPython.display import display, HTML

print("Azure ML SDK Version: ", azureml.core.VERSION)

sys.path.append(str(Path(os.getcwd()).parent / 'src'))

from eval_utils import calculate_performance, CODE_TO_SCANTYPE, REPO_DIR
from config import CONFIG
from preprocessing import preprocess_targets, preprocess_depthmap, tf_load_pickle

### Select the model to be evaluated from the workspace

In [None]:
# Load the Azure ML workspace through Workspace.from_config().
workspace = Workspace.from_config()

# Disabled interactive picker: one checkbox per experiment found in the workspace.
# checkboxes = []
# for experiment_name, experiment in workspace.experiments.items():
#     checkbox = widgets.Checkbox(value=False, description=experiment_name)
#     display(checkbox)
#     checkboxes.append(checkbox)

# Get the selected experiments.
# selected_experiments = [checkbox.description for checkbox in checkboxes if checkbox.value]

# Hard-coded selection used instead of the picker above.
selected_experiments = ["q3-depthmap-plaincnn-height-95k"]
# RUN_ID also names the output folder of the result CSV written at the end of the notebook.
RUN_ID = 'q3-depthmap-plaincnn-height-95k_1597988908_42c4ef33'  # Run3
# OUTPUT_DIR is only referenced by the (commented-out) download cell below.
OUTPUT_DIR = 'data/logs/q3-depthmap-plaincnn-height-95k/run_03/'

### Download the models on your local system for evaluation

In [None]:
# # Get folder.
# temp_path = "logs"
# if os.path.exists(temp_path):
#     shutil.rmtree(temp_path)
# os.mkdir(temp_path)

# # Download logs of all completed runs
# for selected_experiment in selected_experiments:
#     print(f"Experiment: {selected_experiment}")
#     experiment = workspace.experiments.get(selected_experiment)
#     for run_index, run in enumerate(list(experiment.get_runs())[::-1]):
#         log_path = os.path.join(temp_path, experiment.name, "run_{:02d}".format(run_index + 1))
#         if run.id == RUN_ID:
#             print("Run: {}".format(run_index + 1))
#             run.download_files(output_directory=OUTPUT_DIR, output_paths=None, batch_size=100, append_prefix=False)
# #             run.download_files(prefix=".h5", output_directory=log_path, output_paths=None, batch_size=100, append_prefix=False)

# print("Done.")

In [None]:
# print(log_path)

### Load the models

In [None]:
# Previously used model location, kept for reference:
# MODEL_PATH = 'evaluation_95k_30082020/q3-depthmap-plaincnn-height-100-95k/run_03/outputs/best_model.h5'
# Location of the .h5 model file to evaluate, built from REPO_DIR (imported from eval_utils).
MODEL_PATH = str(REPO_DIR / "data/outputs/best_model_Run3_nodropout.h5")

# Load the saved model with tensorflow.keras.models.load_model.
model = load_model(MODEL_PATH)
# summarize model.
# model.summary()

### Show a sample from the testset

In [None]:
# paths = '../testdepthmap1/1585551618-hlby208u8z/pc_1585551618-hlby208u8z_1593156356859_100_000.p'
paths = REPO_DIR / "data/anon-depthmap-testset/scans/1585551618-hlby208u8z/100/pc_1585551618-hlby208u8z_1593156356859_100_000.p"

# The artifact unpickles to a (depthmap, targets) pair. A context manager is
# used so the file handle is closed as soon as the data has been read.
with open(paths, "rb") as sample_file:
    depthmap, targets = pickle.load(sample_file)
depthmap = preprocess_depthmap(depthmap)
# NOTE: this preview scales the sample by its own maximum, whereas preprocess()
# below divides by CONFIG.NORMALIZATION_VALUE.
depthmap = depthmap/depthmap.max()
# Printed after the scaling above, so this reports the maximum of the scaled depthmap.
print("depthmap_max:",depthmap.max())
depthmap = tf.image.resize(depthmap, (CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH))
targets = preprocess_targets(targets, CONFIG.TARGET_INDEXES)
depthmap.set_shape((CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH, 1))
# targets.set_shape((len(targets_indices,)))
# Trailing semicolon suppresses the notebook's textual output of imshow.
plt.imshow(np.squeeze(depthmap), cmap='gray');

### Preprocess the samples from the test set

In [None]:
def preprocess(path):
    """Load one pickled artifact and prepare it as model input.

    Args:
        path: Path of a pickle file holding a ``(depthmap, targets)`` pair.

    Returns:
        A ``(depthmap, targets)`` tuple. The depthmap has been passed through
        ``preprocess_depthmap``, divided by ``CONFIG.NORMALIZATION_VALUE`` and
        resized to ``(CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH)``;
        the targets are the result of ``preprocess_targets`` with
        ``CONFIG.TARGET_INDEXES``.
    """
    # NOTE: pickle.load can execute arbitrary code; only use it on trusted dataset files.
    # The context manager closes the file handle right after reading.
    with open(path, "rb") as artifact_file:
        depthmap, targets = pickle.load(artifact_file)
    depthmap = preprocess_depthmap(depthmap)
    depthmap = depthmap / CONFIG.NORMALIZATION_VALUE
    depthmap = tf.image.resize(depthmap, (CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH))
    targets = preprocess_targets(targets, CONFIG.TARGET_INDEXES)
    # Pin the static shape to a single-channel image of the target size.
    depthmap.set_shape((CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH, 1))
    return depthmap, targets

## Dataset

To use the dataset, you can:
- mount the dataset
- use datastore (blob storage)
- download the dataset

Choose your preferred option and make sure to adjust the dataset path below accordingly.

In [None]:
# Alternative location, e.g. when the dataset is mounted:
# DATASET_PATH = '/mnt/depthmap/depthmap_testset/scans/*/*/'
# Glob pattern with two wildcard directory levels below scans/; it is expanded
# with glob2.glob in the prediction step.
DATASET_PATH = str(REPO_DIR / "data/anon-depthmap-testset/scans/*/*/")

### Perform the prediction

In [None]:
# Expand the dataset pattern into the list of matching folders and preview the first three.
prediction_folder = glob2.glob(DATASET_PATH)
prediction_folder[:3]

In [None]:
# Upper bound on the number of scan folders to evaluate. 10 keeps DEBUG runs
# fast; set it to None to evaluate every folder matched by DATASET_PATH.
MAX_PREDICTION_FOLDERS = 10
prediction_folder = prediction_folder[:MAX_PREDICTION_FOLDERS]

# One entry per folder: [folder path, artifact paths, predictions, targets].
predictions = []
for qrcode in tqdm(prediction_folder):
    depthfiles = glob2.glob(qrcode + '/*.p')
    if not depthfiles:
        # No pickled artifacts in this folder, so there is nothing to predict.
        continue
    depthmaps_pred = []
    labels = []
    for depthfile in depthfiles:
        depths, targets = preprocess(depthfile)
        depthmaps_pred.append(depths)
        labels.append(targets)
    # Batch all artifacts of the folder into one tensor and predict them at once.
    files_to_predict = tf.stack(depthmaps_pred)
    inference = model.predict(files_to_predict)
    # np.squeeze() turns a single-artifact batch into a 0-d array; keep it 1-d so
    # it stays aligned with the per-artifact lists.
    predictions.append([qrcode, depthfiles, np.atleast_1d(np.squeeze(inference)), labels])

In [None]:
## putting the predictions in a dataframe
# Build one frame per folder and concatenate once at the end. DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every iteration.
frames = []
for qrcode, artifacts, predicted, labels in tqdm(predictions):
    frames.append(pd.DataFrame({
        'qrcode': qrcode,
        'artifacts': artifacts,
        'predicted': predicted,
        'GT': np.array(labels).flatten(),
    }))
# pd.concat raises on an empty list, so fall back to an empty frame as before.
df = pd.concat(frames) if frames else pd.DataFrame([])
df.head()

In [None]:
def extract_qrcode(row):
    """Return the qrcode folder name from the row's artifact path.

    The qrcode is the third-to-last path component of ``row['artifacts']``
    (``.../<qrcode>/<scantype>/<file>``). ``Path.parts`` is used instead of
    splitting on ``'/'`` so repeated or OS-specific separators do not shift
    the component positions.
    """
    return Path(row['artifacts']).parts[-3]

def extract_scantype(row):
    """Return the scantype folder name from the row's artifact path.

    The scantype is the second-to-last path component of ``row['artifacts']``
    (``.../<qrcode>/<scantype>/<file>``). ``Path.parts`` is used instead of
    splitting on ``'/'`` so repeated or OS-specific separators do not shift
    the component positions.

    Code reference:
    https://dev.azure.com/cgmorg/ChildGrowthMonitor/_wiki/wikis/ChildGrowthMonitor.wiki/15/Codes-for-Pose-and-Scan-step
    """
    return Path(row['artifacts']).parts[-2]

In [None]:
# Overwrite the 'qrcode' column with the value parsed row by row from the artifact path.
df['qrcode'] = df.apply(extract_qrcode, axis='columns')
df.head()

In [None]:
df['artifacts'].iloc[1]  # sample of what an artifact path looks like on my setup; adapt the path parsing to your own directory layout

In [None]:
len(df['qrcode'].unique())  # number of distinct qrcodes, i.e. the total number of scans

In [None]:
# Add the scantype parsed from each artifact path, then count artifacts per scantype.
df['scantype'] = df.apply(extract_scantype, axis='columns')
df['scantype'].value_counts()

### Group the artifact results by qrcode and scantype, taking the mean within each scantype

In [None]:
# Average prediction and ground truth per (qrcode, scantype) group. Despite the
# name, MAE holds group means here; the signed error column is added afterwards.
# numeric_only=True skips the string column 'artifacts', which pandas >= 2.0
# refuses to average instead of silently dropping it.
MAE = df.groupby(['qrcode', 'scantype']).mean(numeric_only=True)
MAE

## Error between predicted and ground truth

In [None]:
def avgerror(row):
    """Return the signed error of a row: ground truth ('GT') minus 'predicted'."""
    return row['GT'] - row['predicted']

In [None]:
# Add the signed error (GT - predicted) of every (qrcode, scantype) group.
MAE['error'] = MAE.apply(avgerror, axis='columns')
MAE

In [None]:
## Forming the unique name for the index values
model_name = 'q3-depthmap-plaincnn-height-100-95k'
run_no = '_front_run_03'
# Model name followed by the run suffix; the scantype is appended per result later.
complete_name = f"{model_name}{run_no}"
complete_name

## Calculating accuracies across the scantypes

In [None]:
# Compute one performance table per scantype code and collect them in `dfs`.
# The table is bound to `performance_df` rather than `df`, so the artifact-level
# predictions frame `df` is not overwritten and earlier cells stay re-runnable.
dfs = []
for code, scantype in CODE_TO_SCANTYPE.items():
    performance_df = calculate_performance(code, MAE)
    full_model_name = complete_name + scantype
    # Relabel the row with index 0 to the model/run/scantype name.
    performance_df.rename(index={0: full_model_name}, inplace=True)
    display(HTML(performance_df.to_html()))
    dfs.append(performance_df)

## Combining the results for all accuracies

In [None]:
# Stack the per-scantype tables, round to two decimals and label the index.
result = pd.concat(dfs).round(2)
result.index.name = 'Model_Scantype'
result

In [None]:
## Saving the model results in csv file
CSV_OUT_PATH = REPO_DIR / 'data' / 'eval' / RUN_ID / 'result.csv'
Path(CSV_OUT_PATH.parent).mkdir(parents=True, exist_ok=True)
result.to_csv(CSV_OUT_PATH, index=True)