# Evaluate a trained model

Setup for tqdm widgets: https://ipywidgets.readthedocs.io/en/stable/user_install.html#installing-the-jupyterlab-extension

In [None]:
%reload_ext autoreload
%autoreload 2

import os
from pathlib import Path
import pickle
import shutil
import sys
from typing import List

import azureml.core
from azureml.core import Experiment, Workspace
from azureml.core.dataset import Dataset
from azureml.core.run import Run
import glob2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import tensorflow as tf
from tensorflow.keras.models import load_model
from tqdm.notebook import tqdm
from IPython.display import display, HTML

# Report the installed Azure ML SDK version so environment mismatches are easy to spot.
print("Azure ML SDK Version: ", azureml.core.VERSION)

# Make `<parent of the current working directory>/src` importable so `eval_utils` resolves.
sys.path.append(str(Path(os.getcwd()).parent / 'src'))
from eval_utils import calculate_performance, CODE_TO_SCANTYPE, CONFIG, MODEL_CKPT_FILENAME, REPO_DIR, preprocess_targets, preprocess_depthmap, preprocess, extract_qrcode, extract_scantype, avgerror

# Make `REPO_DIR/src/common/model_utils` importable for the multiartifact preprocessing helpers.
sys.path.append(str(REPO_DIR / 'src/common/model_utils'))
from preprocessing_multiartifact2 import create_multiartifact_samples
from preprocessing_multiartifact2_multiartifact import create_multiartifact_sample

In [None]:
print(REPO_DIR)

# The run id decides where data lives: ids beginning with "OfflineRun" use the
# repository's data folder, every other run uses the current working directory.
_run_id = Run.get_context().id
if _run_id.startswith("OfflineRun"):
    DATA_DIR = REPO_DIR / 'data'
else:
    DATA_DIR = Path(".")

print(DATA_DIR)

### Select the model to be evaluated from the workspace

In [None]:
# Load the AzureML workspace via Workspace.from_config().
workspace = Workspace.from_config()

# Training run to evaluate. An alternative run is kept below for quick switching:
# RUN_ID = 'q3-depthmapmultiartifactlatefusion-plaincnn-height-95k_1614066635_8c470f0a'
# RUN_NUMBER = 4
RUN_ID = 'q3-depthmapmultiartifactlatefusion-plaincnn-height-95k_1614177517_ecd7b6e2'
RUN_NUMBER = 6

# The experiment name is the run id without its last two '_'-separated fields.
_run_id_fields = RUN_ID.split('_')
EXPERIMENT = "_".join(_run_id_fields[:-2])

# NOTE(review): this path hard-codes an experiment name that differs from EXPERIMENT
# above, and OUTPUT_DIR is not used by any of the following cells -- confirm it is still needed.
OUTPUT_DIR = f'data/logs/q3-depthmapmultiartifact-plaincnn-height-95k/run_{RUN_NUMBER}/'

### Download the model to your local system for evaluation

In [None]:
# Fetch the checkpoint file of the selected run into DATA_DIR/pretrained/<RUN_ID>.
print(f"Downloading model from {RUN_ID}")
previous_experiment = Experiment(workspace=workspace, name=EXPERIMENT)
previous_run = Run(experiment=previous_experiment, run_id=RUN_ID)
model_fpath = DATA_DIR / "pretrained" / RUN_ID
previous_run.download_files(
    prefix=f"outputs/{MODEL_CKPT_FILENAME}",
    output_directory=model_fpath,
)

In [None]:
# Debug with local model
# model_fpath = DATA_DIR / 'outputs' / MODEL_CKPT_FILENAME

### Load the models

In [None]:
# Display the path currently stored in model_fpath (cell output only).
model_fpath

In [None]:
# The checkpoint sits in the "outputs" sub-folder below model_fpath.
checkpoint_path = f'{model_fpath}/outputs/{MODEL_CKPT_FILENAME}'
model = load_model(checkpoint_path)
# Uncomment to print the layer overview of the loaded model.
# model.summary()

### Show a sample from the testset

In [None]:
# Pick exactly one DATASET_DIR; the alternatives stay commented out.
#
# Clean test set (anon-depthmap-testset):
# DATASET_DIR = DATA_DIR / "anon-depthmap-testset" / "scans"  # locally
# DATASET_DIR = Path('/mnt/datasets/depthmap_testset') / "scans"  # on VM
#
# Unclean test set (anon-realtime-testdata):
# DATASET_DIR = DATA_DIR / "anon-realtime-testdata" / "depthmaps" # locally
DATASET_DIR = Path('/mnt/datasets/realtime_evaluation', "depthmaps")  # on VM

In [None]:
# Load one pickled sample, run it through the preprocessing steps and plot it.
paths = DATASET_DIR / "1585015607-01sk32pthg/100/pc_1585015607-01sk32pthg_1591875909425_100_000.p"

# NOTE: unpickling can execute arbitrary code -- only load files from a trusted dataset.
# The context manager guarantees the file handle is closed right after loading.
with open(paths, "rb") as sample_file:
    depthmap, targets = pickle.load(sample_file)

depthmap = preprocess_depthmap(depthmap)
# Scale by the sample's own maximum, so the value printed below is 1 for a positive maximum.
depthmap = depthmap / depthmap.max()
print("depthmap_max:", depthmap.max())
depthmap = tf.image.resize(depthmap, (CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH))
targets = preprocess_targets(targets, CONFIG.TARGET_INDEXES)
depthmap.set_shape((CONFIG.IMAGE_TARGET_HEIGHT, CONFIG.IMAGE_TARGET_WIDTH, 1))
# targets.set_shape((len(targets_indices,)))
# np.squeeze drops the size-1 channel axis for plotting; the trailing ';' keeps
# the notebook from echoing the returned image object.
plt.imshow(np.squeeze(depthmap), cmap='gray');

## Dataset

To use the dataset, you can:
- mount the dataset
- use datastore (blob storage)
- download the dataset

Choose whichever option you prefer, and make sure to adjust the absolute dataset path accordingly.

In [None]:
# Glob pattern for the entries directly below DATASET_DIR.
# pathlib drops the trailing slash of "*/", so the resulting pattern ends in "*".
DATASET_PATH = str(DATASET_DIR.joinpath("*/"))

### Perform the prediction

In [None]:
# Expand the glob pattern into the list of per-qrcode paths and report how many matched.
qrcode_paths = glob2.glob(DATASET_PATH)
print(len(qrcode_paths))
# For a faster DEBUG run, shrink the list here, e.g. qrcode_paths = qrcode_paths[:100]
# Preview the first three matches.
qrcode_paths[:3]

In [None]:
# Build the collection of samples from the globbed qrcode paths. Each element is
# later handed to create_multiartifact_sample; its exact layout is defined by the helper.
samples_paths = create_multiartifact_samples(qrcode_paths, CONFIG)

In [None]:
# len(samples_paths), samples_paths[0]

In [None]:
# Run the model on every sample, one at a time, and collect one row per sample:
# [first artifact path of the sample, scalar prediction, first target value].
predictions = []
for sample_paths in tqdm(samples_paths):
    depthmap, targets = create_multiartifact_sample(
        sample_paths,
        CONFIG.NORMALIZATION_VALUE,
        CONFIG.IMAGE_TARGET_HEIGHT,
        CONFIG.IMAGE_TARGET_WIDTH,
        CONFIG.TARGET_INDEXES,
    )

    # tf.stack adds a leading axis of size 1, so the single sample is passed as a batch.
    depthmaps = tf.stack([depthmap])
    pred = model.predict(depthmaps)

    first_artifact = sample_paths[0]
    predicted_value = float(np.squeeze(pred))
    ground_truth = targets[0]
    predictions.append([first_artifact, predicted_value, ground_truth])

In [None]:
# Timing note from the original run: predicting all 1745 scans took about 4.2 minutes.

# Inspect the first collected row: [artifact path, prediction, target].
predictions[0]

In [None]:
# Turn the collected rows into a dataframe with one named column per row entry.
column_names = ['artifacts', 'predicted', 'GT']
df = pd.DataFrame(data=predictions, columns=column_names)

In [None]:
# df.head(5)

In [None]:
# Example of what an artifacts path looks like in this setup; adjust the path
# handling to match your own directory layout.
df['artifacts'].iloc[1]

In [None]:
# Derive the scantype and qrcode columns row by row, then show how many rows each scantype has.
df['scantype'] = df.apply(extract_scantype, axis='columns')
df['qrcode'] = df.apply(extract_qrcode, axis='columns')
df['scantype'].value_counts()

In [None]:
# Preview the first five rows, now including the scantype and qrcode columns.
df.head(5)

In [None]:
# Number of rows in the dataframe (one per predicted sample).
len(df)

In [None]:
# Number of distinct qrcodes, i.e. the total number of scans.
unique_qrcodes = df['qrcode'].unique()
len(unique_qrcodes)

### Group the artifact results by qrcode and scantype, taking the mean within each group

In [None]:
# Average prediction and ground truth per (qrcode, scantype) group.
# The numeric columns are selected explicitly because the frame also holds the
# string column 'artifacts': calling mean() on it raises TypeError on pandas >= 2.0,
# and older versions only worked by silently dropping that column.
MAE = df.groupby(['qrcode', 'scantype'])[['predicted', 'GT']].mean()
# MAE

## Error between predicted and ground truth

In [None]:
# Apply avgerror to every grouped row and store the result in the 'error' column.
per_group_error = MAE.apply(avgerror, axis='columns')
MAE['error'] = per_group_error
# MAE

## Calculate accuracies across the scantypes

In [None]:
# Compute one single-row performance table per scantype code, label it, show it
# and collect it for the combined result.
dfs = []
for code, scantype in CODE_TO_SCANTYPE.items():
    # Unique index label for this row.
    # NOTE(review): the label carries the scantype twice (before "_run_" and as a
    # suffix); kept as-is so existing result files stay comparable -- confirm intent.
    full_model_name = f'{EXPERIMENT}{scantype}_run_{RUN_NUMBER}{scantype}'

    df_out = calculate_performance(code, MAE)
    # Replace the default index label 0 with the descriptive name and round for display.
    df_out = df_out.rename(index={0: full_model_name}).round(2)
    display(HTML(df_out.to_html()))
    dfs.append(df_out)

In [None]:
# Display the artifacts path stored in the first dataframe row.
df['artifacts'].iloc[0]

## Combine the results for all accuracies

In [None]:
# Stack the per-scantype tables into one frame and name its index column.
result = pd.concat(dfs).rename_axis('Model_Scantype')
result

In [None]:
# Write the combined results to DATA_DIR/eval/<RUN_ID>/result.csv,
# creating the target directory first if it does not exist yet.
CSV_OUT_PATH = DATA_DIR / 'eval' / RUN_ID / 'result.csv'
csv_out_dir = CSV_OUT_PATH.parent
csv_out_dir.mkdir(parents=True, exist_ok=True)
result.to_csv(path_or_buf=CSV_OUT_PATH, index=True)