# Classifier performance dashboard

In [None]:
import altair as alt
import datapane as dp
import json
import math
import matplotlib.pyplot as plt
import numpy as np
import time

import pandas as pd

from joblib import load
from io import BytesIO, StringIO
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

plt.ioff()

## Wrangling and visualisation functions

In [None]:
def predictions_to_df(id_test=None, y_test=None, predictions=None):
    """Collect per-sample predictions (and labels, when known) in a DataFrame.

    Args:
        id_test: Optional sequence of sample identifiers (list or NumPy
            array). When given, it fills the "sample" column and becomes
            the index; otherwise samples are numbered 0..n-1.
        y_test: Optional sequence of true classes. When missing, the
            "true class" column is filled with the string "unknown" and no
            "true positive" column is added.
        predictions: Sequence of predicted classes, one per sample.

    Returns:
        DataFrame with columns "sample", "true class", "predicted class"
        and, when `y_test` is supplied, a boolean "true positive" column
        marking rows where the prediction equals the true class.
    """

    def _provided(values):
        # Explicit None/length test: bool(ndarray) raises ValueError for
        # arrays with more than one element, so truthiness is unsafe here.
        return values is not None and len(values) > 0

    has_ids = _provided(id_test)
    has_truth = _provided(y_test)

    df_predictions = pd.DataFrame(
        {
            "sample": id_test if has_ids else range(len(predictions)),
            "true class": y_test if has_truth else "unknown",
            "predicted class": predictions,
        },
        # Only index by id_test when it is actually used for the rows.
        index=id_test if has_ids else None,
    )

    if has_truth:
        df_predictions["true positive"] = (
            df_predictions["true class"] == df_predictions["predicted class"]
        )

    return df_predictions

In [None]:
def plot_banner(X_test, y_test):
    """Render a 2 x 26 strip of digit images, coloured by their true class.

    Args:
        X_test: Sequence of flattened images; each is reshaped to 8 x 8.
        y_test: Sequence of integer class labels, used to pick a colormap
            from the sequential palette list below.

    Returns:
        The Matplotlib figure. It is closed in pyplot before returning so
        it is not displayed implicitly.
    """
    cm_sequential = [
        "Purples",
        "Blues",
        "Greens",
        "YlOrBr",
        "OrRd",
        "PuRd",
        "RdPu",
        "BuPu",
        "GnBu",
        "PuBu",
        "YlGnBu",
        "PuBuGn",
        "BuGn",
        "YlGn",
    ]

    plt_banner, axes = plt.subplots(ncols=26, nrows=2, figsize=(26, 2))
    cells = axes.flatten()

    # Hide every cell up front so that cells left without an image (fewer
    # samples than grid slots) do not show empty axis frames.
    for ax in cells:
        ax.set_axis_off()

    for ax, image, truth in zip(cells, np.array(X_test), y_test):
        # Pass the colormap by name: imshow resolves it itself, which avoids
        # plt.cm.get_cmap (removed from matplotlib.cm in Matplotlib 3.9).
        ax.imshow(
            image.reshape(8, 8),
            cmap=cm_sequential[truth],
            interpolation="nearest",
        )

    plt_banner.tight_layout()
    # Close this specific figure rather than whichever one is current.
    plt.close(plt_banner)
    return plt_banner

In [None]:
def plot_confusion_matrix(y_test, predictions, class_labels):
    """Build an annotated Altair heatmap of the confusion matrix.

    Args:
        y_test: True classes.
        predictions: Predicted classes.
        class_labels: Labels used for both heatmap axes.

    Returns:
        Tuple of (layered Altair chart, confusion matrix array).
    """
    # With meshgrid's default indexing the first grid varies along columns
    # (predicted class) and the second along rows (true class), matching the
    # row = true / column = predicted layout of the matrix when ravelled.
    predicted_grid, true_grid = np.meshgrid(class_labels, class_labels)
    cm = confusion_matrix(y_test, predictions)

    cells = pd.DataFrame(
        {
            "Predicted": predicted_grid.ravel(),
            "True": true_grid.ravel(),
            "z": cm.ravel(),
        }
    )

    rects = alt.Chart(cells).mark_rect()
    rects = rects.encode(x="Predicted:O", y="True:O", color="z:Q")
    heatmap = rects.properties(width="container", height=416)

    # Counts below 50 are drawn in gray, the rest in white.
    label_colour = alt.condition(
        alt.datum.z < 50, alt.value("gray"), alt.value("white")
    )
    annotations = heatmap.mark_text(baseline="middle").encode(
        text="z:Q",
        color=label_colour,
    )

    return heatmap + annotations, cm

In [None]:
def plot_preview(X_test, predictions, y_test):
    """Draw up to 100 digit images in a 10-column grid, titled by prediction.

    Correct predictions are drawn in reversed grayscale, misclassified ones
    in red.

    Args:
        X_test: Sequence of flattened images; each is reshaped to 8 x 8.
        predictions: Predicted class per image.
        y_test: True class per image, compared against `predictions`.

    Returns:
        The Matplotlib figure. It is closed in pyplot before returning so
        it is not displayed implicitly.
    """
    # At most 10 rows, and at least one so that an empty prediction set
    # still yields a valid (blank) figure instead of a subplots() error.
    nrows = max(1, min(10, math.ceil(len(predictions) / 10)))
    fig, axes = plt.subplots(ncols=10, nrows=nrows, figsize=(11, nrows + 1))
    cells = axes.flatten()

    # Hide every cell first so trailing cells without an image stay blank.
    for ax in cells:
        ax.set_axis_off()

    for ax, image, prediction, truth in zip(
        cells, np.array(X_test), predictions, y_test
    ):
        cmap_name = "gray_r" if prediction == truth else "Reds"
        ax.imshow(image.reshape(8, 8), cmap=cmap_name, interpolation="nearest")
        ax.set_title(f"pred: {prediction}")

    fig.tight_layout()
    # Close this specific figure rather than whichever one is current.
    plt.close(fig)
    return fig

In [None]:
def plot_tp_trade_offs(df_tp_trade_offs, class_labels):
    """Plot per-class true-positive counts as one line per classifier.

    Args:
        df_tp_trade_offs: DataFrame indexed by classifier name with one
            column per entry of `class_labels`. It is not modified.
        class_labels: Column names to fold into the x axis.

    Returns:
        Altair line chart with one line per classifier.
    """
    # Work on a copy so the caller's frame does not gain a "clf_name" column
    # as a side effect of plotting.
    chart_data = df_tp_trade_offs.assign(clf_name=df_tp_trade_offs.index)

    fig = (
        alt.Chart(chart_data)
        .transform_window(index="count()")
        # Fold the class columns into long form: "key" holds the class
        # label, "value" the count.
        .transform_fold(class_labels)
        .mark_line()
        .encode(
            x=alt.X(
                "key:N",
                scale=alt.Scale(
                    zero=False,
                    padding=0,
                ),
                axis=alt.Axis(grid=True),
                title="Digit",
            ),
            y=alt.Y("value:Q", scale=alt.Scale(zero=False), title="True Positives"),
            color=alt.Color("clf_name:N", scale=alt.Scale(scheme="viridis")),
            detail="clf_name:N",
            strokeWidth=alt.value(3),
            opacity=alt.value(0.7),
            tooltip=["clf_name:N", "key:N", "value:Q"],
        )
        .configure_legend(orient="top", title=None)
        .properties(width="container", height=200)
    )
    return fig

## Report block functions

In [None]:
def build_metrics_group(y_test, predictions):
    """Return a three-column group of weighted precision, recall and F1.

    Each score is computed with `average="weighted"` and shown with two
    decimal places.
    """
    scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-score", f1_score),
    )

    tiles = []
    for heading, scorer in scorers:
        score = scorer(y_test, predictions, average="weighted")
        tiles.append(dp.BigNumber(heading, value=f"{score:.2f}"))

    return dp.Group(*tiles, columns=3)

In [None]:
def build_narrative_block(clf_name):
    """Load the classifier's discussion markdown and fill in its placeholders.

    The file is looked up under ./assets/discussion/ using `clf_name` as
    given; the lower-cased name is used for the `clf_name` placeholder and
    for the repository URL in the `repo` code snippet.
    """
    slug = clf_name.lower()
    discussion = dp.Text(file=f"./assets/discussion/{clf_name}.md")
    clone_snippet = dp.Code(
        code=f"git clone https://github.com/datapane/{slug}-digits.git",
        language="bash",
    )
    return discussion.format(clf_name=slug, repo=clone_snippet)

In [None]:
def build_visualization_blocks(plt_cm, df_predictions, plt_preview):
    """Wrap the confusion matrix, predictions table and preview in tabs.

    Args:
        plt_cm: Confusion matrix plot.
        df_predictions: Predictions DataFrame shown as a data table.
        plt_preview: Preview plot of the classified images.

    Returns:
        A tabbed `dp.Select` labelled "Tabs" holding the three blocks.
    """
    tabs = [
        dp.Plot(plt_cm, label="Confusion Matrix"),
        dp.DataTable(df_predictions, label="Predictions"),
        dp.Plot(plt_preview, label="Preview"),
    ]
    return dp.Select(blocks=tabs, label="Tabs", type=dp.SelectType.TABS)

## Load and wrangle data

In [None]:
# Read the stored evaluation results. The encoding is set explicitly so that
# decoding does not depend on the platform's default locale.
with open("./assets/results.json", "r", encoding="utf-8") as f:
    results = json.load(f)

# Test inputs, true classes, sample identifiers and the per-classifier
# predictions (a mapping keyed by classifier name).
X_test = results["X_test"]
y_test = results["y_test"]
id_test = results["id_test"]
predictions = results["predictions"]

# Class labels "0".."9" as strings; they double as the column names of the
# per-classifier true-positive table filled in while building the pages.
class_labels = [str(x) for x in range(0, 10)]
df_tp_trade_offs = pd.DataFrame(columns=class_labels)

## Build classifier pages

In [None]:
# One dashboard page per classifier, keyed by classifier name.
classifier_pages = {}

for clf_name, clf_predictions in predictions.items():
    # Tabular view of this classifier's predictions.
    df_predictions = predictions_to_df(id_test, y_test, clf_predictions)

    # Confusion matrix chart plus the raw matrix.
    plt_cm, cm = plot_confusion_matrix(y_test, clf_predictions, class_labels)

    # The matrix diagonal holds the per-class correct counts; store it as
    # this classifier's row of the trade-off table.
    df_tp_trade_offs.loc[clf_name, class_labels] = pd.Series(cm.diagonal()).values

    # Image grid of the classified samples.
    plt_preview = plot_preview(X_test, clf_predictions, y_test)

    # Assemble the page: metrics on top, narrative and visualisations
    # side by side underneath.
    metrics_group = build_metrics_group(y_test, clf_predictions)
    narrative_block = build_narrative_block(clf_name)
    visualization_blocks = build_visualization_blocks(
        plt_cm, df_predictions, plt_preview
    )
    detail_columns = dp.Group(narrative_block, visualization_blocks, columns=2)

    classifier_pages[clf_name] = dp.Group(
        metrics_group,
        dp.Group(detail_columns),
        label=clf_name,
    )

## Build header and overview blocks

In [None]:
# Header banner: strip of test digits coloured by true class.
banner_figure = plot_banner(X_test, y_test)
banner_block = dp.Plot(banner_figure)

# Overview chart: per-class true positives for every classifier.
trade_offs_chart = plot_tp_trade_offs(df_tp_trade_offs, class_labels)
trade_offs_block = dp.Plot(trade_offs_chart)

## Upload & Predict

In [None]:
def predict_digits(params):
    """Classify an uploaded comma-delimited file and build a results view.

    Args:
        params: Mapping of control values. Reads "test_data" (path of the
            uploaded file) and "model" (key into the module-level `models`
            mapping).

    Returns:
        A `dp.View` named "predictions" containing a download link to a
        saved HTML report, the preview plot and the predictions table.
    """
    # Use a context manager so the uploaded file's handle is always closed.
    with open(params["test_data"]) as upload:
        test_data = upload.read()

    # genfromtxt returns a 1-D array for a single-row file; restore the
    # (n_samples, n_features) layout that predict() and the preview expect.
    X_test = np.atleast_2d(
        np.genfromtxt(StringIO(test_data), dtype=float, delimiter=",")
    )
    predictions = models[params["model"]].predict(X_test)

    results = dp.Group(
        # No ground truth is available for uploads, so the predictions are
        # passed as the "truth" too and every image renders as correct.
        plot_preview(X_test, predictions, predictions),
        dp.DataTable(predictions_to_df(predictions=predictions)),
    )

    # Timestamped file name so repeated runs do not overwrite each other.
    results_path = f"results_{time.time()}.html"

    dp.save_report(dp.View(results), results_path, open=False)

    results_view = dp.View(
        dp.Text("Download your report:"),
        dp.Attachment(file=results_path),
        results,
        name="predictions"
    )

    return results_view

In [None]:
# Fitted classifiers keyed by name, one per entry in `predictions`.
# NOTE(review): joblib's load unpickles the file, so these model files must
# come from a trusted source.
models = {}

for clf_name in predictions:
    clf = load(f"assets/models/{clf_name}.pkl")
    models[clf_name] = clf

# Form controls: classifier picker (first model preselected) and file upload.
model_names = list(models)
model_choice = dp.Choice(
    "model",
    initial=model_names[0],
    options=model_names,
    label="Select a classifier",
)
upload_field = dp.File("test_data", label="Upload test data")
controls = dp.Controls(model_choice, upload_field)

# The function block targets the "predictions" name, which is initially held
# by the placeholder text below.
predict_function = dp.Function(
    predict_digits, target="predictions", submit_label="Predict", controls=controls
)
results_placeholder = dp.Text("Results", name="predictions")
upload_and_predict = dp.Group(predict_function, results_placeholder)

## Build report

In [None]:
# Performance breakdown: one tab per classifier page.
classifier_tabs = dp.Select(
    blocks=classifier_pages.values(),
    type=dp.SelectType.TABS,
)

# First page: overview chart followed by the per-classifier tabs.
study_results_page = dp.Group(
    trade_offs_block,
    classifier_tabs,
    label="Study Results",
)

# Second page: interactive upload-and-predict form.
upload_page = dp.Group(upload_and_predict, label="Upload & Predict")

report = dp.View(
    # header material
    banner_block,
    "# Classifier Performance Dashboard",
    "This dashboard highlights the performance of multiple classifiers on the [Optical Recognition of Handwritten Digits Data Set](https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits).",
    dp.Select(blocks=[study_results_page, upload_page]),
)

dp.serve(report)