In [None]:
import json
import os

import matplotlib.pyplot as plt
import mlflow
import pandas as pd
import seaborn as sns

from config.loader import load_config

plt.rcParams['text.usetex'] = True

In [None]:
def load_artifact(type):
    """Read ``tmp/artifacts.json`` and flatten it into two long-format frames.

    The JSON document is indexed as ``artifacts[key][epoch][nuclide]`` for the
    keys ``validation_tpr``, ``validation_fpr``, ``training_auc`` and
    ``validation_auc``; the list position is used as the epoch number.

    Parameters
    ----------
    type : str
        Label stored in the ``type`` column of both frames. The name shadows
        the builtin but is kept so existing keyword callers keep working.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``data_tpr_fpr`` with columns ``validation_tpr``, ``validation_fpr``,
        ``nuclide``, ``epoch``, ``type`` (one row per ROC point) and
        ``data_auc`` with columns ``training_auc``, ``validation_auc``,
        ``nuclide``, ``epoch``, ``type`` (one row per nuclide and epoch).
        When the artifact lists are empty the frames only carry ``type``.
    """
    with open("tmp/artifacts.json") as f:
        artifacts = json.load(f)

    # Collect the per-nuclide frames and concatenate once; concatenating
    # inside the loop copies the accumulated frame on every iteration.
    roc_frames = []
    for epoch, tpr_by_nuclide in enumerate(artifacts["validation_tpr"]):
        for nuclide, tpr_values in tpr_by_nuclide.items():
            nuclide_df = pd.DataFrame(tpr_values, columns=["validation_tpr"])
            nuclide_df["validation_fpr"] = artifacts["validation_fpr"][epoch][nuclide]
            nuclide_df["nuclide"] = nuclide
            nuclide_df["epoch"] = epoch
            roc_frames.append(nuclide_df)
    # pd.concat rejects an empty list, so fall back to an empty frame.
    data_tpr_fpr = pd.concat(roc_frames, axis=0) if roc_frames else pd.DataFrame()
    data_tpr_fpr = data_tpr_fpr.reset_index(drop=True)
    data_tpr_fpr["type"] = type

    auc_frames = []
    for epoch, auc_by_nuclide in enumerate(artifacts["training_auc"]):
        for nuclide, training_auc in auc_by_nuclide.items():
            nuclide_df = pd.DataFrame([training_auc], columns=["training_auc"])
            nuclide_df["validation_auc"] = artifacts["validation_auc"][epoch][nuclide]
            nuclide_df["nuclide"] = nuclide
            nuclide_df["epoch"] = epoch
            auc_frames.append(nuclide_df)
    data_auc = pd.concat(auc_frames, axis=0) if auc_frames else pd.DataFrame()
    data_auc = data_auc.reset_index(drop=True)
    data_auc["type"] = type
    return data_tpr_fpr, data_auc


def load_run_id(run_id, type):
    """Fetch the metric histories and the ``artifacts.json`` file of one MLflow run.

    Side effects: exports the MinIO credentials / endpoint from the project
    config into ``os.environ``, sets the global MLflow tracking URI and
    downloads ``artifacts.json`` into the local ``tmp`` directory (overwriting
    the file of a previously loaded run).

    Parameters
    ----------
    run_id : str
        MLflow run id to load.
    type : str
        Label stored in the ``type`` column of every returned frame.

    Returns
    -------
    tuple of three pandas.DataFrame
        ``data_mic_mac_loss`` (columns ``training_macro_loss``,
        ``training_micro_loss``, ``validation_macro_loss``, ``epoch``,
        ``type``; one row per entry of the ``training_macro_loss`` history)
        followed by the two frames returned by ``load_artifact``.
    """
    # Read the configuration once instead of re-loading it for every key.
    cfg = load_config()
    for env_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
        os.environ[env_key] = cfg["minio"][env_key]
    tracking_uri = cfg["mlflow"]["uri"]
    mlflow.set_tracking_uri(uri=tracking_uri)
    client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri)

    # The destination directory has to exist before MLflow downloads into it.
    os.makedirs("tmp", exist_ok=True)
    client.download_artifacts(run_id=run_id, path="artifacts.json", dst_path="tmp")

    metric_keys = ("training_macro_loss", "training_micro_loss", "validation_macro_loss")
    histories = {
        key: [metric.value for metric in client.get_metric_history(run_id=run_id, key=key)]
        for key in metric_keys
    }
    # NOTE(review): the position in the history is used as the epoch, which
    # assumes MLflow returns the entries in logging order - confirm.
    # The training macro history defines the number of epochs; a shorter
    # history is padded with NaN instead of raising IndexError, a longer one
    # is truncated (as before).
    epochs = range(len(histories["training_macro_loss"]))
    data_mic_mac_loss = pd.DataFrame({
        key: pd.Series(values, dtype="float64").reindex(epochs)
        for key, values in histories.items()
    })
    data_mic_mac_loss["epoch"] = data_mic_mac_loss.index
    data_mic_mac_loss["type"] = type

    data_tpr_fpr, data_auc = load_artifact(type)
    return data_mic_mac_loss, data_tpr_fpr, data_auc

In [None]:
# Display labels of the four training-data configurations; they double as the
# values of the "type" column that later cells filter on.
# Raw strings: "\g" is not a valid escape sequence, so the non-raw form only
# worked by accident and triggers an invalid-escape warning on current Python.
name_space = [
    r"(A) 844 gemessene $\gamma$-Spektren",
    r"(B) 844 synthetische $\gamma$-Spektren",
    r"(C) 844 synthetische und gemessene $\gamma$-Spektren",
    r"(D) 10195 synthetische und 844 gemessene $\gamma$-Spektren",
]

# One MLflow run per configuration.
data_mic_mac_loss, data_tpr_fpr, data_auc = load_run_id(
    "e78098da07bb482aa6b451bd7c6fc310", name_space[0])
data_mic_mac_loss_syntetics_only, data_tpr_fpr_synthetics_only, data_auc_synthetics = load_run_id(
    "12d920f422764a989abf5d48aa890e3e", name_space[1])
data_mic_mac_loss_ms, data_tpr_fpr_ms, data_auc_ms = load_run_id(
    "357e0e2a6f7648fc8792a53bccff0015", name_space[2])
data_mic_mac_loss_lms, data_tpr_fpr_lms, data_auc_lms = load_run_id(
    "6351cd71637c48e5a20ab8f669247b60", name_space[3])

# Stack the per-run frames; every row carries its run label in "type".
data_mic_mac_loss_all = pd.concat(
    [data_mic_mac_loss, data_mic_mac_loss_syntetics_only, data_mic_mac_loss_ms, data_mic_mac_loss_lms],
    axis=0,
).reset_index(drop=True)
data_tpr_fpr_all = pd.concat(
    [data_tpr_fpr, data_tpr_fpr_synthetics_only, data_tpr_fpr_ms, data_tpr_fpr_lms],
    axis=0,
).reset_index(drop=True)
# The synthetic run comes first here (unlike the two frames above); the row
# order is kept as it was, consumers select rows by "type".
data_auc_all = pd.concat(
    [data_auc_synthetics, data_auc, data_auc_ms, data_auc_lms],
    axis=0,
).reset_index(drop=True)


In [None]:
plt.rcParams['text.usetex'] = True
import matplotlib.patheffects as path_effects


def create_plot_axis(ax, data, color, label):
    """Plot the training and validation macro curves of one run and mark its best epoch.

    The training curve is drawn dashed and semi-transparent, the validation
    curve solid. A vertical line from 0 up to the maximum of the validation
    curve marks the first epoch at which that maximum is reached, annotated
    with its value.

    NOTE(review): the columns are called ``*_macro_loss`` but the annotation
    and the maximum search treat them as macro AUC scores - confirm what the
    logged metric actually is.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on; its y-limits are set to (0, 1).
    data : pandas.DataFrame
        Needs the columns ``epoch``, ``training_macro_loss`` and
        ``validation_macro_loss``.
    color : int
        Index into the default seaborn palette (curves) and the "dark"
        palette (annotation text).
    label : str
        Run label used in the legend entries.
    """
    line_colors = sns.color_palette()
    text_colors = sns.color_palette("dark")

    # Best validation score and the first epoch at which it occurs.
    best_score = data["validation_macro_loss"].max()
    best_epoch = data.loc[data["validation_macro_loss"] == best_score, "epoch"].iloc[0]

    ax.plot(data["epoch"], data["training_macro_loss"], color=line_colors[color], linestyle="--", alpha=0.5,
            label=f"Training: {label}")
    ax.plot(data["epoch"], data["validation_macro_loss"], color=line_colors[color],
            label=f"Validierung: {label}")

    ax.vlines(x=best_epoch, ymin=0, ymax=best_score, color=line_colors[color], alpha=0.5)
    ax.text(
        x=best_epoch + 0.3,  # small offset so the text sits right of the marker line
        y=0.05,
        s=f"$AUC_{{makro}}$ = {best_score:.2f}",
        color=text_colors[color],
        fontsize=12,
        rotation=270,
        fontweight="bold",
        va="bottom",
    )
    ax.set_ylim(0, 1)


# Training / validation curves of all four runs in a single axes.
fig, axs = plt.subplots(ncols=1, figsize=[10, 5])

# Palette index per run; index 3 is skipped for the last configuration.
for run_label, palette_idx in zip(name_space, (0, 1, 2, 4)):
    run_curves = data_mic_mac_loss_all.loc[
        data_mic_mac_loss_all["type"] == run_label
    ].reset_index(drop=True)
    create_plot_axis(axs, run_curves, color=palette_idx, label=run_label)

axs.grid(alpha=0.2)

# Legend above the axes, two columns, with thicker sample lines.
leg = axs.legend(
    loc="lower center",
    bbox_to_anchor=(0.5, 1.02),
    borderaxespad=0,
    ncol=2,
    fontsize=12,
    frameon=False,
)
for legend_line in leg.get_lines():
    legend_line.set_linewidth(3)

axs.set_xlim(0, 101)
axs.set_ylim(0, 1.02)  # overrides the (0, 1) limits set inside create_plot_axis
axs.set_xlabel("Epoche", fontsize=14, labelpad=10)
axs.set_ylabel("$AUC_{makro}$", fontsize=14, labelpad=10)
axs.tick_params(axis='x', labelsize=14, bottom=True)
axs.tick_params(axis='y', labelsize=14, left=True)

ax = axs
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

fig.savefig("plots/validation_cnn.pdf", bbox_inches="tight")


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import re


def format_isotope_label(row, auc_df):
    """Turn a nuclide identifier such as ``cs137`` into a LaTeX isotope label.

    The element symbol is capitalised and the mass number becomes a
    superscript prefix: ``cs137`` -> ``$^{137}Cs$``. A metastable marker that
    directly follows the mass number (``m``, optionally with one digit) is
    kept in the superscript: ``tc99m`` -> ``$^{99m}Tc$``; previously it was
    silently dropped. Values that do not start with letters followed by
    digits are returned unchanged.

    Parameters
    ----------
    row : mapping
        Anything indexable with ``"nuclide"`` (e.g. a DataFrame row).
    auc_df : object
        Unused; kept so existing callers do not break.

    Returns
    -------
    str
    """
    # The optional third group only matches when the marker ends the token
    # (word boundary), so e.g. "cs137_x" is labelled exactly as before.
    match = re.match(r"([a-zA-Z]+)(\d+)(m\d?\b)?", row["nuclide"])
    if match:
        element, mass, isomer = match.groups()
        return f"$^{{{mass}{isomer or ''}}}{element.capitalize()}$"
    return row["nuclide"]


def prepare_data(type_name):
    """Select the ROC points and AUC rows of one run at its best epoch.

    The best epoch is the first epoch at which ``validation_macro_loss``
    reaches its maximum for the run labelled ``type_name``. Reads the
    notebook-level frames ``data_mic_mac_loss_all``, ``data_auc_all`` and
    ``data_tpr_fpr_all``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        A copy of the matching ROC rows with ``nuclide`` rewritten by
        ``format_isotope_label``, and the matching (unmodified) AUC rows.
    """
    run_scores = data_mic_mac_loss_all[data_mic_mac_loss_all["type"] == type_name]
    top_score = run_scores["validation_macro_loss"].max()
    at_top_score = run_scores["validation_macro_loss"] == top_score
    epoch_best = run_scores.loc[at_top_score, "epoch"].values[0]

    auc_rows = (data_auc_all["epoch"] == epoch_best) & (data_auc_all["type"] == type_name)
    roc_rows = (data_tpr_fpr_all["epoch"] == epoch_best) & (data_tpr_fpr_all["type"] == type_name)
    auc_df = data_auc_all[auc_rows]
    df = data_tpr_fpr_all[roc_rows].copy()

    def _as_label(row):
        # Row-wise conversion of the raw nuclide id into its display label.
        return format_isotope_label(row, auc_df)

    df["nuclide"] = df.apply(_as_label, axis=1)
    return df, auc_df


# ROC points and AUC rows of every run at its best validation epoch.
df_mes, auc_df_mes = prepare_data(name_space[0])
df_syn, auc_df_syn = prepare_data(name_space[1])
df_ms, auc_df_ms = prepare_data(name_space[2])
df_lms, auc_df_lms = prepare_data(name_space[3])
combined_df = pd.concat([df_syn, df_mes, df_ms, df_lms])
combined_df_auc = pd.concat([auc_df_syn, auc_df_mes, auc_df_ms, auc_df_lms])
# Sorting by label puts the facets in (A), (B), (C), (D) order.
combined_df = combined_df.sort_values(by="type")

# relplot creates its own figure, so no separate plt.figure() is opened
# (it would only leave an empty figure behind).
g = sns.relplot(
    data=combined_df,
    x="validation_fpr",
    y="validation_tpr",
    hue="nuclide",
    col="type",
    col_wrap=2,
    kind="line",
    drawstyle="steps-pre",
    facet_kws={"sharex": True, "sharey": True},
    height=5,
    aspect=1.2,
    palette="tab10",
    errorbar=None
)
fig = g.figure

# Chance diagonal and cosmetics per facet. The title comes from the facet's
# own column value, so it cannot get out of step with the facet order.
baseline_handle = None
for ax, facet_title in zip(g.axes.flat, g.col_names):
    (baseline_handle,) = ax.plot([0, 1], [0, 1], ls="--", color="black", alpha=1, lw=2, zorder=100,
                                 label="Basislinie")
    ax.set_xlabel("False Positive Rate (FPR)", fontsize=16, labelpad=10)
    ax.set_ylabel("True Positive Rate (TPR)", fontsize=16, labelpad=10)
    ax.tick_params(axis='x', labelsize=16, bottom=True)
    ax.tick_params(axis='y', labelsize=16, left=True)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title(facet_title, fontsize=18)
    ax.grid(False)

# Rebuild the legend above the facets. The nuclide entries are taken from the
# legend seaborn generated (they are not reliably registered on the axes),
# and the baseline entry uses the plotted Line2D - an Axes object is not a
# valid legend handle.
facet_legend = g.legend
handles = list(facet_legend.get_lines())
labels = [text.get_text() for text in facet_legend.get_texts()]
handles.append(baseline_handle)
labels.append("Basislinie")
facet_legend.remove()

leg = g.figure.legend(
    handles,
    labels,
    loc="lower center",
    bbox_to_anchor=(0.5, 1.02),
    ncol=6,
    frameon=False,
    fontsize=20,
    title="",
)
for line in leg.get_lines():
    line.set_linewidth(5)

g.figure.tight_layout()
g.figure.savefig("plots/results_cnn_rocs.pdf", bbox_inches="tight")


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams['text.usetex'] = True
# Per-nuclide validation AUC at the best epoch of every run, as grouped bars.
# The run labels in "type" are used as they are; the former replacements of
# "measurements_only" / "synthetics_only" never matched these labels and
# were dropped.
combined_df_auc = combined_df_auc.assign(
    validation_auc=combined_df_auc["validation_auc"].round(2),
    nuclide=combined_df_auc.apply(lambda row: format_isotope_label(row, None), axis=1),
)
# Mean over the nuclides of each run (computed from the rounded values).
means = combined_df_auc.groupby("type")["validation_auc"].mean()
combined_df_auc = combined_df_auc.sort_values(by="type")

# One grey tone per run, shared by the bars and the mean lines.
grey_palette = sns.color_palette("Greys")[2:]

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    combined_df_auc,
    x="nuclide",
    y="validation_auc",
    hue="type",
    dodge=True,
    palette=grey_palette,
    legend=True,
    ax=ax,
)

# Dashed mean line per run with its value written right of the last bar group.
# The first two annotations are nudged apart vertically.
text_shift = {0: 0.03, 1: -0.03}
annotation_x = len(combined_df_auc["nuclide"].unique()) - 0.3
for idx, (label, mean_val) in enumerate(means.items()):
    ax.axhline(y=mean_val, color=grey_palette[idx], linestyle="--", linewidth=0.2, zorder=0)
    ax.text(
        x=annotation_x,
        y=mean_val - 0.02 + text_shift.get(idx, 0),
        s=f"$AUC_{{makro}}$ {label[0:3]} = {mean_val:.2f}",
        color=grey_palette[idx],
        fontsize=10,
        va="bottom"
    )

sns.move_legend(ax,
                loc="lower center",
                bbox_to_anchor=(0.5, 1.05),
                borderaxespad=0,
                ncol=2,
                frameon=False,
                fontsize=12,
                title=""
                )

# Write the value inside the top of every bar, for however many runs exist.
for container in ax.containers:
    ax.bar_label(container, fontsize=10, rotation=90,
                 label_type="edge",
                 padding=-30,
                 color="white")

ax.set_xlabel("Nuklid", size=14, labelpad=10)
ax.set_ylabel("AUC", size=14, labelpad=10)
ax.tick_params(axis='x', labelsize=12, bottom=True, pad=10)
ax.tick_params(axis='y', labelsize=12, left=True)
ax.grid(False)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# NOTE(review): fixed right limit, independent of the number of nuclides -
# confirm it still leaves the intended room for the mean annotations.
ax.set_xlim(-0.5, 10.5)

fig.tight_layout()
fig.savefig("plots/cnn_result.pdf", bbox_inches="tight")




In [None]:
import mlflow
import os
from config.loader import load_config

# Export the MinIO credentials / endpoint for the artifact store and point
# MLflow at the tracking server. The configuration is read once.
cfg = load_config()
for env_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_key] = cfg["minio"][env_key]
model_uri = cfg["mlflow"]["uri"]
model_name = "CNN_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=model_uri)
# Load the registered model by name / version and move it to the CPU.
model = mlflow.pytorch.load_model(f"models:/{model_name}/{model_version}").to("cpu")

In [None]:
import torch
from torchview import draw_graph
import os

model.eval()

# PATH entries are directories: add the folder that contains dot.exe, not the
# executable itself. Only done when that folder exists and is not already
# listed, so re-running the cell or running on another machine is harmless.
graphviz_bin_dir = r"C:\Program Files\Graphviz\bin"
current_path = os.environ.get("PATH", "")
if os.path.isdir(graphviz_bin_dir) and graphviz_bin_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = current_path + os.pathsep + graphviz_bin_dir

# Trace the model with one dummy input of shape (1, 1, 8160).
graph = draw_graph(model, input_size=(1, 1, 8160), expand_nested=False, roll=True)

# Graphviz layout attributes: top-to-bottom, fixed size, no extra spacing
# between ranks and nodes.
graph.visual_graph.graph_attr.update({
    "rankdir": "TB",
    "dpi": "120",
    "size": "10,10!",
    "splines": "true",
    "ratio": "compress",
    "ranksep": "0 equally",
    "nodesep": "0",
})
graph.visual_graph.render("plots/model_architecture", format="pdf")