In [None]:
import numpy as np
import matplotlib as mpl
import re
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
import random
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib
import matplotlib.colors as mcolors
import plotly.express as px
import pandas as pd
from scipy.signal import find_peaks

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import torch
from src.peaks.finder import PeakFinder

import mlflow
from config.loader import load_config
import os

import src.measurements.api as mpi
import src.generator.api as gpi
import src.peaks.api as ppi
import src.statistics.api as spi
plt.rcParams['text.usetex'] = True
from src.cnn.training import Training
import json
from src.cnn.dataset import MeasurementTraining
import mlflow
import os
from config.loader import load_config

In [None]:
# Measurements-only run: configure MinIO/MLflow access, load the registered
# model and download the artifacts of the pinned training run.
cfg = load_config()  # read the configuration once instead of once per key
for env_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_key] = cfg["minio"][env_key]
model_uri = cfg["mlflow"]["uri"]
model_name = "CNN_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=model_uri)
# NOTE(review): this loads the latest registered CNN_CPU version, which is not
# necessarily the model produced by the run pinned below - confirm.
model = mlflow.pytorch.load_model(f"models:/{model_name}/{model_version}").to("cpu")

client = mlflow.tracking.MlflowClient(tracking_uri=model_uri)
# MODEL REAL MEASUREMENTS ONLY
# The run is pinned by id. The former lookup of the latest registered version
# was overwritten immediately and has been dropped.
run_id = "e78098da07bb482aa6b451bd7c6fc310"
run = client.get_run(run_id)
client.download_artifacts(run_id=run_id, path="artifacts.json", dst_path="tmp/measurements_only")
mlb_classes = run.data.params["mlb_classes"].split(",")

In [None]:
# Metric histories of the selected run.
training_macro_loss = client.get_metric_history(run_id=run_id, key="training_macro_loss")
training_micro_loss = client.get_metric_history(run_id=run_id, key="training_micro_loss")
validation_macro_loss = client.get_metric_history(run_id=run_id, key="validation_macro_loss")

# Plain value lists, one entry per history record.
training_mac_loss = [metric.value for metric in training_macro_loss]
training_mic_loss = [metric.value for metric in training_micro_loss]
validation_mac_loss = [metric.value for metric in validation_macro_loss]

# Series are aligned on their position, so histories of different length are
# NaN-padded instead of raising IndexError or being silently truncated.
data_mic_mac_loss = pd.DataFrame({
    "training_macro_loss": pd.Series(training_mac_loss, dtype="float64"),
    "training_micro_loss": pd.Series(training_mic_loss, dtype="float64"),
    "validation_macro_loss": pd.Series(validation_mac_loss, dtype="float64"),
})
# NOTE(review): the row position is used as the epoch; this assumes one value
# was logged per epoch, in order - confirm against the training code.
data_mic_mac_loss["epoch"] = data_mic_mac_loss.index
data_mic_mac_loss["type"] = "measurements_only"

In [None]:
# Synthetics-only run: configure MinIO/MLflow access, load the registered
# model and download the artifacts of the pinned training run.
cfg = load_config()  # read the configuration once instead of once per key
for env_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_key] = cfg["minio"][env_key]
model_uri = cfg["mlflow"]["uri"]
model_name = "CNN_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=model_uri)
# NOTE(review): this loads the latest registered CNN_CPU version, which is not
# necessarily the model produced by the run pinned below - confirm.
model = mlflow.pytorch.load_model(f"models:/{model_name}/{model_version}").to("cpu")

client = mlflow.tracking.MlflowClient(tracking_uri=model_uri)
# MODEL REAL SYNTHETICS ONLY
# The run is pinned by id. The former lookup of the latest registered version
# was overwritten immediately and has been dropped.
run_id = "afbd04ca343d42a29f3d1c8fe7e97b61"
run = client.get_run(run_id)
client.download_artifacts(run_id=run_id, path="artifacts.json", dst_path="tmp/synthetics_only")
mlb_classes = run.data.params["mlb_classes"].split(",")

In [None]:
# Metric histories of the selected run.
training_macro_loss = client.get_metric_history(run_id=run_id, key="training_macro_loss")
training_micro_loss = client.get_metric_history(run_id=run_id, key="training_micro_loss")
validation_macro_loss = client.get_metric_history(run_id=run_id, key="validation_macro_loss")

# Plain value lists, one entry per history record.
training_mac_loss = [metric.value for metric in training_macro_loss]
training_mic_loss = [metric.value for metric in training_micro_loss]
validation_mac_loss = [metric.value for metric in validation_macro_loss]

# Series are aligned on their position, so histories of different length are
# NaN-padded instead of raising IndexError or being silently truncated.
data_mic_mac_loss_syntetics_only = pd.DataFrame({
    "training_macro_loss": pd.Series(training_mac_loss, dtype="float64"),
    "training_micro_loss": pd.Series(training_mic_loss, dtype="float64"),
    "validation_macro_loss": pd.Series(validation_mac_loss, dtype="float64"),
})
# NOTE(review): the row position is used as the epoch; this assumes one value
# was logged per epoch, in order - confirm against the training code.
data_mic_mac_loss_syntetics_only["epoch"] = data_mic_mac_loss_syntetics_only.index
data_mic_mac_loss_syntetics_only["type"] = "synthetics_only"

In [None]:
# Stack the per-run loss frames row-wise under a fresh 0..n-1 index; the
# "type" column keeps the two runs apart.
data_mic_mac_loss_all = pd.concat(
    [data_mic_mac_loss, data_mic_mac_loss_syntetics_only],
    axis=0,
    ignore_index=True,
)
data_mic_mac_loss_all

In [None]:
# Per-epoch ROC/AUC artifacts downloaded for the measurements-only run.
with open("tmp/measurements_only/artifacts.json") as f:
    artifacts = json.load(f)
# dict_keys(['used_keys', 'training_tpr', 'training_fpr', 'training_auc', 'validation_tpr', 'validation_fpr', 'validation_auc'])

# Validation ROC points in long format: one row per (epoch, nuclide, point).
# Frames are collected first and concatenated once; concatenating inside the
# loop is quadratic and seeds the result with an empty DataFrame.
tpr_fpr_frames = []
for idx, tpr_by_nuclide in enumerate(artifacts["validation_tpr"]):
    for nuclide, tpr_values in tpr_by_nuclide.items():
        nuclide_df = pd.DataFrame(tpr_values, columns=["validation_tpr"])
        nuclide_df["validation_fpr"] = artifacts["validation_fpr"][idx][nuclide]
        nuclide_df["nuclide"] = nuclide
        nuclide_df["epoch"] = idx
        tpr_fpr_frames.append(nuclide_df)
# pd.concat rejects an empty list, so fall back to an empty frame.
data_tpr_fpr = (
    pd.concat(tpr_fpr_frames, axis=0, ignore_index=True)
    if tpr_fpr_frames
    else pd.DataFrame()
)
data_tpr_fpr["type"] = "measurements_only"

# Training/validation AUC: one row per (epoch, nuclide).
auc_frames = []
for idx, auc_by_nuclide in enumerate(artifacts["training_auc"]):
    for nuclide, training_auc in auc_by_nuclide.items():
        nuclide_df = pd.DataFrame([training_auc], columns=["training_auc"])
        nuclide_df["validation_auc"] = artifacts["validation_auc"][idx][nuclide]
        nuclide_df["nuclide"] = nuclide
        nuclide_df["epoch"] = idx
        auc_frames.append(nuclide_df)
data_auc = (
    pd.concat(auc_frames, axis=0, ignore_index=True)
    if auc_frames
    else pd.DataFrame()
)
data_auc["type"] = "measurements_only"

In [None]:
# Per-epoch ROC/AUC artifacts downloaded for the synthetics-only run.
with open("tmp/synthetics_only/artifacts.json") as f:
    artifacts = json.load(f)
# dict_keys(['used_keys', 'training_tpr', 'training_fpr', 'training_auc', 'validation_tpr', 'validation_fpr', 'validation_auc'])

# Validation ROC points in long format: one row per (epoch, nuclide, point).
# Frames are collected first and concatenated once; concatenating inside the
# loop is quadratic and seeds the result with an empty DataFrame.
tpr_fpr_frames = []
for idx, tpr_by_nuclide in enumerate(artifacts["validation_tpr"]):
    for nuclide, tpr_values in tpr_by_nuclide.items():
        nuclide_df = pd.DataFrame(tpr_values, columns=["validation_tpr"])
        nuclide_df["validation_fpr"] = artifacts["validation_fpr"][idx][nuclide]
        nuclide_df["nuclide"] = nuclide
        nuclide_df["epoch"] = idx
        tpr_fpr_frames.append(nuclide_df)
# pd.concat rejects an empty list, so fall back to an empty frame.
data_tpr_fpr_synthetics_only = (
    pd.concat(tpr_fpr_frames, axis=0, ignore_index=True)
    if tpr_fpr_frames
    else pd.DataFrame()
)
data_tpr_fpr_synthetics_only["type"] = "synthetics_only"

# Training/validation AUC: one row per (epoch, nuclide).
auc_frames = []
for idx, auc_by_nuclide in enumerate(artifacts["training_auc"]):
    for nuclide, training_auc in auc_by_nuclide.items():
        nuclide_df = pd.DataFrame([training_auc], columns=["training_auc"])
        nuclide_df["validation_auc"] = artifacts["validation_auc"][idx][nuclide]
        nuclide_df["nuclide"] = nuclide
        nuclide_df["epoch"] = idx
        auc_frames.append(nuclide_df)
data_auc_synthetics = (
    pd.concat(auc_frames, axis=0, ignore_index=True)
    if auc_frames
    else pd.DataFrame()
)
data_auc_synthetics["type"] = "synthetics_only"

In [None]:
# Stack the ROC points of both runs row-wise under a fresh 0..n-1 index.
data_tpr_fpr_all = pd.concat(
    [data_tpr_fpr, data_tpr_fpr_synthetics_only],
    axis=0,
    ignore_index=True,
)
data_tpr_fpr_all

In [None]:
# Stack the AUC tables of both runs (synthetics first) under a fresh index.
data_auc_all = pd.concat(
    [data_auc_synthetics, data_auc],
    axis=0,
    ignore_index=True,
)
data_auc_all

In [None]:
# Render all figure text through LaTeX (same setting as in the import cell).
plt.rcParams['text.usetex'] = True

def create_plot_axis(ax, data):
    """Plot the training and validation macro curves and annotate their maxima.

    Args:
        ax: Axes-like object providing ``plot``, ``annotate`` and ``set_ylim``.
        data: DataFrame with the columns ``epoch``, ``training_macro_loss``
            and ``validation_macro_loss``.

    Each curve is annotated at its maximum value, at the first epoch where
    that maximum occurs. The y-axis is fixed to [0, 1].
    NOTE(review): the columns are named "loss" but the annotations call the
    value Macro-AUC - confirm which metric is actually logged.
    """
    epochs = data["epoch"]
    train_curve = data["training_macro_loss"]
    val_curve = data["validation_macro_loss"]

    # Peak value of each curve and the first epoch at which it is reached.
    train_peak = train_curve.max()
    train_peak_epoch = data.loc[train_curve == train_peak]["epoch"].values[0]
    val_peak = val_curve.max()
    val_peak_epoch = data.loc[val_curve == val_peak]["epoch"].values[0]

    ax.plot(epochs, train_curve, color="black", label="training_macro_loss")
    ax.plot(epochs, val_curve, color="red", label="validation_macro_loss")

    def _mark_peak(text, peak_epoch, peak_value):
        # Label sits at y=1 above the peak, with an arrow pointing down to it.
        ax.annotate(
            text,
            ha='center',
            va='bottom',
            size='large',
            xytext=(peak_epoch, 1),
            xy=(peak_epoch, peak_value),
            arrowprops={'facecolor': 'darkgrey'},
            alpha=0.5,
        )

    _mark_peak(f'MAX(Macro-AUC) Training = {round(train_peak, 2)}',
               train_peak_epoch, train_peak)
    _mark_peak(f'MAX(Macro-AUC) Validierung = {round(val_peak, 2)}',
               val_peak_epoch, val_peak)
    ax.set_ylim(0, 1)


# Draw both runs onto one shared axis, synthetics first.
# NOTE(review): both runs use the same black/red colours and no legend is
# drawn, so the curves of the two runs cannot be told apart - confirm intent.
fig, axs = plt.subplots(ncols=1)
for run_type in ("synthetics_only", "measurements_only"):
    run_mask = data_mic_mac_loss_all["type"] == run_type
    create_plot_axis(axs, data_mic_mac_loss_all.loc[run_mask].reset_index(drop=True))

In [None]:
# Display the combined AUC table (training/validation AUC per nuclide, epoch and run type).
data_auc_all

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import re

def format_isotope_label(row, auc_df):
    """Turn a nuclide name such as ``cs137`` into the LaTeX label ``$^{137}Cs$``.

    Args:
        row: Mapping or DataFrame row with a ``"nuclide"`` entry.
        auc_df: Unused. Kept so existing callers that pass an AUC frame (or
            ``None``) keep working.

    Returns:
        The LaTeX isotope label, or the unchanged ``row["nuclide"]`` when the
        name does not start with letters followed by digits.
    """
    # The previous version looked up the nuclide's AUC values in ``auc_df``
    # without using them; that lookup raised IndexError for nuclides missing
    # from the frame, so it has been removed.
    # Only the leading letters and the digits after them are used; anything
    # after the mass number is dropped from the label.
    match = re.match(r"([a-zA-Z]+)(\d+)", row["nuclide"])
    if match is None:
        return row["nuclide"]
    element, mass = match.groups()
    return f"$^{{{mass}}}{element.capitalize()}$"

# Prepare one filtered DataFrame
def prepare_data(type_name):
    """Select the ROC points and AUC rows of one run type at its best epoch.

    The best epoch is the first epoch at which ``validation_macro_loss`` of
    that run type reaches its maximum in ``data_mic_mac_loss_all``.

    Args:
        type_name: Value of the ``type`` column, e.g. ``"synthetics_only"``.

    Returns:
        Tuple ``(df, auc_df)``: a copy of the matching rows of
        ``data_tpr_fpr_all`` with ``nuclide`` replaced by a formatted label,
        and the matching rows of ``data_auc_all``.
    """
    loss_df = data_mic_mac_loss_all[data_mic_mac_loss_all["type"] == type_name]
    val_curve = loss_df["validation_macro_loss"]
    epoch_best = loss_df.loc[val_curve == val_curve.max(), "epoch"].values[0]

    auc_mask = (data_auc_all["epoch"] == epoch_best) & (data_auc_all["type"] == type_name)
    auc_df = data_auc_all[auc_mask]

    roc_mask = (data_tpr_fpr_all["epoch"] == epoch_best) & (data_tpr_fpr_all["type"] == type_name)
    # Copy so the label column can be rewritten without touching the source frame.
    df = data_tpr_fpr_all[roc_mask].copy()
    df["nuclide"] = df.apply(lambda row: format_isotope_label(row, auc_df), axis=1)
    return df, auc_df

# Combine both types
df_syn, auc_df_syn = prepare_data("synthetics_only")
df_mes, auc_df_mes = prepare_data("measurements_only")
combined_df = pd.concat([df_syn, df_mes])
combined_df["type"] = combined_df["type"].map({
    "synthetics_only": "Synthetische Daten (1022 Datensätze)",
    "measurements_only": "Gemessene Daten (1022 Datensätze)"
})
combined_df_auc = pd.concat([auc_df_syn, auc_df_mes])

# relplot is a figure-level function that creates its own figure (sized via
# height/aspect), so no plt.figure() call is made beforehand; that only left
# an empty extra figure behind.
g = sns.relplot(
    data=combined_df,
    x="validation_fpr",
    y="validation_tpr",
    hue="nuclide",
    col="type",
    kind="line",
    # Draw every ROC point. The default estimator averages all TPR values
    # sharing one FPR and adds an error band, which distorts the vertical
    # segments of a ROC curve.
    estimator=None,
    drawstyle="steps-pre",
    facet_kws={"sharex": True, "sharey": True},
    height=5,
    aspect=1.2,
    palette="tab10"
)
fig = g.figure

# One title per facet, in facet order (synthetic data first, as concatenated above).
titles = ["(A) Synthetische Daten (1022 Datensätze)" , "(B) Gemessene Daten (1022 Datensätze)"]
for ax, title in zip(g.axes.flat, titles):
    # Diagonal = chance level of a random classifier.
    ax.plot([0, 1], [0, 1], ls="--", color="black", alpha=0.5, lw=2, zorder=100)
    ax.set_xlabel("False Positive Rate (FPR)", fontsize=12)
    ax.set_ylabel("True Positive Rate (TPR)", fontsize=12)
    ax.set_title(title, fontsize=14)
    ax.grid(False)

# Place the shared nuclide legend above both facets.
sns.move_legend(g,
                loc="lower center",
                bbox_to_anchor=(0.5, 1.02),
                borderaxespad=0,
                title="",
                ncol=6,
                fontsize=14,
                frameon=False)
plt.tight_layout()
plt.show()


In [None]:
# Mean of every numeric column per run type. `epoch` is averaged too; each type
# was filtered to its single best epoch, so that mean is the best epoch itself.
combined_df_auc.groupby("type").mean(numeric_only=True)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams['text.usetex'] = True

# Prepare the AUC table for plotting: rounded values (these become the bar
# labels), LaTeX nuclide labels and short German names for the run types.
combined_df_auc["validation_auc"] = combined_df_auc["validation_auc"].round(2)
combined_df_auc["nuclide"] = combined_df_auc.apply(lambda row: format_isotope_label(row, None), axis=1)
combined_df_auc["type"] = combined_df_auc["type"].str.replace("measurements_only", "Gemessen").str.replace("synthetics_only", "Synthetisch")
# Macro AUC per run type = mean over nuclides (taken from the rounded values).
means = combined_df_auc.groupby("type")["validation_auc"].mean()
n_nuclides = combined_df_auc["nuclide"].nunique()

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(
    combined_df_auc,
    x="nuclide",
    y="validation_auc",
    hue="type",
    dodge=True,
    palette=[sns.color_palette("Greys")[-5], sns.color_palette("Greys")[-3]],
    legend=False,
    ax=ax,
)

# Dashed horizontal line plus caption for the macro AUC of each run type.
mean_line_colors = [sns.color_palette("Greys")[-1], sns.color_palette("Greys")[-2]]
for idx, (label, mean_val) in enumerate(means.items()):
    ax.axhline(y=mean_val, color=mean_line_colors[idx], linestyle="--", linewidth=0.5)
    ax.text(
        x=n_nuclides - 0.3,  # near the right edge
        y=mean_val - 0.02,
        s=f"$AUC_{{makro}}$ {label} = {mean_val:.2f}",
        color=mean_line_colors[idx],
        fontsize=10,
        va="bottom"
    )

# One bar container per hue level, in hue order (synthetic rows come first in
# combined_df_auc, so container 0 holds the synthetic bars).
bar_containers = list(ax.containers[:2])
for container in bar_containers:
    ax.bar_label(container, fontsize=10)

ax.set_xlabel("Nuklid", size=14, labelpad=10)
# The y-axis shows the validation AUC, not a count ("Anzahl").
ax.set_ylabel("AUC (Validierung)", size=14, labelpad=10)
ax.tick_params(axis='x', labelsize=12, bottom=True, pad=10)
ax.tick_params(axis='y', labelsize=12, left=True)
ax.grid(False)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Half a bar width of margin on both sides, for any number of nuclides.
ax.set_xlim(-0.5, n_nuclides - 0.5)

# Pass the bar containers as explicit handles. With labels alone matplotlib
# assigns them to artists in creation order, which its documentation
# discourages because labels and artists are easily mixed up.
fig.legend(handles=bar_containers,
           labels=['AUC Validierung (synthetisch 1022 Datensätze)','AUC Validierung (gemessen 1022 Datensätze)'],
           bbox_to_anchor=(0.7, 1.02),
           borderaxespad=0,
           ncol=3,
           frameon=False
          )

plt.tight_layout()


