In [None]:
import numpy as np
import matplotlib as mpl
import re
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
import random
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib
import matplotlib.colors as mcolors
import plotly.express as px
import pandas as pd
from scipy.signal import find_peaks

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import torch
from src.peaks.finder import PeakFinder

import mlflow
from config.loader import load_config
import os

import src.measurements.api as mpi
import src.generator.api as gpi
import src.peaks.api as ppi
import src.statistics.api as spi
# Render all figure text through LaTeX for the cells that follow.
plt.rc("text", usetex=True)

In [None]:
# Load the processed latent-space table from the statistics API.
latent_space_data = spi.API().processed_synthetics_latent_space()
# The table may or may not already carry a "cluster" column; drop it only if present.
# errors="ignore" replaces the former bare `except: pass`, which also hid unrelated failures.
latent_space_data = latent_space_data.drop(columns=["cluster"], errors="ignore")

id_columns = ["datetime", "datetime_from_measurement"]
latent_columns = [str(i) for i in range(24)]  # latent dimensions are stored as string-named columns

# Cluster on everything that is not an identifier column.
X = latent_space_data.drop(columns=id_columns).to_numpy()
kmeans = KMeans(n_clusters=5, random_state=42)
latent_space_data["cluster"] = kmeans.fit_predict(X)

# Long format: one row per (identifier, latent variable) pair.
melted_latent_space_data = latent_space_data.melt(id_vars=id_columns, value_vars=latent_columns)

# Summary statistics; previously evaluated and thrown away, now kept under names for inspection.
per_variable_mean = melted_latent_space_data.groupby("variable").mean(numeric_only=True).round(2)
per_variable_std = melted_latent_space_data.groupby("variable").std(numeric_only=True).round(2)
overall_std = melted_latent_space_data["value"].std()

# Fit a multivariate normal to the latent dimensions and draw synthetic latent vectors from it.
latent_only = latent_space_data.drop(columns=id_columns + ["cluster"])
cov = latent_only.cov()
mean = latent_only.mean()
# Seeded generator so Restart & Run All reproduces the same samples (and downstream SHAP values).
rng = np.random.default_rng(42)
samples = pd.DataFrame(rng.multivariate_normal(mean.to_numpy(), cov.to_numpy(), size=10000))
# `samples` has integer column labels 0..23, unlike the string labels of the real latent table.
melted_samples = samples.melt(value_vars=list(range(24)))
melted_samples

In [None]:
# Violin plot per latent variable (left) with the pooled value histogram as a marginal (right).
# Both panels span the two lower grid rows; the top grid row stays empty.
fig = plt.figure(figsize=(10, 7))
gs = fig.add_gridspec(3, 2, height_ratios=[1, 4, 4], width_ratios=[5, 1], hspace=0.02, wspace=0.02)

ax_violin_plot = fig.add_subplot(gs[1:, 0])

# Violin plot
sns.violinplot(data=melted_latent_space_data, x="variable", y="value",
               color="grey", fill=False, inner="box", linewidth=1, ax=ax_violin_plot)

# Pooled statistics over all latent variables
mean = melted_latent_space_data["value"].mean()
std = melted_latent_space_data["value"].std()
ax_violin_plot.axhline(y=mean, color="black", alpha=0.5, linestyle="--", linewidth=0.5)

# Styling
ax_violin_plot.set_xlabel("Latent Space Variable", fontsize=14, labelpad=15)
ax_violin_plot.set_ylabel("Latent Space Wert", fontsize=14, labelpad=15)
ax_violin_plot.tick_params(axis='x', labelsize=12)
ax_violin_plot.tick_params(axis='y', labelsize=12)
ax_violin_plot.grid(False)

# Marginal distribution of all latent values
ax_dist = fig.add_subplot(gs[1:, 1])
sns.histplot(melted_latent_space_data, y="value", kde=True, color="grey", ax=ax_dist)
ax_dist.set_xlabel("", fontsize=14, labelpad=20)
ax_dist.set_ylabel("", fontsize=14, labelpad=15)
ax_dist.tick_params(axis='x', labelsize=12)
ax_dist.tick_params(axis='y', labelsize=12)
ax_dist.grid(False)
ax_dist.set_yticks([])
ax_dist.set_xticks([])

# Horizontal lines for mean and mean ± std
ax_dist.axhline(mean, color="black", linestyle="--", linewidth=0.5, alpha=0.5)
ax_dist.axhline(mean + std, color="black", linestyle=":", linewidth=0.5, alpha=0.5)
ax_dist.axhline(mean - std, color="black", linestyle=":", linewidth=0.5, alpha=0.5)

# Text annotations. Raw f-strings: "\m" and "\s" are invalid escape sequences in a normal
# string literal and trigger a warning on current Python versions. The rendered text is unchanged.
# The y-axis transform places x in axes fractions and y in data units.
label_transform = ax_dist.get_yaxis_transform()
ax_dist.text(0.7, mean, rf"$\mu$ = {mean:.2f}", va="bottom", ha="right", transform=label_transform, fontsize=10)
ax_dist.text(0.9, mean + std, rf"+$\sigma$ = {mean + std:.2f}", va="bottom", ha="right", transform=label_transform, fontsize=10)
ax_dist.text(0.9, mean - std, rf"-$\sigma$ = {mean - std:.2f}", va="top", ha="right", transform=label_transform, fontsize=10)

# The marginal panel is drawn without a frame.
for spine in ax_dist.spines.values():
    spine.set_visible(False)


# Save to PDF
plt.savefig("plots/latent_space_dist.pdf")

In [None]:
# Same figure as for the real latent values, but for the vectors drawn from the fitted
# multivariate normal (`melted_samples`). This figure is only displayed, not saved.
fig = plt.figure(figsize=(10, 7))
gs = fig.add_gridspec(3, 2, height_ratios=[1, 4, 4], width_ratios=[5, 1], hspace=0.02, wspace=0.02)

ax_violin_plot = fig.add_subplot(gs[1:, 0])

# Violin plot
sns.violinplot(data=melted_samples, x="variable", y="value",
               color="grey", fill=False, inner="box", linewidth=1, ax=ax_violin_plot)

# Pooled statistics over all sampled latent variables
mean = melted_samples["value"].mean()
std = melted_samples["value"].std()
ax_violin_plot.axhline(y=mean, color="black", alpha=0.5, linestyle="--", linewidth=0.5)

# Styling
ax_violin_plot.set_xlabel("Latent Space Variable", fontsize=14, labelpad=15)
ax_violin_plot.set_ylabel("Latent Space Wert", fontsize=14, labelpad=15)
ax_violin_plot.tick_params(axis='x', labelsize=12)
ax_violin_plot.tick_params(axis='y', labelsize=12)
ax_violin_plot.grid(False)

# Marginal distribution of all sampled values
ax_dist = fig.add_subplot(gs[1:, 1])
sns.histplot(melted_samples, y="value", kde=True, color="grey", ax=ax_dist)
ax_dist.set_xlabel("", fontsize=14, labelpad=20)
ax_dist.set_ylabel("", fontsize=14, labelpad=15)
ax_dist.tick_params(axis='x', labelsize=12)
ax_dist.tick_params(axis='y', labelsize=12)
ax_dist.grid(False)
ax_dist.set_yticks([])
ax_dist.set_xticks([])

# Horizontal lines for mean and mean ± std
ax_dist.axhline(mean, color="black", linestyle="--", linewidth=0.5, alpha=0.5)
ax_dist.axhline(mean + std, color="black", linestyle=":", linewidth=0.5, alpha=0.5)
ax_dist.axhline(mean - std, color="black", linestyle=":", linewidth=0.5, alpha=0.5)

# Text annotations. Raw f-strings: "\m" and "\s" are invalid escape sequences in a normal
# string literal and trigger a warning on current Python versions. The rendered text is unchanged.
# The y-axis transform places x in axes fractions and y in data units.
label_transform = ax_dist.get_yaxis_transform()
ax_dist.text(0.7, mean, rf"$\mu$ = {mean:.2f}", va="bottom", ha="right", transform=label_transform, fontsize=10)
ax_dist.text(0.9, mean + std, rf"+$\sigma$ = {mean + std:.2f}", va="bottom", ha="right", transform=label_transform, fontsize=10)
ax_dist.text(0.9, mean - std, rf"-$\sigma$ = {mean - std:.2f}", va="top", ha="right", transform=label_transform, fontsize=10)

# The marginal panel is drawn without a frame.
for spine in ax_dist.spines.values():
    spine.set_visible(False)

In [None]:
# Project the latent matrix onto its principal components and show the k-means
# cluster assignment in the plane of the first two components.
pca = PCA(n_components=24)
X_pca = pca.fit_transform(X)
print(pca.explained_variance_ratio_)
latent_space_data[["PC1", "PC2"]] = X_pca[:, :2]

sns.scatterplot(
    data=latent_space_data,
    x="PC1",
    y="PC2",
    hue="cluster",
    palette="tab10",
)
plt.title("PCA of Latent Space Colored by Cluster")
plt.show()

# co_occurrence = latent_space_data[isotopes].T @ latent_space_data[isotopes]
# sns.heatmap(co_occurrence, annot=True, cmap="Blues")
# plt.title("Isotope Co-Occurrence Matrix")
# plt.show()

In [None]:
import mlflow
import os
from config.loader import load_config

# Read the configuration once instead of re-loading it for every single key.
config = load_config()

# Export the object-store credentials and endpoint from the "minio" config section
# into the environment before the model artifacts are loaded.
minio_config = config["minio"]
for env_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_key] = minio_config[env_key]

model_uri = config["mlflow"]["uri"]
model_name = "VAE_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=model_uri)
model = mlflow.pytorch.load_model(f"models:/{model_name}/{model_version}").to("cpu")

# Reuse model_uri / model_name so the client always targets the model loaded above.
client = mlflow.tracking.MlflowClient(tracking_uri=model_uri)
# NOTE(review): takes the first entry returned by get_latest_versions; confirm that it is the
# same version that "models:/.../latest" resolved to.
run_id = client.get_latest_versions(model_name)[0].run_id
run = client.get_run(run_id)
# Logged "min"/"max" run parameters; later cells use them to undo the min-max scaling
# of the decoder output (x * (max - min) + min).
min_vae = float(run.data.params["min"])
max_vae = float(run.data.params["max"])


In [None]:
all_metrics = ["validation_loss", "training_loss", "validation_reconstruction_loss", "training_reconstruction_loss", "validation_kl_divergence", "training_kl_divergence"]

# Fetch every metric history of the run, ordered by logging timestamp.
metrics_values = {
    metric: [
        entry.value
        for entry in sorted(client.get_metric_history(run_id, metric), key=lambda m: m.timestamp)
    ]
    for metric in all_metrics
}

# Wrapping each history in a Series pads shorter histories with NaN instead of raising
# "All arrays must be of the same length" when one metric has fewer logged points.
results = pd.DataFrame({name: pd.Series(values, dtype=float) for name, values in metrics_values.items()})
results = results.rename(columns={
    "validation_loss": "Validierung: Poisson-Loss + KL-Divergence",
    "training_loss": "Training: Poisson-Loss + KL-Divergence",
    "training_reconstruction_loss": "Training: Poisson-Loss",
    "validation_reconstruction_loss": "Validierung: Poisson-Loss",
    "training_kl_divergence": "Training: KL-Divergence",
    "validation_kl_divergence": "Validierung: KL-Divergence",
})

# Minimum of the total validation loss and the (single) row index at which it first occurs.
# idxmin returns a scalar; the previous boolean lookup returned an array, which breaks the
# annotation coordinates as soon as the minimum value occurs more than once.
validation_loss = results["Validierung: Poisson-Loss + KL-Divergence"]
min_val_loss = validation_loss.min()
min_val_loss_epoch = validation_loss.idxmin()

plt.figure(figsize=(10, 5))
sns.lineplot(results, lw=1, style=None, dashes=False, palette="Paired")
plt.annotate(f'MIN(Validierung: Poisson-Loss + KL-Divergence) = {round(min_val_loss, 2)}',
             ha='center', va='bottom',
             size='large',
             xytext=(min_val_loss_epoch - 30, 40), xy=(min_val_loss_epoch, min_val_loss),
             arrowprops={'facecolor': 'darkgrey'}, alpha=0.5)

plt.ylim(0, 50)
plt.xlim(0, 100)
plt.grid(False)
plt.xlabel("Epoche", size=14)
plt.ylabel("Loss", size=14)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
# Legend above the axes, three columns, without a frame.
leg = plt.legend(
    loc="lower center",
    bbox_to_anchor=(0.5, 1.02),
    borderaxespad=0,
    ncol=3,
    frameon=False
)
# Thicker legend handles so the thin (lw=1) lines stay distinguishable.
for line in leg.get_lines():
    line.set_linewidth(5)
plt.savefig("plots/vae_metrics.pdf")

In [None]:
import torch
from torchview import draw_graph
import os

# Make the Graphviz binaries discoverable. Guarded so that re-running the cell does not
# append the same directory to PATH again and again.
# NOTE(review): hard-coded Windows install location; on other systems this entry is simply unused.
graphviz_bin = r"C:\Program Files\Graphviz\bin"
current_path = os.environ.get("PATH", "")
if graphviz_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = current_path + os.pathsep + graphviz_bin

# Trace the model with an input of size 8160 and build the graph.
graph = draw_graph(model, input_size=(8160,), expand_nested=False, roll=True)
# Graphviz layout attributes (previously set in two consecutive update calls).
graph.visual_graph.graph_attr.update({
    "rankdir": "TB",
    "dpi": "120",
    "size": "10,10!",
    "splines": "true",
    "ratio": "compress",
    "ranksep": "0 equally",
    "nodesep": "0",
})
graph.visual_graph.render("plots/model_architecture", format="pdf")

In [None]:
# Pick the datetime keys whose split type is "vae" and load the matching measurements.
splitted_keys = mpi.API().splitted_keys()
is_vae_key = splitted_keys["type"] == "vae"
keys_for_vae = splitted_keys.loc[is_vae_key, "datetime"].tolist()
meas_from_vae_training = ppi.API().measurement(keys_for_vae)

In [None]:
# Mean count per energy over all VAE training measurements, as a two-column frame.
vae_mean_training = meas_from_vae_training.groupby("energy", as_index=False)["count"].mean()

In [None]:
# Display the per-energy mean count of the VAE training measurements for inspection.
vae_mean_training

In [None]:
# Load the mean-measurement view from the statistics API.
# NOTE(review): `mean_meas` is not used by any other cell visible in this part of the notebook.
mean_meas = spi.API().view_mean_measurement()

In [None]:
# Per-variable mean of the latent values as a float32 vector.
# The "variable" labels are strings, so groupby returns them in lexicographic order
# ("0", "1", "10", "11", ...). Reindex to numeric order so means[k] belongs to latent variable k.
latent_variable_order = [str(i) for i in range(24)]
means = (
    melted_latent_space_data.groupby("variable")["value"]
    .mean()
    .reindex(latent_variable_order)
    .to_numpy(dtype=np.float32)
)

In [None]:
# Display the per-variable latent means computed in the previous cell.
means

In [None]:
# Scratch check: multiply a float32 ones-vector of length 24 element-wise with `means`.
# NOTE(review): the result is only displayed, never stored — presumably a shape/dtype sanity check.
np.ones(24, dtype=np.float32) * means

In [None]:
# Decode latent vectors whose 24 entries all share one value, sweeping that value
# from -2.0 to 1.8 in steps of 0.2, and collect the rescaled decoder outputs.
decoder = model.decode
# Energy per output row; same calibration constant the SHAP cell uses for its energy axis.
# The plot cell below reads latent_data["energy"], which was never created before.
# NOTE(review): assumes the decoder returns a 1-D vector with one entry per channel — confirm.
step_size = 0.34507313512321336

latent_frames = []  # collected per-step frames; concatenated once after the loop
with torch.no_grad():  # inference only, no autograd graph needed
    for i in range(-10, 10):
        latent_value = i / 5
        z_torch = torch.from_numpy(np.full(24, latent_value, dtype=np.float32)).to("cpu")
        x_hat = decoder(z_torch).to("cpu").detach().numpy()
        # Undo the min-max scaling with the bounds logged for the model run.
        x_hat = x_hat * (max_vae - min_vae) + min_vae
        data = pd.DataFrame(x_hat)
        data["energy"] = np.arange(len(data)) * step_size
        data["variable"] = latent_value
        latent_frames.append(data)
latent_data = pd.concat(latent_frames, axis=0)
latent_data


In [None]:


# Compare the measured training spectra (mean with a 2-standard-error band) with the decoder
# outputs of the latent sweep. Both lines now carry a label: without any labelled artist
# plt.legend() only emits a warning and draws an empty legend.
plt.figure(figsize=(10, 5))
sns.lineplot(meas_from_vae_training, x="energy", y="count", errorbar=("se", 2), color="black",
             label="Messungen (VAE-Training)")
# plt.plot(vae_mean_training["energy"], vae_mean_training["count"], color="red")
# Column 0 holds the rescaled decoder output; seaborn aggregates over all sweep values per energy.
sns.lineplot(latent_data, x="energy", y=0, label="Decoder-Ausgabe")
plt.ylim(0, 2000)
plt.legend()

In [None]:
import shap

# Decode one sampled latent vector (row 2 of `samples`) as a quick smoke test of the decoder.
decoder = model.decode
z_torch = torch.from_numpy(
    samples.iloc[2].to_numpy(dtype=np.float32)
).to("cpu")
x_hat = decoder(z_torch).to("cpu").detach().numpy()

class DecoderWrapper:
    """NumPy-in / NumPy-out adapter around a torch decoder function.

    Converts the input to a float32 CPU tensor, adds a batch dimension to 1-D input,
    runs the decoder without gradient tracking and rescales the output with
    ``output * (value_max - value_min) + value_min``.

    Parameters
    ----------
    decoder_func : callable
        Maps a ``(batch, latent_dim)`` tensor to a tensor of decoded values.
    value_min, value_max : float, optional
        Bounds used to undo the min-max scaling. When left as ``None`` the notebook
        globals ``min_vae`` / ``max_vae`` are looked up at call time, which is the
        behaviour of the original single-argument constructor.
    """

    def __init__(self, decoder_func, value_min=None, value_max=None):
        self.decoder_func = decoder_func
        self.value_min = value_min
        self.value_max = value_max

    def __call__(self, z_numpy):
        """Decode ``z_numpy`` (1-D or 2-D array-like) and return a rescaled NumPy array."""
        # Resolve the scaling bounds lazily so the globals only need to exist when used.
        lower = min_vae if self.value_min is None else self.value_min
        upper = max_vae if self.value_max is None else self.value_max

        # as_tensor + asarray also accepts lists and non-float32 arrays.
        z_tensor = torch.as_tensor(np.asarray(z_numpy), dtype=torch.float32).to("cpu")
        if z_tensor.dim() == 1:
            z_tensor = z_tensor.unsqueeze(0)  # ensure batch dimension
        with torch.no_grad():
            output = self.decoder_func(z_tensor).cpu().numpy()
        return output * (upper - lower) + lower

# First 200 sampled latent vectors as a contiguous float32 matrix (one slice instead of a
# per-row Python loop). They serve both as SHAP background and as the points being explained.
background_data = np.ascontiguousarray(samples.iloc[:200].to_numpy(dtype=np.float32))

decoder_fn = DecoderWrapper(model.decode)
explainer = shap.Explainer(decoder_fn, background_data)

# Single latent sample with a batch dimension, shape (1, latent_dim).
# NOTE: it is prepared here but NOT what gets explained — the call below explains the
# background set itself.
z_input = z_torch.unsqueeze(0).cpu().numpy()
shap_values = explainer(background_data)

In [None]:
# Average the SHAP values over the explained samples (axis 0), then transpose so the
# frame's columns are what remains of the first averaged axis.
mean_explainer = np.mean(shap_values.values, axis=0)
data = pd.DataFrame(mean_explainer.T)
step_size = 0.34507313512321336
energy_max = step_size * 8160
# Build the axis from the actual row count. np.arange with a float step can produce one
# element too many or too few through rounding, which would make the assignment below fail.
energy_axis = np.arange(len(data)) * step_size
data["energy"] = energy_axis
# Long format: every non-id column becomes a "variable"/"value" pair. Previously "energy"
# was also passed as a value column although it is the id column.
data = data.melt(id_vars=["energy"])
# data = data.groupby("energy").max()

In [None]:
from config.loader import load_engine

engine = load_engine()

# Persist the mean SHAP values for energies in [0, 3000) to the database.
# The long-format columns are renamed to "datetime"/"count" before writing; the reading
# cell renames them back. An existing table is replaced.
data["variable"] = data["variable"].astype(int)
in_energy_window = data["energy"].between(0, 3000, inclusive="left")
data_filtered = data.loc[in_energy_window].rename(
    columns={"variable": "datetime", "value": "count"}
)
data_filtered.to_sql(
    name="latent_space_shaps",
    con=engine,
    schema="measurements",
    if_exists="replace",
    index=False,
)

In [None]:
import src.vae.api as vpi

# Read the stored SHAP table back and restore the long-format column names.
data_filtered = vpi.API().latent_space_shaps().rename(columns={"datetime": "variable", "count": "value"})
# NOTE(review): LaTeX rendering is switched off from here on — presumably because the legend
# labels below contain the Unicode characters μ and σ.
plt.rcParams["text.usetex"] = False

# Per-variable mean and standard deviation of the SHAP values, attached to every row.
shap_by_variable = data_filtered.groupby("variable")["value"]
data_filtered = data_filtered.assign(
    mean_of_value=shap_by_variable.transform("mean"),
    std_of_value=shap_by_variable.transform("std"),
)

# Legend label: "<variable>, μ = <mean>, σ = <std>" with rounded integer statistics.
data_filtered["variable_with_mean"] = (
    data_filtered["variable"].astype(int).astype(str)
    + ", μ = " + data_filtered["mean_of_value"].round(0).astype(int).astype(str)
    + ", σ = " + data_filtered["std_of_value"].round(0).astype(int).astype(str)
)

data_filtered["greater_or_less"] = data_filtered["mean_of_value"] >= 0

# Largest SHAP value of each variable in the narrow window 250 < energy < 250.5.
df_250 = data_filtered[(data_filtered["energy"] > 250) & (data_filtered["energy"] < 250.5)]
max_values_per_variable = (
    df_250.groupby("variable")["value"].max().rename("max_for_250").reset_index()
)
data_filtered = data_filtered.merge(max_values_per_variable, on="variable", how="left")

# Split by the sign of the mean SHAP value; each group gets its own colour palette.
data_above = (
    data_filtered[data_filtered["mean_of_value"] >= 0]
    .sort_values(by="mean_of_value", ascending=False)
    .reset_index(drop=True)
)
data_below = (
    data_filtered[data_filtered["mean_of_value"] < 0]
    .sort_values(by="mean_of_value", ascending=True)
    .reset_index(drop=True)
)

hue_order_above = data_above.drop_duplicates("variable").sort_values("max_for_250")["variable"].reset_index(drop=True)
palette_above = sns.color_palette("flare", n_colors=len(hue_order_above))

# Fixed copy-paste bug: this was derived from data_above, so the "crest" palette was sized
# for the wrong group.
hue_order_below = data_below.drop_duplicates("variable").sort_values("max_for_250")["variable"].reset_index(drop=True)
palette_below = sns.color_palette("crest", n_colors=len(hue_order_below))
# NOTE(review): hue_order_above / hue_order_below only determine the palette sizes; they are
# not passed to seaborn as hue_order. Confirm whether the colour ordering was meant to follow them.

data_above["variable"] = data_above["variable"].astype(str)
data_below["variable"] = data_below["variable"].astype(str)


def _draw_shap_lines(target_ax, linewidth):
    """Draw one line per latent variable on target_ax, above-zero group first."""
    for group, palette in ((data_above, palette_above), (data_below, palette_below)):
        if group.empty:
            continue  # nothing to draw for this sign
        sns.lineplot(
            data=group,
            x="energy",
            y="value",
            hue="variable_with_mean",
            palette=palette,
            alpha=0.5,
            linewidth=linewidth,
            legend="brief",
            ax=target_ax,
        )


# Set figure
fig, ax = plt.subplots(figsize=(10, 8))
_draw_shap_lines(ax, linewidth=0.3)

# Labeling
plt.xlabel("Energie [keV]", size=14, labelpad=15)
plt.ylabel("SHAP Wert", size=14)
plt.ylim(-40, 40)
plt.xlim(-0, 2800)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.xticks(list(range(0, int(data_filtered["energy"].max()), 200)))

plt.grid(False)

# Legend management
legend = plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=5, frameon=False, title="Latent Space Variable")
for line in legend.get_lines():
    line.set_linewidth(3)  # thicker handles than the 0.3 pt plot lines

# Inset: zoom on 1170-1176 keV with the same lines drawn thicker.
axin = ax.inset_axes([0.7, 0.7, 0.2, 0.2])

axin.set_xlim(1170, 1176)
axin.set_ylim(-20, 20)
axin.spines['top'].set_visible(False)
axin.spines['right'].set_visible(False)
axin.grid(False)

_draw_shap_lines(axin, linewidth=1)
axin.get_legend().remove()
axin.set_xlabel("")
axin.set_ylabel("")
axin.set_title("$^{60}Co$ bei 1173 keV")

plt.tight_layout()
plt.savefig("plots/shap.pdf")
plt.show()


In [None]:
import plotly.express as px

# Interactive version of the SHAP curves, one trace per latent variable.
fig = px.line(
    data_filtered,
    x="energy",
    y="value",
    color="variable",
    height=1000,
)
fig.show()

In [None]:
from scipy.signal import find_peaks
import plotly.graph_objects as go

# Sum of absolute SHAP values per energy for energies up to 500.
# Only "value" is aggregated: summing the whole frame also summed the label, flag and
# index columns, which is wasted work and produces meaningless columns.
data_for_px = data_filtered.copy()
rows_below_500 = data_for_px.loc[data_for_px["energy"] <= 500]
data_for_px_below_500 = (
    rows_below_500.assign(value=rows_below_500["value"].abs())
    .groupby("energy", as_index=False)["value"]
    .sum()
)
# data_for_px_below_500["value"] = (data_for_px_below_500["value"] - data_for_px_below_500["value"].min()) / (data_for_px_below_500["value"].max() - data_for_px_below_500["value"].min())

# Peak detection on the summed signal. find_peaks returns positions, so the flag is built
# positionally instead of relying on the frame's index labels.
peaks, _ = find_peaks(data_for_px_below_500["value"].to_numpy(), prominence=100, width=3)
is_peak = np.zeros(len(data_for_px_below_500), dtype=bool)
is_peak[peaks] = True
data_for_px_below_500["peak"] = is_peak
# data_for_px["count"] = data_for_px["count"].sum()
# data_for_px = data_for_px.groupby("energy").mean().reset_index()

fig = go.Figure()

# Full signal as a line
fig.add_trace(go.Scatter(x=data_for_px_below_500["energy"], y=data_for_px_below_500["value"],
                         mode='lines', name='Signal', line=dict(color='black')))

# Peaks as markers
peaks_df_below_500 = data_for_px_below_500[data_for_px_below_500["peak"]]
fig.add_trace(go.Scatter(x=peaks_df_below_500["energy"], y=peaks_df_below_500["value"],
                         mode='markers', name='Peaks',
                         marker=dict(color='red', size=8, symbol='circle')))

fig.update_layout(height=1000, title="Signal mit Peaks")
fig.update_xaxes(title="Energie [keV]")
fig.update_yaxes(title="Zählwert")
fig.show()


In [None]:
from scipy.signal import find_peaks
import plotly.graph_objects as go

# Sum of absolute SHAP values per energy for energies above 500, using a lower prominence
# threshold (10) than the low-energy cell (100). Only "value" is aggregated.
data_for_px = data_filtered.copy()
rows_above_500 = data_for_px.loc[data_for_px["energy"] > 500]
data_for_px = (
    rows_above_500.assign(value=rows_above_500["value"].abs())
    .groupby("energy", as_index=False)["value"]
    .sum()
)
# data_for_px["value"] = (data_for_px["value"] - data_for_px["value"].min()) / (data_for_px["value"].max() - data_for_px["value"].min())

# find_peaks returns positions, so the flag is built positionally.
peaks, _ = find_peaks(data_for_px["value"].to_numpy(), prominence=10, width=3)
is_peak = np.zeros(len(data_for_px), dtype=bool)
is_peak[peaks] = True
data_for_px["peak"] = is_peak
# data_for_px["count"] = data_for_px["count"].sum()
# data_for_px = data_for_px.groupby("energy").mean().reset_index()

fig = go.Figure()

# Combine both energy ranges and sort by energy. Without the sort, plotly connects the
# points in row order and draws a line from the highest energy back to the lowest.
data_combined_shaps = (
    pd.concat([data_for_px_below_500, data_for_px], axis=0)
    .sort_values("energy")
    .reset_index(drop=True)
)
fig.add_trace(go.Scatter(x=data_combined_shaps["energy"], y=data_combined_shaps["value"],
                         mode='lines', name='Signal', line=dict(color='black')))

# Peaks of both ranges as markers
# peaks_df = data_for_px[data_for_px["peak"]]
peaks_df = pd.concat([data_for_px[data_for_px["peak"]], peaks_df_below_500], axis=0).reset_index(drop=True)
fig.add_trace(go.Scatter(x=peaks_df["energy"], y=peaks_df["value"],
                         mode='markers', name='Peaks',
                         marker=dict(color='red', size=8, symbol='circle')))

fig.update_layout(height=1000, title="Signal mit Peaks")
fig.update_xaxes(title="Energie [keV]")
fig.update_yaxes(title="Zählwert")
fig.show()



In [None]:
import src.nuclide.api as npi


def _nuclide_lines(nuclide_id, intensity, energy_above=None, energy_below=None):
    """Load the lines of one nuclide and optionally keep only an open energy window.

    `intensity` is passed straight to the nuclide API.
    NOTE(review): presumably a minimum line intensity — confirm against the API.
    """
    lines = npi.API().nuclides(nuclide_ids=[nuclide_id], intensity=intensity)
    if energy_above is not None:
        lines = lines.loc[lines["energy"] > energy_above]
    if energy_below is not None:
        lines = lines.loc[lines["energy"] < energy_below]
    return lines


# nuclides = npi.API().nuclides(nuclide_ids="all", intensity=0)
eu_154 = _nuclide_lines("eu154", 30)
co_60 = _nuclide_lines("co60", 20)
bi_214 = _nuclide_lines("bi214", 1)
eu_152 = _nuclide_lines("eu152", 5, energy_above=50)
u_235 = _nuclide_lines("u235", 5, energy_above=180)
am_241 = _nuclide_lines("am241", 1, energy_above=50)
tm_163 = _nuclide_lines("tm163", 10, energy_above=50)
tb_141 = _nuclide_lines("tb141", 15, energy_below=500)
co62 = _nuclide_lines("co62", 1, energy_above=500)
cs137 = _nuclide_lines("cs137", 20)
k40 = _nuclide_lines("k40", 0, energy_above=515)
pb214 = _nuclide_lines("pb214", 1, energy_above=70)

# EXPLORE
# Nuclide overlaid in the figure below. This definition was commented out while the plot
# still referenced `explorer`, so the cell failed with a NameError on a fresh kernel.
explorer = _nuclide_lines("bi214", 1)

fig = plt.figure(figsize=(12, 8))

plt.vlines(x=explorer["energy"], ymin=0, ymax=1300, color=sns.color_palette()[0], label="explorer", linewidth=0.7, linestyle="--")
# To overlay another nuclide, add a vlines call with e.g. cs137["energy"], co_60["energy"],
# eu_152["energy"], ... and its own label and palette colour.

sns.lineplot(data_combined_shaps, x="energy", y="value", color="black")
sns.scatterplot(peaks_df, x="energy", y="value", color=sns.color_palette()[1], label="Peaks")

plt.grid(False)
plt.xlabel("Energiedifferenz [keV]", size=14)
plt.ylabel("Summe aller absoluter Shap-Werte", size=14)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.grid(axis="y", alpha=0.2)
plt.xlim(0, 2800)
plt.ylim(0, 1300)
plt.legend()
# Forward slash like every other savefig call in this notebook. The former "plots\\..."
# only addresses the plots directory on Windows.
plt.savefig("plots/energy_diffs.pdf")
plt.show()
plt.close()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# (legend label, line table, colour) per nuclide; one subplot each.
palette = sns.color_palette()
nuclide_data = [
    ("$^{141}\\mathrm{Tb}$", tb_141, palette[1]),
    ("$^{163}\\mathrm{Tm}$", tm_163, palette[2]),
    ("$^{241}\\mathrm{Am}$", am_241, palette[3]),
    ("$^{235}\\mathrm{U}$", u_235, palette[4]),
    ("$^{214}\\mathrm{Bi}$", bi_214, palette[5]),
    ("$^{60}\\mathrm{Co}$", co_60, palette[6]),
    ("$^{154}\\mathrm{Eu}$", eu_154, palette[7]),
    ("$^{152}\\mathrm{Eu}$", eu_152, palette[8]),
    ("$^{137}\\mathrm{Cs}$", cs137, palette[8]),
    ("$^{40}\\mathrm{K}$", k40, palette[0]),
    ("$^{214}\\mathrm{Pb}$", pb214, palette[0]),
]

fig, axes = plt.subplots(4, 3, figsize=(8, 8), sharex=True, sharey=True)
axes = axes.flatten()

for ax, (label, df, color) in zip(axes, nuclide_data):
    # Nuclide lines as dashed verticals behind the summed absolute SHAP signal.
    ax.vlines(x=df["energy"], ymin=0, ymax=1300, color=color, label=label, linewidth=1.0, alpha=0.7, linestyle="--")
    # Raw string: "\s" is an invalid escape sequence in a normal string literal.
    sns.lineplot(data=data_combined_shaps, x="energy", y="value", color="black", ax=ax, linewidth=0.5, label=r"$\sum |SHAP_{k}|$")
    # sns.scatterplot(data=peaks_df, x="energy", y="value", color=sns.color_palette()[1], ax=ax)

    ax.set_xlabel("")
    ax.set_ylabel("")
    # ax.set_title(label, fontsize=12)
    ax.grid(False)
    ax.set_xlim(0, 2800)
    ax.set_ylim(0, 1300)
    ax.tick_params(axis='x', labelsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.grid(axis="y", alpha=0.2)
    ax.legend(loc="upper right")

# Remove every grid cell that has no nuclide, instead of assuming exactly one is left over.
for unused_ax in axes[len(nuclide_data):]:
    fig.delaxes(unused_ax)

# Common axis labels
fig.supxlabel("Energie [keV]", fontsize=14)
fig.supylabel("", fontsize=14)

plt.tight_layout()
plt.savefig("plots/energy_diffs_subplots.pdf", dpi=300, bbox_inches="tight")
plt.show()
plt.close()


In [None]:
import src.nuclide.api as npi

# "eu154": 30,
# "eu152": 5,
# "co60": 2,
# "bi214": 1,
# "u235": 5,
# "am241": 1,
# "tm163": 10,
# "tb141": 15,
# "co62": 1,
# "cs137": 20

# Nuclides left out of the search. The original list was missing a comma, so
# "am241" "cs137" were concatenated into "am241cs137" and neither one was excluded.
excluded_nuclides = ["eu154", "eu152", "co60", "bi214", "u235", "am241", "cs137"]

nuclides = npi.API().nuclides(nuclide_ids="all", intensity=1)
# .copy() so the "diff" column below is written to an independent frame, not to a slice.
nuclides = nuclides.loc[~nuclides["nuclide_id"].isin(excluded_nuclides)].copy()
peaks = data_combined_shaps.loc[data_combined_shaps["peak"] == True].reset_index(drop=True)
energies_to_check_for = peaks["energy"].tolist()

# For every peak energy: the ids of up to two remaining nuclide lines closer than 2 (energy units).
matches = {}

for energy in energies_to_check_for:
    nuclides["diff"] = (nuclides["energy"] - energy).abs()
    nuclides_diff_filtered = nuclides.loc[nuclides["diff"] < 2].reset_index(drop=True)
    nuclides_heads = nuclides_diff_filtered.sort_values(by=["diff"], ascending=True).head(2)["nuclide_id"].tolist()
    matches[energy] = nuclides_heads

# One row per peak energy. orient="index" pads peaks with fewer than two matches, whereas
# pd.DataFrame(matches).T raised as soon as the match lists had different lengths.
result = pd.DataFrame.from_dict(matches, orient="index")

In [None]:
# Peak energies (rounded to two decimals) against matched nuclides as a presence matrix.
df_result = (
    result.rename_axis("energy")
    .reset_index()
    .assign(energy=lambda frame: frame["energy"].round(2))
)
df_long = df_result.melt(id_vars=["energy"], value_name="nuclide").drop(columns="variable")
# df_long = df_long.loc[df_long["nuclide"] == "ag116"]

# Count matrix: rows are peak energies, columns are nuclide ids.
heatmap_data = pd.crosstab(df_long["energy"], df_long["nuclide"])

# Most frequently matched nuclides first.
columns_by_frequency = heatmap_data.sum().sort_values(ascending=False).index
heatmap_data = heatmap_data.loc[:, columns_by_frequency]

# Plot the heatmap
plt.figure(figsize=(12, 12))
sns.heatmap(
    heatmap_data,
    cmap="Greys",
    linewidths=0.5,
    linecolor="lightgrey",
    cbar=True,
)

In [None]:
import pandas as pd

# How often each nuclide id appears among the matches, over all columns of `result`
# (flattened column by column).
flattened = pd.Series(result.to_numpy().ravel(order="F"))
counts = (
    flattened.value_counts()
    .rename_axis("nuclide_id")
    .reset_index(name="count")
)
counts

In [None]:
import src.peaks.api as ppi
from src.peaks.refinder import RePeakFinder

dates = ppi.API().unique_dates()