# Data Exploration

## Visualization of Raw Data and Synthetic Data

In [None]:
import numpy as np
import matplotlib as mpl
import re
import seaborn as sns
import matplotlib.pyplot as plt
import random
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib
import matplotlib.colors as mcolors
import pandas as pd
import mlflow
from config.loader import load_config
import os

import src.measurements.api as mpi
import src.generator.api as gpi
import src.peaks.api as ppi
import src.statistics.api as spi

# Render all figure text through LaTeX (needs a working TeX installation at plot time).
plt.rcParams['text.usetex'] = True



In [None]:
# Random sample of synthetic spectra: shuffle all available keys in place and load
# the spectra belonging to the first 1246 of them.
# NOTE(review): 1246 presumably equals the number of measured spectra — confirm.
synthetic_keys = gpi.API().unique_keys()
random.shuffle(synthetic_keys)
synthetics = gpi.API().synthetics(keys=synthetic_keys[0:1246])

In [None]:
# Load the synthetic spectra generated for a single measurement together with the
# processed version of that measurement.
date = "2017-07-07 13:52:17"
one_meas_synthetics = gpi.API().synthetics_for_meas([date])
one_meas_processed_measurement = ppi.API().measurement(dates=[date])

# Replace the key by the concatenation of its 2nd and 3rd "_"-separated parts
# (used as the hue label in the plot).
datetime_parts = one_meas_synthetics["datetime"].str.split("_")
one_meas_synthetics["datetime"] = datetime_parts.str[1] + datetime_parts.str[2]
one_meas_synthetics

In [None]:
# Overlay synthetic spectra of one measurement on the processed measurement itself
# and save the figure as a PDF.
plt.figure(figsize=(8, 5))

plt.rcParams['text.usetex'] = True

# Only the first 81600 rows of the synthetic frame are drawn, one faint line per "datetime" value.
# NOTE(review): 8160 is presumably the number of rows per spectrum (i.e. ten spectra) — confirm.
sns.lineplot(data=one_meas_synthetics[8160 * 0:8160 * 10], x="energy", y="count", hue="datetime", alpha=0.3,
             linewidth=0.5)
# The processed measurement is drawn in black on top of the synthetic lines (zorder=100).
sns.lineplot(data=one_meas_processed_measurement, x="energy", y="count", zorder=100, color="black",
             label="Messung 2017-07-07 13:52:17")
plt.ylim(0, 1000)
plt.xlim(0, 2800)
plt.grid(False)
plt.xlabel("Energie [keV]", size=14)
plt.ylabel("Zählwert", size=14)
plt.tick_params(axis='x', labelsize=12, bottom=True)
plt.tick_params(axis='y', labelsize=12, left=True)
# Arrow annotations; the coordinates are hand-picked data coordinates.
plt.annotate('Neuer Peak',
             ha='center', va='bottom',
             size='large',
             xytext=(1580, 900), xy=(1582.5, 300), arrowprops={'facecolor': 'darkgrey'})

# The text anchor (y = -200) lies below the visible y range, i.e. under the axis.
plt.annotate('Abschwächung der Peaks',
             ha='center', va='bottom',
             size='large',
             alpha=1,
             zorder=100,
             xytext=(200, -200), xy=(800, 0), arrowprops={'facecolor': "darkgrey"})

# Legend placed above the axes, without a frame.
leg = plt.legend(
    loc="lower center",
    bbox_to_anchor=(0.5, 1.02),
    borderaxespad=0,
    ncol=4,
    frameon=False
)
# Thicker legend handles so the thin, semi-transparent lines are recognisable.
for line in leg.get_lines():
    line.set_linewidth(5)
axin = plt.gca()
axin.spines['top'].set_visible(False)
axin.spines['right'].set_visible(False)
plt.tight_layout()
plt.savefig("plots/synthetics_example_for_one_meas.pdf")

In [None]:
# Load the processed measurements for every measurement date known to the measurements API.
dates = mpi.API().unique_dates()
# measurement = mpi.API().measurement(dates=dates)
processed_measurements = ppi.API().measurement(dates=dates)

In [None]:
# Same dates as above, but loaded through `re_measurement`.
# NOTE(review): "re_" presumably denotes the re-annotated data set — confirm.
dates = mpi.API().unique_dates()
# measurement = mpi.API().measurement(dates=dates)
re_processed_measurements = ppi.API().re_measurement(dates=dates)

In [None]:
# 2D histogram (energy vs. log-scaled count) over the sampled synthetic spectra.
plt.figure(figsize=(10, 7))
# Shift by one so that zero counts remain representable on the logarithmic y axis.
synthetics["count_shifted"] = synthetics["count"] + 1

# Use only the 0.1–1.0 range of "Greys", i.e. skip its lightest shades.
cmap = mpl.colormaps['Greys']
new_cmap = cmap(np.linspace(0.1, 1, 256))
custom_cmap = mpl.colors.ListedColormap(new_cmap)

ax = sns.histplot(
    synthetics,
    x="energy",
    y="count_shifted",
    bins=(2988, 40),
    log_scale=(False, True),
    cbar=True,
    cbar_kws={
        "orientation": "horizontal",
        "shrink": 1,
        "label": "Anzahl der Zählwerte für die Energiewerte",
    },
    cmap=custom_cmap,
    vmin=0,
    vmax=1000,
    zorder=-10,
    # rasterized=True
)
# The colorbar is the last axes added to the figure.
cbar = ax.figure.axes[-1]
cbar.xaxis.label.set_size(14)
cbar.xaxis.labelpad = 15
cbar.tick_params(labelsize=12)
# Artists with a zorder below 0 (the histogram mesh, zorder=-10) are rasterized in the PDF.
ax.set_rasterization_zorder(0)
plt.xlabel("Energie [keV]", size=14, labelpad=15)
plt.ylabel("Log(Zählwert + 1)", size=14, labelpad=15)
plt.xticks(size=12)
plt.yticks(size=12)
plt.ylim(1, 10000000)
plt.xlim(0, 2788)

plt.grid(False)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='x', labelsize=12, bottom=True)
ax.tick_params(axis='y', labelsize=12, left=True)
# Forward slash: works on every platform and matches the other savefig calls;
# a backslash would create a file literally named "plots\..." outside Windows.
plt.savefig("plots/count_energy_heatmap_synthetics.pdf")
plt.show()
plt.close()

In [None]:
# Show the datetime key of the first row of the synthetic sample.
synthetics["datetime"].values[0]

In [None]:
# Example spectrum: rows of the first datetime key in the synthetic sample,
# restricted to energies below 301.
first_datetime = synthetics["datetime"].values[0]
one_meas = synthetics.loc[synthetics["datetime"] == first_datetime].reset_index(drop=True)
one_meas = one_meas.loc[one_meas["energy"] < 301]

default_palette = sns.color_palette()
spectrum_energy = one_meas["energy"]
net_count = one_meas["count"] - one_meas["background"]
peak_energies = one_meas.loc[one_meas["peak"] == True]["energy"]

plt.figure(figsize=(10, 7))
# Raw count, background and their difference as three semi-transparent curves.
plt.plot(spectrum_energy, one_meas["count"], label="Messung", color=default_palette[0], alpha=0.5)
plt.plot(spectrum_energy, one_meas["background"], label="Hintergrund", color=default_palette[1], alpha=0.5)
plt.plot(spectrum_energy, net_count, label="Messung - Hintergrund", color=default_palette[2], alpha=0.5)
# Dashed vertical markers at every energy flagged as a peak.
plt.vlines(
    x=peak_energies,
    ymin=0,
    ymax=2000,
    label="Nuklid",
    color=default_palette[3],
    linewidth=2,
    linestyle="dashed",
)

plt.ylim(0, 3000)
plt.xlim(0, 301)
plt.xlabel("Energie [keV]", size=14)
plt.ylabel("Zählwert", size=14)
for axis_name in ('x', 'y'):
    plt.tick_params(axis=axis_name, labelsize=12)
plt.grid(False)

# Frameless legend above the axes with thicker line handles.
leg = plt.legend(
    loc="lower center",
    bbox_to_anchor=(0.5, 1.02),
    borderaxespad=0,
    ncol=4,
    frameon=False
)
for legend_line in leg.get_lines():
    legend_line.set_linewidth(3)
plt.savefig("plots/background_meas_example.pdf")

In [None]:
import src.generator.api as gpi
import seaborn as sns
import matplotlib.pyplot as plt

# Histogram of all synthetic count values, shifted by one so zeros fit on the logarithmic x axis.
counts = synthetics["count"].to_numpy() + 1

sns.histplot(
    counts,
    bins=40,
    stat="probability",
    color="black",
    alpha=0.6,
    label="Data",
    log_scale=(True, False),
)
plt.xlabel("Log(Zählwert + 1)")
plt.ylabel("Wahrscheinlichkeit")
plt.grid(True, alpha=0.3, which="both")
# Only the lower x limit is fixed; the upper one stays automatic.
plt.xlim(
    1,
)
# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/count_histogram_synthetics.pdf")
plt.show()
plt.close()

In [None]:
import src.vae.api as vpi
import random

# Random sample of 1246 keys from the VAE API's "re_" key set and the matching spectra.
# NOTE(review): `re_synhtetics` looks like a misspelling of "synthetics"; it is kept because
# it has to match the method name defined in src.vae.api — confirm there.
uniquq_keys = vpi.API().re_unique_dates()
random.shuffle(uniquq_keys)
re_synthetics = vpi.API().re_synhtetics(uniquq_keys[0:1246])
dates = mpi.API().unique_dates()
# measurement = mpi.API().measurement(dates=dates)
re_processed_measurements = ppi.API().re_measurement(dates=dates)

In [None]:
import src.vae.api as vpi
import random

# Assigns `re_synthetics` and `re_processed_measurements` from the non-"re_" API calls.
# NOTE(review): the keys come from the VAE API but are passed to the generator API — confirm
# that both APIs share the same key space.
synthetic_keys = vpi.API().unique_dates()
random.shuffle(synthetic_keys)
re_synthetics = gpi.API().synthetics(keys=synthetic_keys[0:1246])
dates = mpi.API().unique_dates()
# measurement = mpi.API().measurement(dates=dates)
re_processed_measurements = ppi.API().measurement(dates=dates)

In [None]:
# Keep one row per (spectrum, isotope) pair so every isotope is counted once per spectrum.
filtered_re_synthetics = re_synthetics.drop_duplicates(subset=["datetime", "identified_isotope"]).reset_index(drop=True)
filtered_re_measurements = re_processed_measurements.drop_duplicates(
    subset=["datetime", "identified_isotope"]).reset_index(drop=True)

# Drop rows without an identified isotope. The explicit .copy() makes the results independent
# frames, so adding the "data" column below does not raise pandas' SettingWithCopyWarning.
filtered_re_measurements = filtered_re_measurements.loc[
    filtered_re_measurements["identified_isotope"] != ""
].copy()
filtered_re_synthetics = filtered_re_synthetics.loc[
    filtered_re_synthetics["identified_isotope"] != ""
].copy()

# Raw strings: "\g" is not a valid escape sequence in a normal string literal.
# The resulting text is unchanged.
filtered_re_synthetics["data"] = r"Synthetische $\gamma$-Spektren"
filtered_re_measurements["data"] = r"Gemessene $\gamma$-Spektren"
combined_data = pd.concat([filtered_re_synthetics, filtered_re_measurements], axis=0, ignore_index=True)


def format_isotope(isotope):
    """Format an isotope code such as ``"co60"`` as a LaTeX label (``"$^{60}Co$"``).

    The code must start with letters followed by digits; the letters are capitalised
    and the digits become a superscript in front of them.

    Args:
        isotope: Isotope code to format.

    Returns:
        The LaTeX label, or ``isotope`` unchanged when it is not a string
        (e.g. ``None`` / NaN from missing data) or does not match the pattern.
    """
    # Missing values reach this function as non-strings; re.match would raise TypeError on them.
    if not isinstance(isotope, str):
        return isotope
    match = re.match(r"([a-zA-Z]+)(\d+)", isotope)
    if match is None:
        return isotope
    element, mass = match.groups()
    return f"$^{{{mass}}}{element.capitalize()}$"


# Replace the raw isotope codes by their LaTeX labels (used as tick labels in the plots).
combined_data["identified_isotope"] = combined_data["identified_isotope"].apply(format_isotope)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

plt.rcParams['text.usetex'] = True

# Values of the "data" column. Raw strings, because "\g" is not a valid escape sequence
# in a normal string literal; the text itself is unchanged.
measured_label = r"Gemessene $\gamma$-Spektren"
synthetic_label = r"Synthetische $\gamma$-Spektren"
# Height of the background bars and reference for the "not annotated" numbers.
# NOTE(review): presumably the number of spectra per data source — confirm.
total_spectra = 1246

# Number of spectra per isotope and data source, sorted by the measured counts.
counts = combined_data.groupby(["identified_isotope", "data"]).size().unstack(fill_value=0)
counts = counts.sort_values(by=[measured_label, synthetic_label], ascending=True)
x = range(len(counts))
bar_width = 0.4
left_positions = [i - bar_width / 2 for i in x]
right_positions = [i + bar_width / 2 for i in x]
paired = sns.color_palette("Paired")

fig, ax = plt.subplots(figsize=(8, 4))
# Background bars of full height; the annotated counts are drawn on top of them.
ax.bar(left_positions, [total_spectra] * len(counts), color=paired[0], width=bar_width,
       alpha=0.4,
       label=r"Kein Nuklid für gemessene $\gamma$-Spektren")
ax.bar(right_positions, [total_spectra] * len(counts), color=paired[2], width=bar_width,
       alpha=0.4,
       label=r"Kein Nuklid für synthetische $\gamma$-Spektren")
ax.bar(left_positions, counts[measured_label], color=paired[1],
       width=bar_width,
       label=r"Annotiertes Nuklid für gemessene $\gamma$-Spektren", alpha=0.4)
ax.bar(right_positions, counts[synthetic_label], color=paired[3],
       width=bar_width,
       label=r"Annotiertes Nuklid für synthetische $\gamma$-Spektren", alpha=0.4)

ax.set_xlabel("Nuklid", size=14, labelpad=10)
ax.set_ylabel("Anzahl", size=14, labelpad=10)
ax.set_xticks(list(x))
ax.set_xticklabels(counts.index, rotation=0, ha='center', fontsize=12)
ax.tick_params(axis='y', labelsize=12, left=True)
ax.tick_params(axis='x', labelsize=12, bottom=True)
ax.grid(False)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

for i, isotope in enumerate(counts.index):
    for position, value in (
        (i - bar_width / 2, counts.loc[isotope, measured_label]),
        (i + bar_width / 2, counts.loc[isotope, synthetic_label]),
    ):
        # White number inside the bar, only when the bar is tall enough to hold it.
        if value > 100:
            ax.annotate(str(value), (position, value - 40), ha='center', va='top', rotation=90,
                        fontsize=10, color="white")
        # Black number above the bar: remainder up to total_spectra.
        ax.annotate(str(total_spectra - value), (position, value + 110), ha='center', va='top',
                    rotation=90, fontsize=10, color="black")

fig.legend(
    bbox_to_anchor=(1.0, 1.10),
    borderaxespad=0,
    ncol=2,
    frameon=False,
    fontsize=12
)
ax.set_ylim(0, total_spectra + 100)
# Frame exactly the plotted bar groups instead of assuming eleven isotopes.
ax.set_xlim(-0.5, len(counts) - 0.5)

plt.tight_layout()
# NOTE(review): the histogram variant of this comparison is saved under the same file name.
plt.savefig("plots/nuclide_identification_compare.pdf", bbox_inches='tight')
plt.show()


In [None]:
plt.rcParams['text.usetex'] = True

# Dodged histogram: number of annotated spectra per isotope, split by data source.
fig = plt.figure(figsize=(8, 4))
combined_data["color"] = combined_data["identified_isotope"] + combined_data["data"]
# One multi-key sort instead of two successive single-key sorts: the default sort is not
# stable, so the second sort used to discard the ordering by "data" and left the hue order
# (and with it the colour assignment) dependent on arbitrary row order.
combined_data = combined_data.sort_values(by=["identified_isotope", "data"])
sns.histplot(
    combined_data,
    x="identified_isotope",
    hue="data",
    multiple="dodge",
    palette=[sns.color_palette("Greys")[-5], sns.color_palette("Greys")[-3]],
    legend=False,
    shrink=0.8
)

plt.xlabel("Nuklid", size=14, labelpad=10)
plt.ylabel("Anzahl", size=14, labelpad=10)
plt.tick_params(axis='x', labelsize=12, bottom=True, pad=10)
plt.tick_params(axis='y', labelsize=12, left=True)
plt.grid(False)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Write each bar's count just below its top edge.
for p in ax.patches:
    height = p.get_height()
    if height > 0:
        ax.annotate(
            f'{int(height)}',
            (p.get_x() + p.get_width() / 2, height - 20),
            ha='center',
            va='top',
            rotation=90,
            fontsize=10
        )
# "\\gamma": the backslash must be escaped because these literals also contain a real "\n"
# and therefore cannot be raw strings; the resulting text is unchanged.
# NOTE(review): the labels are assigned to the bar handles by position — verify the mapping.
fig.legend(labels=['Anzahl detailliert annotierter Nuklide für\n1246 gemessene $\\gamma$-Spektren',
                   'Anzahl detailliert annotierter Nuklide für\n1246 synthetische $\\gamma$-Spektren'],
           bbox_to_anchor=(0.97, 1.05),
           borderaxespad=0,
           ncol=3,
           frameon=False,
           fontsize=12,
           )

plt.tight_layout()
# NOTE(review): the grouped bar chart of this comparison is saved under the same file name.
plt.savefig("plots/nuclide_identification_compare.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Load processed measurements for the datetimes listed in the "datetime" column of splitted_keys().
keys_pro = mpi.API().splitted_keys()["datetime"].tolist()
processed_measurements = ppi.API().measurement(keys_pro)


In [None]:
# Load processed measurements for every date known to the peaks API.
dates = ppi.API().unique_dates()
processed_measurements = ppi.API().measurement(dates)

In [None]:
import src.vae.api as vpi

# Random sample of 1246 keys from the VAE API and the synthetic spectra belonging to them.
uniquq_keys = vpi.API().unique_dates()
random.shuffle(uniquq_keys)
synthetics = vpi.API().synthetic(uniquq_keys[0:1246])

In [None]:
# Keep one row per (spectrum, isotope) pair so every isotope is counted once per spectrum.
filtered_re_synthetics = synthetics.drop_duplicates(subset=["datetime", "identified_isotope"]).reset_index(drop=True)
filtered_re_measurements = processed_measurements.drop_duplicates(
    subset=["datetime", "identified_isotope"]).reset_index(drop=True)

# Drop rows without an identified isotope. The explicit .copy() makes the results independent
# frames, so adding the "data" column below does not raise pandas' SettingWithCopyWarning.
filtered_re_measurements = filtered_re_measurements.loc[
    filtered_re_measurements["identified_isotope"] != ""
].copy()
filtered_re_synthetics = filtered_re_synthetics.loc[
    filtered_re_synthetics["identified_isotope"] != ""
].copy()

# Raw strings: "\g" is not a valid escape sequence in a normal string literal.
# The resulting text is unchanged.
filtered_re_synthetics["data"] = r"1246 Synthetische $\gamma$-Spektren"
filtered_re_measurements["data"] = r"1246 Gemessene $\gamma$-Spektren"
combined_data = pd.concat([filtered_re_measurements, filtered_re_synthetics], axis=0, ignore_index=True)


def format_isotope(isotope):
    """Format an isotope code such as ``"co60"`` as a LaTeX label (``"$^{60}Co$"``).

    The code must start with letters followed by digits; the letters are capitalised
    and the digits become a superscript in front of them.

    Args:
        isotope: Isotope code to format.

    Returns:
        The LaTeX label, or ``isotope`` unchanged when it is not a string
        (e.g. ``None`` / NaN from missing data) or does not match the pattern.
    """
    # Missing values reach this function as non-strings; re.match would raise TypeError on them.
    if not isinstance(isotope, str):
        return isotope
    match = re.match(r"([a-zA-Z]+)(\d+)", isotope)
    if match is None:
        return isotope
    element, mass = match.groups()
    return f"$^{{{mass}}}{element.capitalize()}$"


# Replace the raw isotope codes by their LaTeX labels (used as tick labels in the plot).
combined_data["identified_isotope"] = combined_data["identified_isotope"].apply(format_isotope)

In [None]:
plt.rcParams['text.usetex'] = True

# Dodged histogram: number of annotated spectra per isotope, split by data source.
fig = plt.figure(figsize=(8, 4))
combined_data["color"] = combined_data["identified_isotope"] + combined_data["data"]
combined_data = combined_data.sort_values(by="identified_isotope")
sns.histplot(
    combined_data,
    x="identified_isotope",
    hue="data",
    multiple="dodge",
    palette=[sns.color_palette("Greys")[-3], sns.color_palette("Greys")[-5]],
    legend=False,
    shrink=0.8
)

plt.xlabel("Nuklid", size=14, labelpad=10)
plt.ylabel("Anzahl", size=14, labelpad=10)
plt.tick_params(axis='x', labelsize=12, bottom=True, pad=10)
plt.tick_params(axis='y', labelsize=12, left=True)
plt.grid(False)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Write each bar's count just above its top edge.
for p in ax.patches:
    height = p.get_height()
    if height > 0:
        ax.annotate(
            f'{int(height)}',
            (p.get_x() + p.get_width() / 2, height + 10),
            ha='center',
            va='bottom',
            rotation=90,
            fontsize=10
        )
# "\\gamma": the backslash must be escaped because these literals also contain a real "\n"
# and therefore cannot be raw strings; the resulting text is unchanged.
# NOTE(review): the labels are assigned to the bar handles by position — verify the mapping.
fig.legend(labels=["Anzahl Initial annotierter Nuklide für\n1246 synthetische $\\gamma$-Spektren",
                   'Anzahl Initial annotierter Nuklide für\n1246 gemessene $\\gamma$-Spektren'],
           bbox_to_anchor=(0.95, 1.1),
           borderaxespad=0,
           ncol=3,
           frameon=False,
           fontsize=12,
           )

plt.tight_layout()
plt.savefig("plots/nuclide_identification_compare_first.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Summary statistics of the synthetic frame, every value formatted with five decimals.
re_synthetics.describe().apply(lambda s: s.apply("{0:.5f}".format))

In [None]:
# 2D histogram (energy vs. log-scaled count) over the processed measurements.
# Use only the 0.2–1.0 range of "Greys", i.e. skip its lightest shades.
cmap = mpl.colormaps['Greys']
new_cmap = cmap(np.linspace(0.2, 1, 256))
custom_cmap = mpl.colors.ListedColormap(new_cmap)

plt.figure(figsize=(10, 7))
# Shift by one so that zero counts remain representable on the logarithmic y axis.
processed_measurements["count_shifted"] = processed_measurements["count"] + 1
ax = sns.histplot(
    processed_measurements,
    x="energy",
    y="count_shifted",
    bins=(2988, 40),
    log_scale=(False, True),
    cbar=True,
    cbar_kws={
        "orientation": "horizontal",
        "shrink": 1,
        "label": "Anzahl der Zählwerte für die Energiewerte",
    },
    cmap=custom_cmap,
    zorder=-10,
)
# The colorbar is the last axes added to the figure.
cbar = ax.figure.axes[-1]
cbar.xaxis.label.set_size(14)
cbar.xaxis.labelpad = 15
cbar.tick_params(labelsize=12)
# Artists with a zorder below 0 (the histogram mesh, zorder=-10) are rasterized in the PDF.
ax.set_rasterization_zorder(0)
plt.xlabel("Energie [keV]", size=14, labelpad=15)
plt.ylabel("Log(Zählwert + 1)", size=14, labelpad=15)
plt.xticks(size=12)
plt.yticks(size=12)
plt.ylim(1, 10000000)
plt.xlim(0, 2788)
plt.grid(False)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tick_params(axis='x', labelsize=12, bottom=True)
plt.tick_params(axis='y', labelsize=12, left=True)
# Forward slash: works on every platform and matches the other savefig calls;
# a backslash would create a file literally named "plots\..." outside Windows.
plt.savefig("plots/count_energy_heatmap_measurements.pdf")
plt.show()
plt.close()

In [None]:
# Discard any figure that is still open before starting a new one.
plt.close()
import src.generator.api as gpi
import seaborn as sns
import matplotlib.pyplot as plt

# Histogram of the absolute value of (count + 1) over all processed measurements,
# on a logarithmic x axis.
sns.histplot(
    abs(processed_measurements["count"].to_numpy() + 1),
    bins=40,
    stat="probability",
    color="black",
    alpha=0.6,
    label="Data",
    log_scale=(True, False),
)
plt.xlabel("Log(Zählwert + 1)")
plt.ylabel("Wahrscheinlichkeit")
plt.grid(True, alpha=0.3, which="both")
# Only the lower x limit is fixed; the upper one stays automatic.
plt.xlim(
    1,
)
# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/count_histogram_measurement.pdf")
plt.show()
plt.close()

In [None]:
from scipy.stats import poisson, chisquare
from scipy.stats import kstest

# Log-transformed counts, shifted so that the smallest value is 0.
data = np.log(processed_measurements["count"].to_numpy() + 1)
data = data - data.min()

for dist in [poisson]:
    if hasattr(dist, "fit"):
        params = dist.fit(data)
    else:
        # SciPy's discrete distributions (such as poisson) provide no .fit() method.
        # For the Poisson distribution the maximum-likelihood estimate of mu is the sample mean.
        params = (float(np.mean(data)),)
    # NOTE(review): the KS test assumes a continuous reference distribution; against the
    # discrete Poisson CDF the p-value is only indicative.
    ks_stat, p_val = kstest(data, dist.name, args=params)
    print(f"{dist.name}: KS={ks_stat:.4f}, p={p_val:.4f}, params {params}")

In [None]:
# poisson is imported here as well so the cell does not depend on another cell's import.
from scipy.stats import chisquare, poisson
import numpy as np

# NOTE(review): `counts` is assigned in several other cells (as an array and as a DataFrame);
# make sure the intended one is current before running this cell.
# Observed frequencies of the integer values 0..48.
hist_obs, _ = np.histogram(counts, bins=np.arange(0, 50))
expected_pmf = poisson.pmf(np.arange(len(hist_obs)), mu=np.mean(counts))
# chisquare requires the observed and expected totals to agree; the Poisson mass on 0..48
# is generally below 1, so renormalise it over the bins before scaling.
expected_freq = expected_pmf / expected_pmf.sum() * np.sum(hist_obs)
chi_stat, p_val = chisquare(hist_obs, expected_freq)
print(f"Chi² = {chi_stat:.2f}, p = {p_val:.4f}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Q-Q plot of `data` against a Poisson distribution with the parameters in `params`
# (both are taken from the notebook state).
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(111)

res = stats.probplot(data, dist="poisson", sparams=params, plot=plt)

# probplot draws two lines: [0] the sample points, [1] the fitted reference line.
# The points are hidden and re-drawn below as a labelled line.
ax.get_lines()[0].set_visible(False)

ax.get_lines()[1].set_color('black')
ax.get_lines()[1].set_linestyle('--')
ax.get_lines()[1].set_linewidth(2)

num_points = len(res[0][0])
theoretical_q = res[0][0]
empirical_q = res[0][1]

ax.plot(
    theoretical_q,
    empirical_q,
    label="Zählwerte"
)

plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.yticks([0, 5, 10, 15])
plt.xticks([0, 5, 10, 15])

plt.xlabel("Theoretische Quantile", size=14)
plt.ylabel("Empirische Quantile", size=14)
plt.grid(True, alpha=0.2)

# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/qq_plot.pdf")
plt.show()

In [None]:
# Load the measurements for every date known to the measurements API.
dates = mpi.API().unique_dates()
measurement = mpi.API().measurement(dates)

In [None]:
# Number of dates, followed by summary statistics formatted with five decimals.
print(len(dates))
measurement.describe().apply(lambda s: s.apply("{0:.5f}".format))

In [None]:
# Distribution of the maximum energy per spectrum, rounded to whole numbers.
data = measurement.groupby("datetime")["energy"].max().reset_index()
data["energy"] = data["energy"].round(0).astype(int)
# After the count, the "datetime" column holds the number of spectra per maximum energy.
data = data.groupby("energy").count().reset_index()
data["percent"] = data["datetime"] / data["datetime"].sum() * 100

fig = plt.figure(figsize=(10, 5))
ax = sns.barplot(
    data,
    x="energy",
    y="datetime",
    color="grey",
    linewidth=1.5,
    edgecolor=".5",
    facecolor=(0, 0, 0, 0),
)
# "\\%" yields the LaTeX-escaped percent sign; a single "\%" is an invalid escape sequence.
labels = [
    f"{c / 1e0:.0f}\n({p:.2f}\\%)"
    for c, p in zip(data["datetime"], data["percent"])
]
ax.bar_label(ax.containers[0], labels=labels, fontsize=12)
plt.xlabel("Max(Energie [keV])", size=14)
plt.ylabel("Log(Anzahl)", size=14)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.grid(axis="y", alpha=0.2)
plt.yscale("log")
plt.ylim(1, 10000)
# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/max_energy.pdf")
plt.show()
plt.close()

In [None]:
# Row-to-row differences within each spectrum after sorting by energy;
# the first row of every group (NaN difference) is dropped.
measurement_diffs = (
    measurement.sort_values(by="energy").groupby("datetime").diff().dropna()
)
measurement_diffs["energy"] = measurement_diffs["energy"].round(2)
# Index-aligned join with the original rows; overlapping columns get "_diffs" / "_raw" suffixes.
measurement_diffs = measurement_diffs.join(
    measurement, lsuffix="_diffs", rsuffix="_raw", how="left"
)
# Number of rows per rounded energy step and its share in percent.
diffs = measurement_diffs.groupby("energy_diffs").count().reset_index()
diffs["percent"] = diffs["count_diffs"] / diffs["count_diffs"].sum() * 100
measurement_diffs.describe().apply(lambda s: s.apply("{0:.5f}".format))

In [None]:
# Bar chart of the energy step sizes between consecutive channels, with the mean step marked.
mean = measurement_diffs["energy_diffs"].mean()
fig = plt.figure(figsize=(10, 5))
ax = sns.barplot(
    diffs,
    x="energy_diffs",
    y="count_diffs",
    color="grey",
    linewidth=1.5,
    edgecolor=".5",
    facecolor=(0, 0, 0, 0),
)
# "\\%" yields the LaTeX-escaped percent sign; a single "\%" is an invalid escape sequence.
labels = [
    f"{c / 1e3:.1f}K\n({p:.2f}\\%)"
    for c, p in zip(diffs["count_diffs"], diffs["percent"])
]
ax.bar_label(ax.containers[0], labels=labels, fontsize=12)
# NOTE(review): the bar plot's x axis is categorical, so +2.15 is a hand-tuned offset that
# places the marker between the bars — re-check it when the data changes.
# The label shows the computed mean instead of a hard-coded number, so it cannot go stale.
plt.vlines(x=mean + 2.15, ymin=1, ymax=100000000, color="black", linestyle="dotted",
           label=rf"$\mu={mean:.5f}$")
plt.xlabel("Energiedifferenz [keV]", size=14)
plt.ylabel("Log(Anzahl)", size=14)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.grid(axis="y", alpha=0.2)
plt.legend()
plt.yscale("log")
plt.ylim(1, 100000000)
# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/energy_diffs.pdf")
plt.show()
plt.close()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
from scipy.stats import chisquare

# Compare the log-transformed count distributions of measurements and synthetic spectra.
sample1 = np.log(measurement["count"].to_numpy() + 1).reshape(-1, 1)
sample2 = np.log(synthetics["count"].to_numpy() + 1).reshape(-1, 1)
# Lower bound 0.01 instead of 0, because a Pareto-distributed value must be > 0.
scaler1 = MinMaxScaler(feature_range=(0.01, 1.0))
scaler2 = MinMaxScaler(feature_range=(0.01, 1.0))

sample1_scaled = scaler1.fit_transform(sample1).flatten()
sample2_scaled = scaler2.fit_transform(sample2).flatten()

df = pd.DataFrame({
    "log_count": np.concatenate([sample1_scaled, sample2_scaled]),
    "source": ["processed_measurement"] * len(sample1_scaled) + ["synthetics"] * len(sample2_scaled)
})

sns.displot(df, x="log_count", hue="source", kind="kde", fill=True, common_norm=False)
plt.title("Log-Transformed Count Distributions")
plt.xlabel("log(count + 1)")
plt.ylabel("Density")
plt.show()

# Two-sample Kolmogorov-Smirnov test on the scaled samples.
print(stats.ks_2samp(sample1_scaled, sample2_scaled))

# Chi-square comparison on a common binning (bin edges taken from sample 1).
counts1, bins = np.histogram(sample1_scaled, bins=400)
counts2, _ = np.histogram(sample2_scaled, bins=bins)

print(counts1, counts2)
# chisquare needs strictly positive expected frequencies and equal totals for observed and
# expected values. The two samples differ in size, so bins that are empty in the reference
# histogram are dropped and the reference is rescaled to the observed total.
populated = counts2 > 0
observed = counts1[populated]
expected = counts2[populated] * (observed.sum() / counts2[populated].sum())
chi2_stat, p_value = chisquare(observed, f_exp=expected)
print(chi2_stat)
print(p_value)



In [None]:
from scipy.stats import pareto
import numpy as np

# Log-transformed counts (shifted by one) as column vectors, as the scalers expect 2D input.
sample1 = np.log(measurement["count"].to_numpy() + 1).reshape(-1, 1)
sample2 = np.log(synthetics["count"].to_numpy() + 1).reshape(-1, 1)

# Lower bound 0.01 instead of 0, because a Pareto-distributed value must be > 0.
pareto_feature_range = (0.01, 1.0)
scaler1 = MinMaxScaler(feature_range=pareto_feature_range)
scaler2 = MinMaxScaler(feature_range=pareto_feature_range)

sample1_scaled = scaler1.fit_transform(sample1).flatten()
sample2_scaled = scaler2.fit_transform(sample2).flatten()

# Fit a Pareto distribution with the location fixed at 0 to each scaled sample
# and print the value range of the sample next to it.
params1 = pareto.fit(sample1_scaled, floc=0)
print(min(sample1_scaled), max(sample1_scaled))
params2 = pareto.fit(sample2_scaled, floc=0)
print(min(sample2_scaled), max(sample2_scaled))

for fit_name, fit_params in (("Measurements", params1), ("Synthetic", params2)):
    print(
        f"{fit_name} Pareto params: "
        f"shape={fit_params[0]:.3f}, loc={fit_params[1]:.3f}, scale={fit_params[2]:.3f}"
    )

In [None]:
# Evaluate both fitted Pareto densities on a common grid and plot them over the sample histograms.
x = np.linspace(0, max(np.max(sample1_scaled), np.max(sample2_scaled)), 1000)

pdf1 = pareto.pdf(x, *params1)
pdf2 = pareto.pdf(x, *params2)

# NOTE(review): this KS test is run on the evaluated PDF values, not on samples drawn from
# the two distributions — confirm that this is the intended comparison.
print(stats.ks_2samp(pdf1, pdf2, alternative="two-sided"))

plt.figure(figsize=(10, 6))
plt.hist(sample1_scaled, bins=100, density=True, alpha=0.5, label="Sample 1 Histogram")
plt.hist(sample2_scaled, bins=100, density=True, alpha=0.5, label="Sample 2 Histogram")
plt.plot(x, pdf1, 'r-', lw=1, label='Sample 1 Pareto fit')
plt.plot(x, pdf2, 'b-', lw=1, label='Sample 2 Pareto fit')
plt.title("Pareto Fits und Histogramme der Samples")
plt.xlabel("Skalierte Werte")
plt.ylabel("Dichte")
plt.legend()
plt.show()

In [None]:
# Example plot: the two spectra whose datetimes are dates[6] and dates[7].
one_measurement = measurement.loc[measurement["datetime"].isin(dates[6:8])]

plt.figure(figsize=(7, 5))
sns.lineplot(one_measurement, x="energy", y="count", hue="datetime", alpha=0.7)

plt.ylim(0, 100)
plt.xlabel("Energy [keV]")
plt.ylabel("Zählwert")
plt.grid(alpha=0.5)
# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/measurement_example.pdf")
plt.show()
plt.close()

In [None]:
# Read the configuration once instead of once per value.
config = load_config()
minio_config = config["minio"]
# Credentials and endpoint for the S3-compatible artifact store, exported as environment variables.
for env_name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_name] = minio_config[env_name]
model_name = "VAE_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=config["mlflow"]["uri"])


In [None]:
# Fetch the "training_loss" metric history of the run behind the first entry returned by
# get_latest_versions("VAE_CPU") and plot it over the metric timestamps.
client = mlflow.tracking.MlflowClient(tracking_uri=load_config()["mlflow"]["uri"])
run_id = client.get_latest_versions("VAE_CPU")[0].run_id
run = client.get_run(run_id)
losses = client.get_metric_history(run_id, "training_loss")
# One row per logged metric value.
losses = pd.DataFrame([{"step": m.step, "value": m.value, "timestamp": m.timestamp} for m in losses])
sns.lineplot(losses, x="timestamp", y="value")

In [None]:
# Load the isotope views from the statistics API.
# NOTE(review): "pm" / "ps" presumably stand for processed measurements / processed synthetics — confirm.
isotope_per_pm = spi.API().view_isotope_per_pm()
isotope_per_ps = spi.API().view_isotope_per_ps()
# pm_isotopes_found = spi.API().view_pm_isotopes_found()
# ps_isotopes_found = spi.API().view_ps_isotopes_found()
# isotopes_greater_one = isotope_per_pm.loc[isotope_per_pm["row_count"] > 1]
# isotopes_equal_one = isotope_per_pm.loc[isotope_per_pm["row_count"] == 1]
# print(len(isotopes_greater_one), len(isotopes_equal_one))

In [None]:
# Same variables as the non-"re_" isotope views, filled from the "re_" views instead.
isotope_per_pm = spi.API().view_isotope_per_re_pm()
isotope_per_ps = spi.API().view_isotope_per_re_ps()

In [None]:
# Only the "ps" view is loaded; the "pm" variant is disabled.
# pm_isotopes_found = spi.API().view_pm_isotopes_found()
ps_isotopes_found = spi.API().view_ps_isotopes_found()

In [None]:
# Isotope frequencies over the spectra that have more than five distinct identified isotopes.
# NOTE(review): the variable is named "pm_..." but is built from `ps_isotopes_found`.
pm_isotopes_found_filtered = ps_isotopes_found.loc[ps_isotopes_found["identified_isotope"] != ""]
# One row per spectrum holding the array of its distinct isotopes.
df = pm_isotopes_found_filtered.groupby("datetime")["identified_isotope"].unique().reset_index()

plt.rcParams['text.usetex'] = True
df["isotope_count"] = df["identified_isotope"].apply(len)
df = df.sort_values(by="isotope_count", ascending=True).reset_index(drop=True)

# Multi-hot encoding: one 0/1 column per isotope, one row per spectrum.
mlb = MultiLabelBinarizer()
encoded = pd.DataFrame(mlb.fit_transform(df["identified_isotope"]),
                       columns=mlb.classes_,
                       index=df.index)

# Use the number of isotopes per spectrum (row sum) as the index.
encoded = encoded.set_index(encoded.sum(axis=1))
# Columns ordered from the rarest to the most frequent isotope.
isotope_order = encoded.sum(axis=0).sort_values(ascending=True).index
encoded_sorted = encoded[isotope_order]
encoded_filtered = encoded_sorted.loc[encoded_sorted.index > 5]
isotope_counts = encoded_filtered.sum(axis=0)
isotope_counts

In [None]:
# Same encoding as for the spectra with many isotopes, here for spectra with fewer than six
# distinct isotopes; also builds the figure and its top panel (spectra per isotope count).
# NOTE(review): the variable is named "pm_..." but is built from `ps_isotopes_found`.
pm_isotopes_found_filtered = ps_isotopes_found.loc[ps_isotopes_found["identified_isotope"] != ""]
df = pm_isotopes_found_filtered.groupby("datetime")["identified_isotope"].unique().reset_index()

plt.rcParams['text.usetex'] = True
df["isotope_count"] = df["identified_isotope"].apply(len)
df = df.sort_values(by="isotope_count", ascending=True).reset_index(drop=True)

# Multi-hot encoding: one 0/1 column per isotope, one row per spectrum.
mlb = MultiLabelBinarizer()
encoded = pd.DataFrame(mlb.fit_transform(df["identified_isotope"]),
                       columns=mlb.classes_,
                       index=df.index)

# Use the number of isotopes per spectrum (row sum) as the index.
encoded = encoded.set_index(encoded.sum(axis=1))
isotope_order = encoded.sum(axis=0).sort_values(ascending=True).index
encoded_sorted = encoded[isotope_order]
encoded_filtered = encoded_sorted.loc[encoded_sorted.index < 6]
isotope_counts = encoded_filtered.sum(axis=0)

# Number of spectra per isotope count, in ascending order of the count.
block_sizes = encoded_filtered.index.value_counts().sort_index()

# Cumulative start/end positions of the blocks along the x axis.
block_positions = np.concatenate(([0], np.cumsum(block_sizes.values)))

fig = plt.figure(figsize=(8, 5))
gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 4], width_ratios=[5, 1], hspace=0.02, wspace=0.02)

ax_dist = fig.add_subplot(gs[0, 0])

cmap = matplotlib.colormaps.get_cmap("Greys")
norm = mcolors.Normalize(vmin=0, vmax=max(block_sizes.index))

# One bar per block: as wide as the block, as high as its number of spectra,
# shaded by the isotope count.
for i, (count, size) in enumerate(zip(block_sizes.index, block_sizes.values)):
    color = cmap(norm(count))
    ax_dist.bar(x=block_positions[i], height=size, width=size, align='edge', color=color)

ax_dist.set_xlim(0, block_positions[-1])
ax_dist.set_xticks((block_positions[:-1] + block_positions[1:]) / 2)
ax_dist.set_xticklabels(block_sizes.index)
ax_dist.set_ylabel("")
ax_dist.set_title("")
ax_dist.tick_params(axis='x', rotation=0)
# Label every bar with its height; the container labelled "_container4" gets a rotated label.
# NOTE(review): presumably the fifth bar is too narrow for a horizontal label — confirm.
for container in ax_dist.containers:
    if container.get_label() == "_container4":
        ax_dist.bar_label(container, fmt='{:,.0f}', label_type='edge', padding=5, fontweight='bold', rotation=-90)
    else:
        ax_dist.bar_label(container, fmt='{:,.0f}', label_type='edge', padding=5, fontweight='bold')


def format_isotope(isotope):
    """Format an isotope code such as ``"co60"`` as a LaTeX label (``"$^{60}Co$"``).

    The code must start with letters followed by digits; the letters are capitalised
    and the digits become a superscript in front of them.

    Args:
        isotope: Isotope code to format.

    Returns:
        The LaTeX label, or ``isotope`` unchanged when it is not a string
        (e.g. ``None`` / NaN from missing data) or does not match the pattern.
    """
    # Missing values reach this function as non-strings; re.match would raise TypeError on them.
    if not isinstance(isotope, str):
        return isotope
    match = re.match(r"([a-zA-Z]+)(\d+)", isotope)
    if match is None:
        return isotope
    element, mass = match.groups()
    return f"$^{{{mass}}}{element.capitalize()}$"


# Main panel: isotopes (rows) against spectra (columns).
ax1 = fig.add_subplot(gs[1:, 0])
# The column labels of the transposed frame are the isotope counts per spectrum; adding them
# to the 0/1 values makes the shade of each column depend on that count.
data_for_heatmap = encoded_filtered.T + encoded_filtered.T.columns
data_for_heatmap.index = data_for_heatmap.index.map(format_isotope)
sns.heatmap(data_for_heatmap, cmap="Greys", cbar=False, ax=ax1, vmin=0, linewidths=0.0)

# Raw string: "\g" is not a valid escape sequence in a normal string literal.
ax1.set_xlabel(r"Anzahl der annotierten Nuklide pro synthetischem $\gamma$-Spektrum")
ax1.set_ylabel("Nuklide")

# Right panel: total number of annotations per isotope.
ax2 = fig.add_subplot(gs[1:, 1])
bars = sns.barplot(y=isotope_counts.index, x=isotope_counts.values,
                   ax=ax2, palette=sns.color_palette("Greys", n_colors=len(isotope_counts.index)),
                   hue=isotope_counts.values, legend=False)

ax2.set_xlabel("Anzahl der \n annotierten Nuklide", labelpad=15, rotation=0)
# "\\gamma": escaped because this literal also contains real "\n" line breaks and therefore
# cannot be a raw string; the resulting text is unchanged.
ax_dist.set_ylabel("Anzahl der\nsynthetischen\n$\\gamma$-Spektren", labelpad=55, rotation=0, loc="bottom")
ax_dist.set_xlabel("")
ax2.set_ylabel("")
for container in bars.containers:
    ax2.bar_label(container, fmt='{:,.0f}', label_type='edge', padding=3, fontweight='bold')
# One x tick per block of spectra with the same isotope count, centred on the block.
group_labels = block_sizes.index
group_centers = (block_positions[:-1] + block_positions[1:]) / 2

ax1.set_xticks(group_centers[0:])
ax1.set_xticklabels(group_labels[0:])

ax_dist.set_xticks([])
ax_dist.set_yticks([])

ax2.set_yticks([])
ax2.set_xticks([])
ax1.tick_params(axis='x', labelrotation=0, bottom=True)

# No frame around any of the three panels.
for ax in [ax1, ax2, ax_dist]:
    for spine in ax.spines.values():
        spine.set_visible(False)

plt.savefig("plots/distribution_of_found_nuclides_synthetics.pdf")
plt.show()

In [None]:
import src.nuclide.api as npi
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['text.usetex'] = True
# Nuclide chart: one point per nuclide at (d_n, d_z), shaded by its median energy.
nuclides = npi.API().all_nuclides()
unique_nuclides = nuclides.groupby("nuclide_id").median(numeric_only=True).reset_index()
data = unique_nuclides[["energy", "d_n", "d_z"]]
x = data["d_n"].to_numpy()
y = data["d_z"].to_numpy()
energy = data["energy"].to_numpy()
bins = 70
# NOTE(review): with `weights`, each bin holds the SUM of the median energies of all nuclides
# falling into it; this equals the median only where a bin contains a single nuclide.
heatmap, xedges, yedges = np.histogram2d(x, y, bins=bins, weights=energy)
f, ax = plt.subplots(figsize=(3, 2))
sns.scatterplot(x=x, y=y, s=1, color=sns.color_palette("dark")[7], ax=ax, zorder=50, edgecolor='lightgrey')
# histogram2d returns the array as [x, y]; pcolormesh expects [row = y, column = x], hence .T
pcm = ax.pcolormesh(xedges, yedges, heatmap.T, cmap="Greys", shading='flat', vmin=0, vmax=7500)
cbar = plt.colorbar(pcm, ax=ax, extend="max")
cbar.set_label("Erwarteter Energiewert in keV (Median)", size=14, labelpad=15)
ax.set_xlabel("Anzahl der Neutronen (N)", size=14, labelpad=15)
ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
# Start both axes at 0 (the second call used to repeat set_xlim instead of setting the y axis).
ax.set_xlim(0)
ax.set_ylim(0)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.set_ylabel("Anzahl der Protonen (Z)", size=14, labelpad=15)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='x', labelsize=12, bottom=True)
ax.tick_params(axis='y', labelsize=12, left=True)

plt.tight_layout()
# Forward slash keeps the path portable (a backslash is a directory separator on Windows only).
plt.savefig("plots/nuclide_map_estimated_energies.pdf")
plt.show()



In [None]:
# Load the single measurement recorded at the given timestamp.
data = mpi.API().measurement(["2017-08-30 09:45:35"])

In [None]:
import matplotlib.pyplot as plt
import src.measurements.api as mpi

plt.rcParams['text.usetex'] = True

# Annotated example spectrum with a zoomed inset around one peak.
data = mpi.API().measurement(["2017-08-30 09:45:35"])
data = data.loc[data["energy"] > 0]
fig, ax = plt.subplots(figsize=(8, 6))

x1 = data["energy"]
x2 = data["count"]

ax.plot(x1, x2, color="black")

# Inset in the upper right part of the axes, zoomed to the energy range 1165-1180.
axin = ax.inset_axes([0.5, 0.5, 0.4, 0.4])

axin.set_xlim(1165, 1180)
axin.set_ylim(0, 5000)

axin.plot(x1, x2, color="black")
axin.set_xticks([], minor=True)
# Fix the tick positions before assigning labels: set_*ticklabels maps labels to ticks by
# position only, so with automatic ticks the labels can end up on the wrong values.
axin.set_yticks([0, 1000, 2000, 3000, 4000, 5000])
axin.set_yticklabels(["", "", "2000", "", "4000", ""])
axin.set_xticks([1165, 1170, 1175, 1180])
axin.set_xticklabels(["", "1170", "1175", ""])

# Only the lower limits are set, leaving a small margin below/left of zero.
ax.set_xlim(-10)
ax.set_ylim(-100)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)

# (text, text position, arrow tip, arrow colour) in data coordinates of the main axes.
peak_annotations = [
    ('$^{60}Co$ bei 1173 keV', (800, 7000), (1173, 4000), 'darkgrey'),
    ('$^{60}Co$ bei 1332 keV', (1700, 7000), (1332, 4000), 'darkgrey'),
    ('$^{137}Cs$ bei 661 keV', (661, 5000), (661, 2700), 'darkgrey'),
    ('Bremsstrahlung', (600, 10000), (180, 700), 'lightgrey'),
    ('$^{241}Am$ bei 59 keV', (600, 18000), (59, 15500), 'darkgrey'),
]
for text, text_position, arrow_tip, arrow_color in peak_annotations:
    plt.annotate(text,
                 ha='center', va='bottom',
                 size='large',
                 xytext=text_position, xy=arrow_tip, arrowprops={'facecolor': arrow_color})

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

axin.spines['top'].set_visible(False)
axin.spines['right'].set_visible(False)
axin.set_title("$^{60}Co$ bei 1173 keV")

# Draw the rectangle and connector lines that link the inset to its region in the main plot.
ax.indicate_inset_zoom(axin)

axin.grid(False)
plt.tick_params(axis='x', labelsize=12, bottom=True)
plt.tick_params(axis='y', labelsize=12, left=True)
plt.grid(False)
plt.xlabel("Energie in keV", size=14, labelpad=15)
plt.ylabel("Zählwert", size=14, labelpad=15)
plt.savefig("plots/example_for_gammaspectroscopy.pdf")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math

# Standard normal density on [mu - 3*sigma, mu + 3*sigma], drawn as a thick black curve
# and saved with a transparent background.
mu, variance = 0, 1
sigma = math.sqrt(variance)
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
density = stats.norm.pdf(x, mu, sigma)
plt.plot(x, density, color="black", linewidth=7.0)

plt.grid(False)
plt.savefig("plots/normal_dist.pdf", transparent=True)

In [None]:
import mlflow
import os
from config.loader import load_config

# Read the configuration once instead of once per value.
config = load_config()
minio_config = config["minio"]
# Credentials and endpoint for the S3-compatible artifact store, exported as environment variables.
for env_name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_name] = minio_config[env_name]
model_uri = config["mlflow"]["uri"]
model_name = "CNN_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=model_uri)
# Load the registered model "<model_name>/<model_version>" and move it to the CPU.
model_cnn = mlflow.pytorch.load_model(f"models:/{model_name}/{model_version}").to("cpu")

In [None]:
from src.cnn.training import Training

# Training helper configured with both "processed" options disabled.
# NOTE(review): `use_processed_measuremnets` looks misspelled; it is kept because it has to
# match the parameter name declared by Training — confirm there.
training_obj = Training(use_processed_synthetics=False, use_processed_measuremnets=False)

In [None]:
# Keep a direct handle on the validation loader exposed by the training object.
validation_cnn_pm_loader = training_obj.validation_cnn_pm_loader

In [None]:
# Walk through the whole validation loader; after the loop `i`, `x_data` and `y_data`
# hold the values derived from the last batch.
for i in validation_cnn_pm_loader:
    # Item 1 of the batch's first component, as float on the CPU with two leading singleton dims.
    x_data = i[0][1].float().to("cpu").unsqueeze(0).unsqueeze(0)
    # Item 1 of the batch's third component, as float on the CPU with dimension 1 squeezed out.
    y_data = i[2][1].float().squeeze(1).to("cpu")

In [None]:
# Inspect item 4 of the batch `i` left over from the loop over the validation loader.
# NOTE(review): the batch presumably consists of (inputs, datetimes, targets) — confirm.
x_data = i[0][4].float().to("cpu").unsqueeze(0).unsqueeze(0)
y_data = i[2][4].float().squeeze(1).to("cpu")
datetime = i[1][4]

print(x_data[0][0])
print(datetime)
print(y_data)
