In [None]:
import src.peaks.api as ppi
import seaborn as sns
import matplotlib.pyplot as plt
import src.vae.api as vpi
import random
import src.generator.api as gpi
import src.statistics.api as spi
import src.measurements.api as mpi

# dates = vpi.API().unique_dates()

In [None]:
# Unique dates reported by the VAE API (`src.vae.api`). The next cell shuffles
# this object in place and passes a slice of it as `keys` to the generator API.
synthetic_keys = vpi.API().unique_dates()

In [None]:
# How many synthetic samples to load, and the seed that fixes which keys get picked.
N_SYNTHETIC_SAMPLES = 1500
SYNTHETIC_SHUFFLE_SEED = 42

# Shuffle in place (as before), but with a dedicated seeded RNG so that a
# "Restart Kernel -> Run All" selects the same subset of keys every time,
# provided `synthetic_keys` arrives in the same order.
# NOTE: re-running only this cell shuffles the already-shuffled sequence again;
# re-run the cell that defines `synthetic_keys` first to get the same subset.
random.Random(SYNTHETIC_SHUFFLE_SEED).shuffle(synthetic_keys)

# Load the synthetic data for the first N keys of the shuffled sequence.
synthetics = gpi.API().synthetics(keys=synthetic_keys[:N_SYNTHETIC_SAMPLES])

In [None]:
# Unique dates reported by the peaks API (`src.peaks.api`) ...
dates = ppi.API().unique_dates()
# ... and whatever `measurement` returns for exactly those dates.
meas = ppi.API().measurement(dates)

In [None]:
# Seed for the row shuffle below, so the row order is the same on every run.
ISOTOPE_SHUFFLE_SEED = 42

# Load the `view_isotope_per_re_pm` view from the statistics API and shuffle its
# rows: `sample(frac=1)` returns all rows in random order, and `reset_index`
# replaces the old index with a fresh 0..n-1 one.
# assumes the view is returned as a pandas DataFrame — TODO confirm
isotope_per_pm = (
    spi.API()
    .view_isotope_per_re_pm()
    .sample(frac=1, random_state=ISOTOPE_SHUFFLE_SEED)
    .reset_index(drop=True)
)

In [None]:
# Load the `view_re_pm_isotopes_found` view from the statistics API. A later cell
# indexes it by its "datetime" column and joins it with the split keys.
# presumably this view also provides the "identified_isotope" column used
# further down — verify against the view definition
data = spi.API().view_re_pm_isotopes_found()

In [None]:
# Split keys from the measurements API. Later cells read its "type" column
# (e.g. the value "cnn_validation") and its "datetime" column.
splitted_keys = mpi.API().re_splitted_keys()

In [None]:
# Keys that belong to the CNN validation split, indexed by their datetime.
validation_keys = (
    splitted_keys[splitted_keys["type"] == "cnn_validation"]
    .reset_index(drop=True)
    .set_index("datetime")
)

# Right join: one result row per (validation key, matching `data` row); a
# validation key without any match in `data` is kept with missing values.
filtered_data = (
    data.set_index("datetime")
    .join(validation_keys, how="right")
    .reset_index()
)

In [None]:
# Drop rows whose identified isotope is the empty string. Missing (NaN) values
# compare unequal to "" and are therefore kept.
has_isotope_label = filtered_data["identified_isotope"] != ""
filtered_data = filtered_data[has_isotope_label].reset_index(drop=True)

# Cell output: the distinct datetimes that remain after filtering.
filtered_data["datetime"].unique()

In [None]:
import re


def format_isotope(isotope):
    """Format a nuclide name such as ``"cs137"`` as the LaTeX label ``"$^{137}Cs$"``.

    The name must start with the element letters directly followed by the mass
    number. A trailing metastable marker (``m``, optionally followed by a single
    digit, e.g. ``"tc99m"``) is kept in the superscript instead of being
    dropped. Any other trailing text is ignored.

    Args:
        isotope: Raw nuclide name. Non-string values (for example ``None`` or
            the NaN cells a join can produce) are returned unchanged.

    Returns:
        The math-mode label string, or ``isotope`` itself when it is not a
        string or does not start with letters followed by digits.
    """
    if not isinstance(isotope, str):
        # re.match raises TypeError for non-strings; pass missing values through.
        return isotope

    # Group 3 is the optional isomer marker. The look-ahead makes sure an "m"
    # that merely starts some longer trailing word is not taken for a marker.
    match = re.match(r"([a-zA-Z]+)(\d+)([mM]\d?(?![a-zA-Z0-9]))?", isotope)
    if match is None:
        return isotope

    element, mass, isomer = match.groups()
    superscript = mass + (isomer.lower() if isomer else "")
    return f"$^{{{superscript}}}{element.capitalize()}$"


# Replace every raw isotope name with its formatted label, in place.
formatted_isotopes = filtered_data["identified_isotope"].apply(format_isotope)
filtered_data["identified_isotope"] = formatted_isotopes

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Render all figure text with LaTeX. This is a global matplotlib setting and
# also affects every figure drawn later in this kernel.
plt.rcParams['text.usetex'] = True

# Number of rows per isotope label, smallest count first (left to right).
counts = filtered_data['identified_isotope'].value_counts()
counts = counts.sort_values(ascending=True)

isotopes = counts.index.tolist()
values = counts.values.tolist()

fig, ax = plt.subplots(figsize=(8, 4))

# Height of the gray background bars and upper y-limit. The "Nicht Annotiert"
# number printed above each bar is `max_value - count`.
max_value = 175

# Full-height gray bars as background, the actual counts drawn on top.
ax.bar(isotopes, [max_value] * len(values), color='lightgray', edgecolor='none', width=0.8, label='Nicht Annotiert')
ax.bar(isotopes, values, color='black', alpha=0.6, width=0.8, label='Annotiert')

ax.set_xlabel("Nuklid", size=14, labelpad=10)
ax.set_ylabel("Anzahl", size=14, labelpad=10)
ax.tick_params(axis='x', labelsize=12, bottom=True, pad=10, rotation=0)
ax.tick_params(axis='y', labelsize=12, left=True)
ax.grid(False)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

for x, val in zip(isotopes, values):
    if val <= 0:
        continue
    # Count, in white, just below the top edge inside the dark bar.
    ax.annotate(
        str(val),
        (x, val - 15),
        ha='center',
        va='bottom',
        fontsize=12,
        color='white',
    )
    # Remainder up to `max_value`, in black, just above the dark bar.
    ax.annotate(
        str(max_value - val),
        (x, val + 3),
        ha='center',
        va='bottom',
        fontsize=12,
        color='black',
    )

ax.set_ylim(0, max_value)
# Keep the original right limit of 11, but widen it when there are more bars
# than that so the right-most bar is never cut off.
ax.set_xlim(-0.5, max(11, len(isotopes) - 0.5))
ax.legend(loc='upper right', frameon=False, fontsize=12)

# Move the legend above the axes and lay its entries out in one row.
sns.move_legend(ax,
                loc="lower center",
                bbox_to_anchor=(0.5, 1.05),
                borderaxespad=0,
                ncol=2,
                frameon=False,
                fontsize=12,
                title=""
                )
plt.tight_layout()

# Create the output directory first; savefig does not create missing folders.
output_path = Path("plots/dist_of_val.pdf")
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path, bbox_inches="tight")


In [None]:
# Attach the split-key columns to every row of the shuffled isotope table by
# joining both on "datetime". `join` is called without `how`, so the library
# default applies. "datetime" is turned back into a regular column afterwards.
used_training = isotope_per_pm.set_index("datetime").join(splitted_keys.set_index("datetime")).reset_index()