In [None]:
import numpy as np
import matplotlib as mpl
import re
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
import random
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib
import matplotlib.colors as mcolors
import plotly.express as px
import pandas as pd
from scipy.signal import find_peaks

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import torch
from src.peaks.finder import PeakFinder

import mlflow
from config.loader import load_config
import os

import src.measurements.api as mpi
import src.generator.api as gpi
import src.peaks.api as ppi
import src.statistics.api as spi
# Route all matplotlib text through LaTeX for every figure in this notebook
# (matplotlib's `text.usetex` option; needs a working TeX installation).
# NOTE(review): with usetex on, TeX special characters such as "_" in labels
# are not escaped automatically — confirm that plots which use raw column
# names as axis/legend labels still render.
plt.rcParams['text.usetex'] = True
from src.cnn.training import Training
import json
from src.cnn.dataset import MeasurementTraining

In [None]:
import mlflow
import os
from config.loader import load_config

# Read the configuration once and reuse it for both the MinIO credentials
# and the MLflow tracking URI.
config = load_config()
minio_config = config["minio"]
for env_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "MLFLOW_S3_ENDPOINT_URL"):
    os.environ[env_key] = minio_config[env_key]

model_uri = config["mlflow"]["uri"]
model_name = "CNN_CPU"
model_version = "latest"
mlflow.set_tracking_uri(uri=model_uri)
model = mlflow.pytorch.load_model(f"models:/{model_name}/{model_version}").to("cpu")

client = mlflow.tracking.MlflowClient(tracking_uri=model_uri)
# get_latest_versions() returns one entry per stage, so pick the highest
# version number explicitly; that keeps the run metadata below aligned with
# the model loaded through the "latest" URI above.
latest_model_version = max(
    client.get_latest_versions(model_name),
    key=lambda registered_version: int(registered_version.version),
)
run_id = latest_model_version.run_id
run = client.get_run(run_id)
# The download destination has to exist before artifacts are written into it.
os.makedirs("tmp", exist_ok=True)
client.download_artifacts(run_id=run_id, path="artifacts.json", dst_path="tmp")
# The class names were logged as a single comma-separated run parameter.
mlb_classes = run.data.params["mlb_classes"].split(",")

In [None]:
# Fetch the logged loss histories of the selected run.
training_macro_loss = client.get_metric_history(run_id=run_id, key="training_macro_loss")
training_micro_loss = client.get_metric_history(run_id=run_id, key="training_micro_loss")

# Walk the macro history and read the micro entry at the same position.
training_mac_loss = []
training_mic_loss = []
for i, macro_metric in enumerate(training_macro_loss):
    training_mac_loss.append(macro_metric.value)
    training_mic_loss.append(training_micro_loss[i].value)

# One row per logged entry; the row position is used as the epoch number.
data_mic_mac_loss = pd.DataFrame(
    {
        "training_macro_loss": training_mac_loss,
        "training_micro_loss": training_mic_loss,
    }
)
data_mic_mac_loss["epoch"] = data_mic_mac_loss.index
data_mic_mac_loss

In [None]:
# Load the artifact file that was downloaded from the MLflow run.
with open("tmp/artifacts.json", encoding="utf-8") as f:
    artifacts = json.load(f)
# dict_keys(['used_keys', 'training_tpr', 'training_fpr', 'training_auc', 'validation_tpr', 'validation_fpr', 'validation_auc'])

# Long-format TPR/FPR table: one row per curve point, tagged with nuclide and
# epoch. The per-nuclide frames are collected first and concatenated once,
# instead of re-copying a growing frame on every iteration.
tpr_fpr_frames = []
for idx, tpr_by_nuclide in enumerate(artifacts["training_tpr"]):
    fpr_by_nuclide = artifacts["training_fpr"][idx]
    for nuclide, tpr_values in tpr_by_nuclide.items():
        nuclide_df = pd.DataFrame(tpr_values, columns=["training_tpr"])
        nuclide_df["training_fpr"] = fpr_by_nuclide[nuclide]
        nuclide_df["nuclide"] = nuclide
        nuclide_df["epoch"] = idx
        tpr_fpr_frames.append(nuclide_df)
# pd.concat() rejects an empty list, so fall back to an empty frame.
data_tpr_fpr = (
    pd.concat(tpr_fpr_frames, axis=0, ignore_index=True)
    if tpr_fpr_frames
    else pd.DataFrame()
)

# Long-format AUC table: one row per (epoch, nuclide).
auc_frames = []
for idx, auc_by_nuclide in enumerate(artifacts["training_auc"]):
    for nuclide, auc_value in auc_by_nuclide.items():
        nuclide_df = pd.DataFrame([auc_value], columns=["training_auc"])
        nuclide_df["nuclide"] = nuclide
        nuclide_df["epoch"] = idx
        auc_frames.append(nuclide_df)
data_auc = (
    pd.concat(auc_frames, axis=0, ignore_index=True)
    if auc_frames
    else pd.DataFrame()
)

In [None]:
# `text.usetex` is switched on in the import cell, and TeX does not accept a
# bare "_" in text mode. Plot from a copy whose column name is TeX-safe so the
# auto-generated axis label renders; the underlying `data_auc` is untouched.
auc_plot_data = data_auc.rename(columns={"training_auc": "training AUC"})
auc_grid = sns.relplot(data=auc_plot_data, x="epoch", y="training AUC", hue="nuclide", kind="line")

# Overlay the macro loss on the grid's single axes explicitly rather than
# relying on pyplot's "current axes" state.
auc_ax = auc_grid.ax
auc_ax.plot(
    data_mic_mac_loss["epoch"],
    data_mic_mac_loss["training_macro_loss"],
    color="black",
    label="training macro loss",
)
auc_ax.legend()

In [None]:
# `text.usetex` is switched on in the import cell, and TeX does not accept a
# bare "_" in text mode. Plot from a copy with TeX-safe column names so the
# auto-generated axis labels render; `data_tpr_fpr` itself is untouched.
roc_plot_data = data_tpr_fpr.rename(
    columns={"training_fpr": "training FPR", "training_tpr": "training TPR"}
)
# One panel per nuclide, one stepped line per epoch.
# NOTE(review): seaborn's line plot sorts by x and aggregates repeated x values
# by default — confirm that is acceptable for these curves.
sns.relplot(
    data=roc_plot_data,
    x="training FPR",
    y="training TPR",
    hue="epoch",
    col="nuclide",
    col_wrap=3,
    kind="line",
    drawstyle="steps-pre",
)

In [None]:
# Load the key split before previewing it, so this cell also works on a fresh
# kernel instead of depending on a later cell having been run first.
splitted_keys = mpi.API().re_splitted_keys()
splitted_keys

In [None]:
import src.measurements.api as mpi
# Load the key split and keep only the datetimes tagged as CNN validation.
splitted_keys = mpi.API().re_splitted_keys()
is_cnn_validation = splitted_keys["type"] == "cnn_validation"
validation_keys = splitted_keys.loc[is_cnn_validation, "datetime"].tolist()

# Fetch the measurements that belong to those validation keys.
validation_measurements = ppi.API().re_measurement(validation_keys)

In [None]:
# Build a Training object from the "cnn" config section and keep only its
# validation dataset.
cnn_config = load_config()["cnn"]
cnn_training = Training(
    use_processed_synthetics=bool(cnn_config["use_processed_synthetics"]),
    # NOTE(review): keyword spelling ("measuremnets") is kept exactly as the
    # notebook had it — confirm it matches the Training signature.
    use_processed_measuremnets=bool(cnn_config["use_processed_measurements"]),
    use_re_processed_data=True,
)
validation_cnn_pm = cnn_training.validation_cnn_pm

In [None]:
# List the distinct values of the identified-isotope column.
identified_isotopes = validation_measurements["identified_isotope"]
identified_isotopes.unique()

In [None]:
# Fetch the validation sample at position 1 via normal indexing.
item = validation_cnn_pm[1]

In [None]:
# Keep a handle on the label binarizer stored on the validation dataset and
# display its class list (presumably a fitted sklearn MultiLabelBinarizer —
# TODO confirm).
fitted_mlb = validation_cnn_pm.fitted_mlb
fitted_mlb.classes_

In [None]:
# Turn the dataset's label mapping into a two-column frame: after the
# transpose and index reset the keys sit in "index" and the labels in column 0.
labels_as_row = pd.DataFrame([validation_cnn_pm.labels_by_datetime])
isos_ind = labels_as_row.T.reset_index()
import pandas as pd
import ast
import re

def count_ones(val):
    """Return the sum of the entries in ``val[0]``.

    For a 0/1 indicator row this equals the number of ones.

    Args:
        val: Indexable container whose first element is an iterable of
            numbers (presumably a one-row multi-hot label array — TODO
            confirm against the dataset).

    Returns:
        The built-in ``sum`` over ``val[0]``.
    """
    first_row = val[0]
    return sum(first_row)

# Attach the per-sample label count, then show how many samples fall into
# each count.
isos_ind["ones_count"] = isos_ind[0].map(count_ones)
samples_by_count = isos_ind.groupby("ones_count")
samples_by_count["index"].count()

In [None]:
from torch.utils.data import DataLoader

# Wrap the validation dataset in a loader that yields single, shuffled samples.
validation_cnn_pm_loader = DataLoader(
    dataset=validation_cnn_pm,
    batch_size=1,
    shuffle=True,
)

In [None]:
# Fetch the sample at position 10 and turn its first element into a float CPU
# tensor with two extra leading axes of size 1, as passed to the model below.
item = validation_cnn_pm[10]
first_element = item[0]
test = first_element.float().to("cpu")[None, None]
item

In [None]:
# Show the first element of the sample fetched above — the part that is
# converted to a tensor and passed to the model elsewhere in this notebook.
item[0]

In [None]:
# Preview one validation sample through an explicit index. The previous form
# read `i`, which only exists as a leftover loop variable from another cell.
# NOTE(review): index 0 is an arbitrary choice — adjust to the sample of interest.
sample_index = 0
validation_cnn_pm[sample_index]

In [None]:
# Print the model's sigmoid outputs for the first few validation samples,
# each followed by the sample's third element for comparison.
model.eval()
# Never index past the end of the dataset if it holds fewer than 10 samples.
n_preview = min(10, len(validation_cnn_pm))
# Inference only: skip autograd bookkeeping for the forward passes.
with torch.no_grad():
    for i in range(n_preview):
        item = validation_cnn_pm[i]
        # Add two leading axes of size 1 before passing the sample to the model.
        test = item[0].float().to("cpu").unsqueeze(0).unsqueeze(0)
        output = model(test)
        print(torch.sigmoid(output))
        print(item[2])