# Import libraries

In [1]:
from datetime import datetime
from Handler import *
from gandalf import gaNdalF
from sklearn.metrics import accuracy_score
from scipy.stats import binned_statistic, median_abs_deviation
import pandas as pd
import numpy as np
import seaborn as sns
import torch
import matplotlib.pyplot as plt
import sys
import yaml
import os
import joblib
%matplotlib inline
plt.rcParams["figure.figsize"] = (16, 9)

# Add path to sys.path

In [2]:
# Put the working directory at the end of sys.path and keep its absolute
# form in `path`; the config cell builds "<path>/conf/<file>" from it.
sys.path.append(os.getcwd())
current_dir = sys.path[-1]
path = os.path.abspath(current_dir)

# Load config file

In [3]:
# Pick the config file that belongs to the host operating system.
# get_os() is evaluated exactly once and looked up in a table; any value that
# is not listed falls back to the default config.
dict_config_by_os = {
    "Mac": ("mac", "mac.cfg"),
    "Windows": ("windows", "windows.cfg"),
    "Linux": ("linux", "LMU.cfg"),
}
config_label, config_file_name = dict_config_by_os.get(get_os(), ("default", "default.cfg"))
print(f"load {config_label} config-file")

# The config is plain YAML; SafeLoader only builds standard Python objects.
with open(f"{path}/conf/{config_file_name}", 'r') as fp:
    cfg = yaml.load(fp, Loader=yaml.SafeLoader)

# Timestamp of this run (minute resolution); used to name the output folder.
now = datetime.now()
cfg['RUN_DATE'] = now.strftime('%Y-%m-%d_%H-%M')

load mac config-file


# Set output paths and create folders

In [4]:
# Derive the per-run output locations from the config and create them.
# NOTE: cfg['PATH_OUTPUT'] is overwritten with the run folder, so executing
# this cell a second time appends another "gandalf_run_<date>" level.
cfg['PATH_PLOTS_FOLDER'] = {}
cfg['PATH_OUTPUT'] = f"{cfg['PATH_OUTPUT']}/gandalf_run_{cfg['RUN_DATE']}"
cfg['PATH_PLOTS'] = f"{cfg['PATH_OUTPUT']}/{cfg['FOLDER_PLOTS']}"
cfg['PATH_CATALOGS'] = f"{cfg['PATH_OUTPUT']}/{cfg['FOLDER_CATALOGS']}"

# makedirs(exist_ok=True) also creates missing parent folders and does not
# fail when the folder already exists (no check-then-create race).
for folder in (cfg['PATH_OUTPUT'], cfg['PATH_PLOTS'], cfg['PATH_CATALOGS']):
    os.makedirs(folder, exist_ok=True)

# One sub-folder per configured plot type, registered under its upper-case name.
for plot in cfg['PLOTS_RUN']:
    plot_folder = f"{cfg['PATH_PLOTS']}/{plot}"
    cfg['PATH_PLOTS_FOLDER'][plot.upper()] = plot_folder
    os.makedirs(plot_folder, exist_ok=True)

# Define Functions

In [5]:
def apply_cuts(cfg, data_frame):
    """Run the catalog cut functions on ``data_frame`` one after another.

    The frame is passed through ``unsheared_object_cuts``, ``flag_cuts``,
    ``unsheared_shear_cuts``, ``binary_cut`` and ``unsheared_mag_cut`` in that
    order; each step receives the result of the previous one.

    :param cfg: configuration dictionary; not read at the moment (only the
        disabled mask cut refers to it).
    :param data_frame: frame handed to the first cut function.
    :return: the value returned by the last cut function.
    """
    # The mask cut is currently disabled. In the original sequence it sat
    # between binary_cut and unsheared_mag_cut:
    # mask_cut(data_frame=data_frame, master=f"{cfg['PATH_DATA']}/{cfg['FILENAME_MASTER_CAT']}")
    cut_functions = (
        unsheared_object_cuts,
        flag_cuts,
        unsheared_shear_cuts,
        binary_cut,
        unsheared_mag_cut,
    )
    for cut_function in cut_functions:
        data_frame = cut_function(data_frame=data_frame)
    return data_frame

# Start gaNdalF

In [6]:
# Build the gaNdalF run object from the loaded configuration. The recorded
# output below shows the run data set being loaded and the transformer and
# scaler files that are used.
gandalf = gaNdalF(cfg=cfg)

Load /Volumes/elmichelangelo_external_ssd_1/Data/gandalf_data_odet_ncuts_ndef_rnan_8612535.pkl  data set
shape run dataset: (8612535, 125)
Sample -1 random data from run data set
Use yj_transformer_odet.joblib to transform data
Use maxabsscaler_odet_mag_yj.joblib to scale data


In [7]:
# Report how many entries the dataset behind the run loader holds.
print(f"Length sample dataset: {len(gandalf.run_loader.dataset)}")

Length sample dataset: 8612535


In [8]:
# Pull the data out of the run loader. Every batch is kept; previously each
# iteration overwrote the variables, so only the last batch survived when the
# loader yielded more than one.
# Each batch is indexed as: [0] classifier/flow input tensor, [1] true flow
# output, [2] true detection flag, [3] columns needed for the cuts.
lst_tsr_input, lst_flow_true_output, lst_true_detected, lst_cut_cols = [], [], [], []
for data in gandalf.run_loader:
    lst_tsr_input.append(data[0].double())
    lst_flow_true_output.append(data[1].numpy())
    lst_true_detected.append(data[2].numpy())
    lst_cut_cols.append(data[3].numpy())

if len(lst_tsr_input) == 0:
    raise ValueError("gandalf.run_loader did not yield any batch")

if len(lst_tsr_input) == 1:
    # Single batch: hand over the very same objects, no copy is made.
    tsr_input = lst_tsr_input[0]
    arr_flow_true_output = lst_flow_true_output[0]
    arr_true_detected = lst_true_detected[0]
    arr_cut_cols = lst_cut_cols[0]
else:
    # Several batches: stack them along the sample axis.
    tsr_input = torch.cat(lst_tsr_input, dim=0)
    arr_flow_true_output = np.concatenate(lst_flow_true_output, axis=0)
    arr_true_detected = np.concatenate(lst_true_detected, axis=0)
    arr_cut_cols = np.concatenate(lst_cut_cols, axis=0)

# Drop the per-batch lists so they do not keep the batches alive.
del lst_tsr_input, lst_flow_true_output, lst_true_detected, lst_cut_cols

In [9]:
# Column layout of the Balrog frame: inputs, flow outputs, detection flag and
# cut columns, in the same order as the arrays are joined below.
lst_balrog_columns = (
    cfg[f'INPUT_COLS_{gandalf.lum_type}_RUN']
    + cfg[f'OUTPUT_COLS_{gandalf.lum_type}_RUN']
    + cfg['OUTPUT_COLS_CLASSF_RUN']
    + cfg['CUT_COLS_RUN']
)
# The joined array is built inline so no extra reference to it stays alive.
df_balrog = pd.DataFrame(
    np.concatenate(
        (tsr_input.numpy(), arr_flow_true_output, arr_true_detected, arr_cut_cols),
        axis=1
    ),
    columns=lst_balrog_columns
)

In [10]:
# Run the detection classifier, draw the detected sample and build the data
# frames used by the classifier plots and by the flow.
# NOTE: df_balrog is replaced by its detected subset here, so this cell cannot
# be re-executed without re-running the cell that builds df_balrog.
if cfg['CLASSF_GALAXIES'] is True:
    lst_flow_cols = cfg[f'INPUT_COLS_{gandalf.lum_type}_RUN'] + cfg[f'OUTPUT_COLS_{gandalf.lum_type}_RUN']

    # Classifier forward pass; no gradients are needed for inference.
    with torch.no_grad():
        arr_classf_gandalf_output = gandalf.gandalf_classifier(tsr_input).squeeze().numpy()
    arr_gandalf_prob_calib = gandalf.predict_calibrated(arr_classf_gandalf_output)

    # One uniform draw per object: an object counts as detected when its
    # calibrated probability exceeds the draw. The number of draws follows the
    # probability array itself instead of cfg['NUMBER_SAMPLES'], which fails
    # for a negative value and can disagree with the number of loaded rows.
    arr_gandalf_detected_calib = arr_gandalf_prob_calib > np.random.rand(*np.shape(arr_gandalf_prob_calib))
    number_samples = arr_gandalf_detected_calib.size

    validation_accuracy = accuracy_score(arr_true_detected, arr_gandalf_detected_calib)
    gandalf_detected = np.count_nonzero(arr_gandalf_detected_calib)
    gandalf_not_detected = number_samples - gandalf_detected
    balrog_detected = np.count_nonzero(arr_true_detected)
    balrog_not_detected = arr_true_detected.size - balrog_detected

    # Rows that gaNdalF classified as detected.
    arr_masked_input = tsr_input.numpy()[arr_gandalf_detected_calib]
    arr_masked_output = arr_flow_true_output[arr_gandalf_detected_calib]
    arr_masked_cut_cols = arr_cut_cols[arr_gandalf_detected_calib]

    # Full (unmasked) frames for the classifier plots. reset_index returns a
    # new frame, which avoids in-place edits on a slice of df_balrog.
    df_balrog_all = df_balrog[lst_flow_cols].reset_index(drop=True)
    df_gandalf_all = pd.DataFrame(
        np.concatenate((tsr_input.numpy(), arr_flow_true_output), axis=1),
        columns=lst_flow_cols
    )

    # Detected subset of the Balrog frame, restricted to input + output columns.
    df_balrog = df_balrog.loc[arr_gandalf_detected_calib, lst_flow_cols].reset_index(drop=True)

    df_flow_input = pd.DataFrame(
        np.concatenate((arr_masked_input, arr_masked_output), axis=1),
        columns=lst_flow_cols
    )

    # Undo the classifier scaling on every frame.
    if cfg['APPLY_SCALER_CLASSF_RUN'] is True:
        df_flow_input = gandalf.galaxies.inverse_scale_data(df_flow_input)
        df_balrog = gandalf.galaxies.inverse_scale_data(df_balrog)
        df_balrog_all = gandalf.galaxies.inverse_scale_data(df_balrog_all)
        df_gandalf_all = gandalf.galaxies.inverse_scale_data(df_gandalf_all)

    # Undo the Yeo-Johnson transform on every frame.
    if cfg['APPLY_YJ_TRANSFORM_CLASSF_RUN'] is True:
        if cfg['TRANSFORM_COLS_RUN'] is None:
            trans_col = df_flow_input.keys()
        else:
            trans_col = cfg['TRANSFORM_COLS_RUN']
        df_flow_input = gandalf.galaxies.yj_inverse_transform_data(
            data_frame=df_flow_input,
            columns=trans_col
        )
        df_balrog = gandalf.galaxies.yj_inverse_transform_data(
            data_frame=df_balrog,
            columns=trans_col
        )
        df_balrog_all = gandalf.galaxies.yj_inverse_transform_data(
            data_frame=df_balrog_all,
            columns=trans_col
        )
        df_gandalf_all = gandalf.galaxies.yj_inverse_transform_data(
            data_frame=df_gandalf_all,
            columns=trans_col
        )

    # Totals are reported against the number of objects actually classified.
    print(f"Accuracy sample: {validation_accuracy * 100.0:.2f}%")
    print(f"Number of NOT true_detected galaxies gandalf: {gandalf_not_detected} of {number_samples}")
    print(f"Number of true_detected galaxies gandalf: {gandalf_detected} of {number_samples}")
    print(f"Number of NOT true_detected galaxies balrog: {balrog_not_detected} of {number_samples}")
    print(f"Number of true_detected galaxies balrog: {balrog_detected} of {number_samples}")

    df_classf_plot = pd.DataFrame({
        "gandalf_detected": arr_gandalf_detected_calib.ravel(),
        "balrog_detected": arr_true_detected.ravel(),
    })

    df_balrog_all["true_detected"] = arr_true_detected
    df_gandalf_all["true_detected"] = arr_gandalf_detected_calib
    df_gandalf_all["probability"] = arr_gandalf_prob_calib

In [11]:
# The classifier plots are produced by calling the plot helpers one by one
# (instead of the single gandalf.plot_classf_data(...) call that is disabled).
print("Start plotting classf data")

# Confusion matrix of the gaNdalF detection flags against the Balrog ones.
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_MATRIX_RUN'] is True:
    plot_confusion_matrix_gandalf(
        df_classf_plot=df_classf_plot,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CONFUSION_MATRIX']}/confusion_matrix.png",
        title="Confusion Matrix"
    )

Start plotting classf data


In [12]:
# Calibration curve: Balrog detection flag against the calibrated gaNdalF
# probability, using 10 bins.
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CALIBRATION_CURVE'] is True:
    plot_calibration_curve_gandalf(
        true_detected=df_balrog_all["true_detected"],
        probability=df_gandalf_all["probability"],
        n_bins=10,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CALIBRATION_CURVE']}/calibration_curve.png",
        title="Calibration Curve"
    )

In [13]:
# ROC and AUC
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_ROC_CURVE_RUN'] is True:
    plot_roc_curve_gandalf(
        data_frame=df_classf_plot,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['ROC_CURVE']}/roc_curve.png",
        title="Receiver Operating Characteristic (ROC) Curve"
    )

In [14]:
# Classifier histogram over all configured MAG input columns.
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=cfg["INPUT_COLS_MAG_RUN"],
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/All.png",
        title="All"
    )

In [15]:
# Classifier histogram restricted to the BDF magnitude columns (r, i, z).
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = [
        "BDF_MAG_DERED_CALIB_R",
        "BDF_MAG_DERED_CALIB_I",
        "BDF_MAG_DERED_CALIB_Z",
    ]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/magnitude.png",
        title="magnitude"
    )

In [16]:
# Classifier histogram restricted to the BDF magnitude error columns (r, i, z).
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = [
        "BDF_MAG_ERR_DERED_CALIB_R",
        "BDF_MAG_ERR_DERED_CALIB_I",
        "BDF_MAG_ERR_DERED_CALIB_Z",
    ]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/magnitude_error.png",
        title="magnitude_error"
    )

In [17]:
# Classifier histogram restricted to the BDF color columns.
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = [
        "Color BDF MAG U-G",
        "Color BDF MAG G-R",
        "Color BDF MAG R-I",
        "Color BDF MAG I-Z",
        "Color BDF MAG Z-J",
        "Color BDF MAG J-H",
        "Color BDF MAG H-K",
    ]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/Color.png",
        title="color"
    )

In [18]:
# Classifier histogram for the BDF_T, BDF_G and EBV_SFD98 columns.
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = ["BDF_T", "BDF_G", "EBV_SFD98"]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/shape_and_dust.png",
        title="shape_and_dust"
    )

In [19]:
# Classifier histogram for the FWHM_WMEAN columns (r, i, z).
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = ["FWHM_WMEAN_R", "FWHM_WMEAN_I", "FWHM_WMEAN_Z"]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/FWHM.png",
        title="FWHM"
    )

In [20]:
# Classifier histogram for the AIRMASS_WMEAN columns (r, i, z).
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = ["AIRMASS_WMEAN_R", "AIRMASS_WMEAN_I", "AIRMASS_WMEAN_Z"]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/AIRMASS.png",
        title="AIRMASS"
    )

In [21]:
# Classifier histogram for the MAGLIM columns (r, i, z).
if cfg['CLASSF_GALAXIES'] is True and cfg['PLOT_CLASSF_HISTOGRAM'] is True:
    lst_hist_columns = ["MAGLIM_R", "MAGLIM_I", "MAGLIM_Z"]
    plot_classifier_histogram(
        df_balrog=df_balrog_all,
        df_gandalf=df_gandalf_all,
        columns=lst_hist_columns,
        show_plot=cfg['SHOW_PLOT_RUN'],
        save_plot=cfg['SAVE_PLOT_RUN'],
        save_name=f"{cfg['PATH_PLOTS_FOLDER']['CLASSF_HIST']}/MAGLIM.png",
        title="MAGLIM"
    )

In [22]:
# Prepare the conditional input of the flow.
if cfg['EMULATE_GALAXIES'] is True:
    if cfg['CLASSF_GALAXIES'] is True:
        # The suffix selects which scaler file is loaded and depends only on
        # whether the Yeo-Johnson transform is applied for the flow.
        # (Previously the APPLY_SCALER_FLOW_RUN flag overwrote it right after,
        # so the scaler flag decided which scaler file was picked.)
        if cfg['APPLY_YJ_TRANSFORM_FLOW_RUN'] is True:
            gandalf.galaxies.applied_yj_transform = "_YJ"
        else:
            gandalf.galaxies.applied_yj_transform = ""

        # Swap in the scaler and Yeo-Johnson transformer stored for the flow.
        filename = cfg[f'FILENAME_SCALER_ODET_{gandalf.lum_type}{gandalf.galaxies.applied_yj_transform}']
        gandalf.galaxies.scaler = joblib.load(
            f"{cfg['PATH_TRANSFORMERS']}/{filename}"
        )
        gandalf.galaxies.name_scaler = filename

        gandalf.galaxies.dict_pt = joblib.load(
            f"{cfg['PATH_TRANSFORMERS']}/{cfg['FILENAME_YJ_TRANSFORMER_ODET']}"
        )
        gandalf.galaxies.name_yj_transformer = cfg['FILENAME_YJ_TRANSFORMER_ODET']

        # Bring the detected objects into the representation used for the flow:
        # first the Yeo-Johnson transform, then the scaling.
        if cfg['APPLY_YJ_TRANSFORM_FLOW_RUN'] is True:
            df_flow_input = gandalf.galaxies.yj_transform_data_on_fly(
                data_frame=df_flow_input,
                columns=df_flow_input.keys(),
                dict_pt=gandalf.galaxies.dict_pt
            )

        if cfg['APPLY_SCALER_FLOW_RUN'] is True:
            df_flow_input = gandalf.galaxies.scale_data_on_fly(
                data_frame=df_flow_input,
                scaler=gandalf.galaxies.scaler
            )

        tsr_masked_input = torch.from_numpy(
            df_flow_input[cfg[f'INPUT_COLS_{gandalf.lum_type}_RUN']].values).double()

        # The frame is no longer needed once the input tensor exists.
        del df_flow_input

    else:
        # No classifier step: every object of df_balrog is treated as detected
        # with probability one.
        tsr_masked_input = torch.from_numpy(
            df_balrog[cfg[f'INPUT_COLS_{gandalf.lum_type}_RUN']].values).double()
        arr_masked_output = df_balrog[cfg[f'OUTPUT_COLS_{gandalf.lum_type}_RUN']].values
        arr_masked_cut_cols = df_balrog[cfg['CUT_COLS_RUN']].values
        arr_gandalf_prob_calib = np.ones(len(df_balrog))
        arr_gandalf_detected_calib = np.ones(len(df_balrog), dtype=bool)
        df_balrog = df_balrog[cfg[f'INPUT_COLS_{gandalf.lum_type}_RUN']+cfg[f'OUTPUT_COLS_{gandalf.lum_type}_RUN']]
        

In [None]:
# Sample the flow in slices of `chunk_size` rows and stitch the slices back
# together into one gaNdalF catalog.
if cfg['EMULATE_GALAXIES'] is True:
    chunk_size = 10000
    tsr_masked_input_numpy = tsr_masked_input.numpy()
    lst_flow_cols = cfg[f'INPUT_COLS_{gandalf.lum_type}_RUN'] + cfg[f'OUTPUT_COLS_{gandalf.lum_type}_RUN']

    df_list = []
    for start in range(0, len(tsr_masked_input_numpy), chunk_size):
        chunk = tsr_masked_input_numpy[start:start + chunk_size]
        # One flow sample per conditional input row of this slice.
        tsr_flow_sample = gandalf.gandalf_flow.sample(len(chunk), cond_inputs=torch.from_numpy(chunk))
        arr_flow_gandalf_output = tsr_flow_sample.detach().numpy()
        df_list.append(
            pd.DataFrame(
                np.concatenate((chunk, arr_flow_gandalf_output), axis=1),
                columns=lst_flow_cols
            )
        )

    df_gandalf = pd.concat(df_list, ignore_index=True)
    print(f"Length gandalf catalog: {len(df_gandalf)}")
    print(f"Number of NaNs in df_gandalf: {df_gandalf.isna().sum().sum()}")
    print(f"Number of NaNs in df_balrog: {df_balrog.isna().sum().sum()}")
    

In [None]:
# Undo the flow scaling on both catalogs and report the NaN counts.
# NOTE(review): when the classifier cell ran with APPLY_SCALER_CLASSF_RUN,
# df_balrog was already passed through inverse_scale_data there - confirm that
# a second inverse scaling here is intended.
if cfg['EMULATE_GALAXIES'] is True:
    if cfg['APPLY_SCALER_FLOW_RUN'] is True:
        print("apply scaler on df_gandalf")
        df_gandalf = gandalf.galaxies.inverse_scale_data(df_gandalf)
        df_balrog = gandalf.galaxies.inverse_scale_data(df_balrog)
    number_nans_gandalf = df_gandalf.isna().sum().sum()
    print(f"Number of NaNs in df_gandalf after scaler: {number_nans_gandalf}")
    number_nans_balrog = df_balrog.isna().sum().sum()
    print(f"Number of NaNs in df_balrog after scaler: {number_nans_balrog}")

In [None]:
# Undo the Yeo-Johnson transform on both catalogs and report the NaN counts.
# NOTE(review): when the classifier cell ran with APPLY_YJ_TRANSFORM_CLASSF_RUN,
# df_balrog was already inverse-transformed there - confirm that a second
# inverse transform here is intended.
if cfg['EMULATE_GALAXIES'] is True:
    if cfg['APPLY_YJ_TRANSFORM_FLOW_RUN'] is True:
        # Without an explicit column list every column of df_gandalf is used.
        trans_col = df_gandalf.keys() if cfg['TRANSFORM_COLS_RUN'] is None else cfg['TRANSFORM_COLS_RUN']
        print("apply yj transform on df_gandalf")
        df_gandalf = gandalf.galaxies.yj_inverse_transform_data(data_frame=df_gandalf, columns=trans_col)
        df_balrog = gandalf.galaxies.yj_inverse_transform_data(data_frame=df_balrog, columns=trans_col)
    number_nans_gandalf = df_gandalf.isna().sum().sum()
    print(f"Number of NaNs in df_gandalf after yj inverse transformation: {number_nans_gandalf}")
    number_nans_balrog = df_balrog.isna().sum().sum()
    print(f"Number of NaNs in df_balrog after yj inverse transformation: {number_nans_balrog}")

In [None]:
# Attach detection flag, probability and the cut columns to both catalogs.
if cfg['EMULATE_GALAXIES'] is True:
    # gaNdalF: every row is flagged with one and carries the calibrated
    # probability of the objects selected by the detection mask.
    df_gandalf['true_detected'] = np.ones(len(df_gandalf))
    df_gandalf["probability"] = arr_gandalf_prob_calib[arr_gandalf_detected_calib]

    # Balrog: the true detection flag of the same objects, probability one.
    df_balrog['true_detected'] = arr_true_detected[arr_gandalf_detected_calib]
    df_balrog["probability"] = np.ones(len(df_balrog))

    # Both catalogs receive the same cut columns.
    for df_catalog in (df_gandalf, df_balrog):
        df_catalog[cfg['CUT_COLS_RUN']] = arr_masked_cut_cols

    for catalog_name, df_catalog in (("gandalf", df_gandalf), ("balrog", df_balrog)):
        print(f"Length {catalog_name} catalog: {len(df_catalog)}")

In [None]:
# Remove rows that contain NaNs from either catalog.
# NOTE: rows are dropped from each catalog independently and the index is not
# reset, so afterwards the two frames may differ in length and row labels.
if cfg['EMULATE_GALAXIES'] is True:
    number_nans_gandalf = df_gandalf.isna().sum().sum()
    if number_nans_gandalf > 0:
        print("Warning: NaNs in df_gandalf_rescaled")
        print(f"Number of NaNs in df_gandalf: {number_nans_gandalf}")
        df_gandalf.dropna(inplace=True)

    number_nans_balrog = df_balrog.isna().sum().sum()
    if number_nans_balrog > 0:
        # These messages used to name df_gandalf although df_balrog is checked.
        print("Warning: NaNs in df_balrog")
        print(f"Number of NaNs in df_balrog: {number_nans_balrog}")
        df_balrog.dropna(inplace=True)

In [None]:
# Catalog sizes after the NaN removal.
length_gandalf = len(df_gandalf)
print(f"Length gandalf catalog: {length_gandalf}")
length_balrog = len(df_balrog)
print(f"Length balrog catalog: {length_balrog}")

In [None]:
# For every column whose name contains "unsheared", print
# "<min gaNdalF>/<min Balrog>" and "<max gaNdalF>/<max Balrog>".
lst_unsheared_cols = [col for col in df_gandalf.keys() if "unsheared" in col]
for col in lst_unsheared_cols:
    min_gandalf = df_gandalf[col].min()
    min_balrog = df_balrog[col].min()
    max_gandalf = df_gandalf[col].max()
    max_balrog = df_balrog[col].max()
    print(f"{col}: {min_gandalf}/{min_balrog}\t{max_gandalf}/{max_balrog}")

In [None]:
# Display the "unsheared/mag_r" column of the Balrog catalog for a visual check.
df_balrog["unsheared/mag_r"]

In [None]:
# Display the "unsheared/mag_r" column of the gaNdalF catalog for a visual check.
df_gandalf["unsheared/mag_r"]

In [None]:
# Add an "unsheared/flux_r" column to both catalogs, computed with mag2flux
# from the respective "unsheared/mag_r" column.
for df_catalog in (df_gandalf, df_balrog):
    df_catalog["unsheared/flux_r"] = mag2flux(df_catalog["unsheared/mag_r"])

In [None]:
# Apply the cuts to copies so the uncut catalogs stay available.
df_balrog_cut = apply_cuts(cfg, df_balrog.copy())
df_gandalf_cut = apply_cuts(cfg, df_gandalf.copy())

# Sizes are printed for the cut catalogs first, then for the uncut ones.
for catalog_name, df_catalog in (
        ("gandalf", df_gandalf_cut),
        ("balrog", df_balrog_cut),
        ("gandalf", df_gandalf),
        ("balrog", df_balrog),
):
    print(f"Length {catalog_name} catalog: {len(df_catalog)}")

In [None]:
# Prefix used for plot titles, plot folder keys and file names in this and the
# following plot cells.
mcal = 'mcal_'
if cfg['PLOT_COLOR_COLOR_RUN'] is True:
    # Compute the colors from the *cut* catalogs.
    # NOTE(review): the results are stored in df_gandalf / df_balrog (replacing
    # the uncut catalogs), while the plot below reads df_gandalf_cut /
    # df_balrog_cut. The "r-i" / "i-z" columns are only present in the *_cut
    # frames if calc_color modifies its input in place - confirm. Later cells
    # that read df_gandalf / df_balrog therefore see different data depending
    # on this plot flag.
    df_gandalf = calc_color(
        data_frame=df_gandalf_cut,
        colors=cfg['COLORS_RUN'],
        column_name=f"unsheared/{gandalf.lum_type.lower()}"
    )
    df_balrog = calc_color(
        data_frame=df_balrog_cut,
        colors=cfg['COLORS_RUN'],
        column_name=f"unsheared/{gandalf.lum_type.lower()}"
    )

    # Best effort: any error raised while plotting is printed, not re-raised.
    try:
        plot_compare_corner(
            data_frame_generated=df_gandalf_cut,
            data_frame_true=df_balrog_cut,
            dict_delta=None,
            epoch=None,
            title=f"{mcal} color-color plot",
            columns=["r-i", "i-z"],
            labels=["r-i", "i-z"],
            show_plot=cfg['SHOW_PLOT_RUN'],
            save_plot=cfg['SAVE_PLOT_RUN'],
            save_name=f"{cfg[f'PATH_PLOTS_FOLDER'][f'{mcal.upper()}COLOR_COLOR_PLOT']}/{mcal}color_color.png",
            ranges=[(-8, 8), (-8, 8)]
        )
    except Exception as e:
        print(e)

In [None]:
# Residual histograms "BDF_MAG_DERED_CALIB - unsheared/mag", one panel per band,
# Balrog against gaNdalF. Best effort: errors are printed, not re-raised.
if cfg['PLOT_RESIDUAL_RUN'] is True:
    try:
        lst_bands = cfg['BANDS_RUN']
        # One panel per configured band (instead of a fixed three).
        hist_figure, arr_axes = plt.subplots(nrows=len(lst_bands), ncols=1, figsize=(12, 12))
        lst_axis_res = list(np.atleast_1d(arr_axes))
        hist_figure.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.5, hspace=0.5)
        hist_figure.suptitle(r"residual", fontsize=16)

        # Same x-range for every panel.
        xlim_res = (-2.5, 2.5)

        df_hist_Balrog = pd.DataFrame({"dataset": ["Balrog"] * len(df_balrog)})
        df_hist_generated = pd.DataFrame({"dataset": ["gaNdalF"] * len(df_gandalf)})
        for band in lst_bands:
            residual_col = f"BDF_MAG_DERED_CALIB - unsheared/mag {band}"
            true_col = f"BDF_MAG_DERED_CALIB_{band.upper()}"
            measured_col = f"unsheared/mag_{band}"
            # Each catalog uses its own true and measured magnitudes (as in the
            # conditions plot). to_numpy() stores the values by position, so
            # row labels left over from dropna()/cuts cannot introduce NaNs
            # through index alignment.
            df_hist_Balrog[residual_col] = (df_balrog[true_col] - df_balrog[measured_col]).to_numpy()
            df_hist_generated[residual_col] = (df_gandalf[true_col] - df_gandalf[measured_col]).to_numpy()

        for idx, band in enumerate(lst_bands):
            residual_col = f"BDF_MAG_DERED_CALIB - unsheared/mag {band}"
            ax_res = lst_axis_res[idx]
            sns.histplot(
                data=df_hist_Balrog,
                x=residual_col,
                ax=ax_res,
                element="step",
                stat="density",
                color="dodgerblue",
                bins=50,
                label="Balrog"
            )
            sns.histplot(
                data=df_hist_generated,
                x=residual_col,
                ax=ax_res,
                element="step",
                stat="density",
                color="darkorange",
                fill=False,
                bins=50,
                label="gaNdalF"
            )
            # The vertical lines mark the medians, so they are labelled as such.
            ax_res.axvline(
                x=df_hist_Balrog[residual_col].median(),
                color='dodgerblue',
                ls='--',
                lw=1.5,
                label="Median Balrog"
            )
            ax_res.axvline(
                x=df_hist_generated[residual_col].median(),
                color='darkorange',
                ls='--',
                lw=1.5,
                label="Median gaNdalF"
            )
            ax_res.set_xlim(xlim_res[0], xlim_res[1])
            # Only the first panel shows a legend.
            if idx == 0:
                ax_res.legend()
            else:
                ax_res.legend([], [], frameon=False)
        hist_figure.tight_layout()
        if cfg['SAVE_PLOT_RUN'] is True:
            plt.savefig(f"{cfg['PATH_PLOTS_FOLDER'][f'{mcal.upper()}RESIDUAL_PLOT']}/{mcal}residual_plot.png")
        if cfg['SHOW_PLOT_RUN'] is True:
            plt.show()
        plt.clf()
        plt.close()
    except Exception as e:
        print(e)

In [None]:
# Corner ("chain") plot of the cut catalogs over the three luminosity columns
# plus snr, size_ratio and T. Best effort: errors are printed, not re-raised.
if cfg['PLOT_CHAIN_RUN'] is True:
    try:
        lum_prefix = gandalf.lum_type.lower()
        # Labels are the column names without the "unsheared/" prefix.
        # ("weight" with range (10, 80) is currently left out.)
        lst_chain_labels = [
            f"{lum_prefix}_r",
            f"{lum_prefix}_i",
            f"{lum_prefix}_z",
            "snr",
            "size_ratio",
            "T",
        ]
        lst_chain_columns = [f"unsheared/{label}" for label in lst_chain_labels]
        lst_chain_ranges = [(17, 25), (17, 25), (17, 25), (-2, 300), (0, 6), (0, 3)]
        plot_compare_corner(
            data_frame_generated=df_gandalf_cut,
            data_frame_true=df_balrog_cut,
            dict_delta=None,
            epoch=None,
            title=f"{mcal} chain plot",
            show_plot=cfg['SHOW_PLOT_RUN'],
            save_plot=cfg['SAVE_PLOT_RUN'],
            save_name=f"{cfg['PATH_PLOTS_FOLDER'][f'{mcal.upper()}CHAIN_PLOT']}/{mcal}chainplot.png",
            columns=lst_chain_columns,
            labels=lst_chain_labels,
            ranges=lst_chain_ranges
        )
    except Exception as e:
        print(e)

In [None]:
# For every configured condition: plot the error-normalised magnitude residual
# against the condition, one panel per band, Balrog against gaNdalF.
if cfg['PLOT_CONDITIONS'] is True:
    lst_bands = cfg['BANDS_RUN']
    outputs = ['unsheared/mag_' + b for b in lst_bands]
    true_outputs = ['BDF_MAG_DERED_CALIB_' + b.upper() for b in lst_bands]
    output_errs = ['unsheared/mag_err_' + b for b in lst_bands]
    standard_levels = 10  # number of KDE contour levels

    for condition in cfg['CONDITIONS']:
        # Nothing to plot for an empty condition column. Skipping it also
        # prevents reusing the limits of the previous condition (or hitting an
        # undefined name on the first one).
        if df_balrog[condition].size == 0:
            print("df_balrog[condition] empty")
            continue

        cond_figure = None
        try:
            # One panel per configured band (instead of a fixed three).
            cond_figure, arr_axes = plt.subplots(nrows=len(lst_bands), ncols=1, figsize=(12, 12))
            lst_axis_con = list(np.atleast_1d(arr_axes))
            cond_figure.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.5, hspace=0.5)
            cond_figure.suptitle("BDF_MAG_DERED_CALIB - unsheared/mag", fontsize=16)

            # x-range: 2nd to 98th percentile of the Balrog condition values.
            cond_lims = np.percentile(df_balrog[condition], [2, 98])

            for idx, (output_, output_err_, true_output_) in enumerate(zip(outputs, output_errs, true_outputs)):
                ax_con = lst_axis_con[idx]
                residual_col = f"residual band {lst_bands[idx]}"

                # Balrog: (true - measured) / measured error
                diff_true = (df_balrog[true_output_] - df_balrog[output_]) / df_balrog[output_err_]
                df_conditional_true = pd.DataFrame({
                    condition: df_balrog[condition],
                    residual_col: diff_true,
                    "dataset": ["Balrog"] * len(df_balrog[condition])
                })
                # Median and median absolute deviation in 10 bins of the condition.
                bin_means_true, bin_edges_mean_true, binnumber_true = binned_statistic(
                    df_balrog[condition], diff_true, statistic='median', bins=10, range=cond_lims)
                bin_stds_true, bin_edges_true, binnumber_true = binned_statistic(
                    df_balrog[condition], diff_true, statistic=median_abs_deviation, bins=10, range=cond_lims)
                xerr_true = (bin_edges_mean_true[1:] - bin_edges_mean_true[:-1]) / 2
                xmean_true = (bin_edges_mean_true[1:] + bin_edges_mean_true[:-1]) / 2
                ax_con.errorbar(
                    xmean_true, bin_means_true, xerr=xerr_true, yerr=bin_stds_true, color='dodgerblue', lw=2,
                    label='Balrog')

                # gaNdalF: same quantities from the generated catalog
                diff_generated = (df_gandalf[true_output_] - df_gandalf[output_]) / df_gandalf[output_err_]
                df_conditional_generated = pd.DataFrame({
                    condition: df_gandalf[condition],
                    residual_col: diff_generated,
                    "dataset": ["gaNdalF"] * len(df_gandalf[condition])
                })
                bin_means_generated, bin_edges_mean_generated, binnumber_mean_generated = binned_statistic(
                    df_gandalf[condition], diff_generated, statistic='median', bins=10, range=cond_lims)
                bin_stds_generated, bin_edges_generated, binnumber_generated = binned_statistic(
                    df_gandalf[condition], diff_generated, statistic=median_abs_deviation, bins=10,
                    range=cond_lims)
                xerr_generated = (bin_edges_mean_generated[1:] - bin_edges_mean_generated[:-1]) / 2
                xmean_generated = (bin_edges_mean_generated[1:] + bin_edges_mean_generated[:-1]) / 2
                ax_con.errorbar(
                    xmean_generated, bin_means_generated, xerr=xerr_generated, yerr=bin_stds_generated,
                    color='darkorange', lw=2, label='gaNdalF')

                # y-range: median +/- 4 median absolute deviations of the gaNdalF residual.
                m, s = np.median(diff_generated), median_abs_deviation(diff_generated)
                range_ = [m - 4 * s, m + 4 * s]

                sns.kdeplot(
                    data=df_conditional_true,
                    x=condition,
                    y=residual_col,
                    fill=True,
                    thresh=0,
                    levels=standard_levels,
                    color="dodgerblue",
                    legend="Balrog",
                    ax=ax_con
                )
                sns.kdeplot(
                    data=df_conditional_generated,
                    x=condition,
                    y=residual_col,
                    fill=False,
                    thresh=0,
                    levels=standard_levels,
                    alpha=.5,
                    color="darkorange",
                    legend="gaNdalF",
                    ax=ax_con
                )
                ax_con.set_xlim(cond_lims)
                ax_con.set_ylim(range_)
                ax_con.axhline(np.median(diff_true), c='dodgerblue', ls='--', label='median Balrog')
                ax_con.axhline(0, c='grey', ls='--', label='zero')
                ax_con.axhline(np.median(diff_generated), c='darkorange', ls='--',
                               label='median gaNdalF')
                ax_con.axvline(np.median(df_balrog[condition]), c='grey', ls='--',
                               label='median conditional')
            lst_axis_con[0].legend()
            cond_figure.tight_layout()
            if cfg['SAVE_PLOT_RUN'] is True:
                plt.savefig(f"{cfg['PATH_PLOTS_FOLDER'][f'{mcal.upper()}CONDITIONS_PLOT']}/{mcal}{condition}_plot.png")
            if cfg['SHOW_PLOT_RUN'] is True:
                plt.show()

        except ValueError:
            print(f"Value Error for {condition}")

        finally:
            # Release the figure on failure as well, so skipped conditions do
            # not leave open figures behind.
            if cond_figure is not None:
                plt.close(cond_figure)

In [None]:
# Magnitude histograms of the cut catalogs, one panel per band, Balrog against
# gaNdalF. Best effort: errors are printed, not re-raised.
if cfg['PLOT_HIST'] is True:
    try:
        lst_bands = cfg['BANDS_RUN']
        # One panel per configured band (instead of a fixed three).
        hist_figure_2, arr_axes = plt.subplots(nrows=len(lst_bands), ncols=1, figsize=(12, 12))
        lst_axis_his = list(np.atleast_1d(arr_axes))
        hist_figure_2.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.5, hspace=0.5)
        hist_figure_2.suptitle(r"magnitude histogram", fontsize=16)

        for band, his_ax in zip(lst_bands, lst_axis_his):
            mag_col = f"unsheared/mag_{band}"
            sns.histplot(
                data=df_balrog_cut,
                x=mag_col,
                ax=his_ax,
                element="step",
                stat="count",
                color="dodgerblue",
                fill=True,
                binwidth=0.2,
                log_scale=(False, True),
                label="balrog"
            )
            sns.histplot(
                data=df_gandalf_cut,
                x=mag_col,
                ax=his_ax,
                element="step",
                stat="count",
                color="darkorange",
                fill=False,
                log_scale=(False, True),
                binwidth=0.2,
                label="gaNdalF"
            )
            # The vertical lines mark the medians, so they are labelled as such.
            his_ax.axvline(
                x=df_balrog_cut[mag_col].median(),
                color='dodgerblue',
                ls='--',
                lw=1.5,
                label="Median Balrog"
            )
            his_ax.axvline(
                x=df_gandalf_cut[mag_col].median(),
                color='darkorange',
                ls='--',
                lw=1.5,
                label="Median gaNdalF"
            )
        plt.legend()
        # Flags are tested with `is True`, like in the other plot cells.
        if cfg['SAVE_PLOT_RUN'] is True:
            plt.savefig(f"{cfg['PATH_PLOTS_FOLDER'][f'{mcal.upper()}HIST_PLOT']}/{mcal}magnitude_histogram.png")
        if cfg['SHOW_PLOT_RUN'] is True:
            plt.show()
        plt.clf()
        plt.close()
    except Exception as e:
        print(e)

In [None]:
import pickle

# Store the cut gaNdalF catalog as a pickled dict (pickle protocol 2).
# The "ID" column, if present, is renamed to "true_id" in place beforehand.
df_gandalf_cut.rename(columns={"ID": "true_id"}, inplace=True)
catalog_path = f"{cfg['PATH_CATALOGS']}/{cfg['FILENAME_GANDALF_CATALOG']}_{cfg['NUMBER_SAMPLES']}_b.pkl"
with open(catalog_path, "wb") as catalog_file:
    pickle.dump(df_gandalf_cut.to_dict(), catalog_file, protocol=2)

In [None]:
# Display a catalog path built from the number of rows in df_gandalf_cut.
# NOTE(review): the save cell above names the file with cfg['NUMBER_SAMPLES'],
# not len(df_gandalf_cut), so this may not be the path that was written - confirm.
f"{cfg['PATH_CATALOGS']}/{cfg['FILENAME_GANDALF_CATALOG']}_{len(df_gandalf_cut)}_b.pkl"

In [None]:
# Display the number of rows in the cut gaNdalF catalog.
len(df_gandalf_cut)