In [None]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sys.path.insert(0, '../src')
from classifier_results_metrics import output_metrics_folds, output_metrics

In [None]:
def read_dataframe(path_name, filter=None):
    """Collect ``mcc`` / ``pr_auc`` metrics for folds 0-3 at several thresholds.

    For every fold ``i`` in ``range(4)`` the first CSV returned by
    ``glob`` for ``f"{path_name}{i}/*.csv"`` whose path contains ``fold{i}``
    is passed to ``output_metrics_folds`` once per threshold in
    ``[0.3, 0.4, ..., 0.9]``. Each call is labelled ``threshold_<t>`` and all
    returned frames are concatenated (their original indexes are kept).

    Parameters
    ----------
    path_name : str
        Path prefix; the fold number is appended to form the directory name.
    filter : str, optional
        When given, only rows whose ``test_dir`` column equals this value are
        returned. The name shadows the builtin ``filter``; it is kept because
        it is part of the signature.

    Returns
    -------
    pandas.DataFrame
        Concatenation of every frame returned by ``output_metrics_folds``,
        optionally restricted to one ``test_dir``.

    Raises
    ------
    IndexError
        If no CSV containing ``fold{i}`` is found for one of the folds.
    """
    thresholds = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    frames = []
    for i in range(4):
        # NOTE(review): the substring test runs on the whole path, so when the
        # directory itself is named ``fold{i}`` every CSV inside it matches —
        # confirm whether only the file name was meant to be checked.
        candidates = [x for x in glob.glob(f"{path_name}{i}/*.csv") if f"fold{i}" in x]
        if not candidates:
            raise IndexError(f"no CSV containing 'fold{i}' found under '{path_name}{i}/'")
        file_name = candidates[0]
        for t in thresholds:
            frames.append(output_metrics_folds(file_name, fold=i, label=f"threshold_{t}",
                                               metrics=['mcc', 'pr_auc'], threshold=t))
    # Concatenate once at the end instead of re-copying a growing frame.
    df_all = pd.concat(frames)
    if filter is not None:
        # Boolean mask instead of a string-built query, so a quote inside
        # ``filter`` cannot corrupt the expression.
        df_all = df_all[df_all["test_dir"] == filter]
    return df_all

def read_all_dataframes(path_names, filters):
    """Run ``read_dataframe`` for each (path prefix, filter) pair and stack the results.

    Parameters
    ----------
    path_names : iterable of str
        Path prefixes forwarded to ``read_dataframe``.
    filters : iterable
        ``filter`` argument for the prefix at the same position. The two
        iterables are paired with ``zip``, so extra items in the longer one
        are ignored.

    Returns
    -------
    pandas.DataFrame or None
        The concatenated frames in input order, or ``None`` when there are no
        pairs to process.
    """
    frames = [read_dataframe(path_name, test_dir)
              for path_name, test_dir in zip(path_names, filters)]
    if not frames:
        return None
    # Single concat instead of re-copying the accumulated frame per input.
    return pd.concat(frames)

def read_dataframe_look_angle(path_name, filter=None):
    """Collect ``mcc`` / ``pr_auc`` metrics per look angle for folds 0-3.

    The metadata table ``../data/original_dataset/results.csv`` supplies a
    ``look_angle`` for every ``uuid``. For each fold ``i`` the ids listed in
    ``../data/test_set_fold{i}.txt`` (first line skipped) select the look
    angles of that fold. For each look angle in ``[0, 90, 180, 270]`` the
    positions where the fold's look angles equal it are passed as ``indexes``
    to ``output_metrics_folds_mask`` (threshold 0.9 only). Every returned
    frame gets a ``look_angle`` column and all frames are concatenated.

    Parameters
    ----------
    path_name : str
        Path prefix; the fold number is appended to form the directory that
        is searched for the results CSV.
    filter : str, optional
        When given, only rows whose ``test_dir`` column equals this value are
        returned. The name shadows the builtin ``filter``; it is kept because
        it is part of the signature.

    Returns
    -------
    pandas.DataFrame
        Rows ordered by look angle, then fold.

    Raises
    ------
    IndexError
        If no CSV containing ``fold{i}`` is found for one of the folds.
    """
    meta_data_file_path = "../data/original_dataset/results.csv"
    meta_data_df = pd.read_csv(meta_data_file_path)
    # Left-pad with zeros and keep the last four characters so the ids can be
    # compared as strings with the lines of the fold files.
    meta_data_df["uuid"] = [f"000{x}"[-4:] for x in meta_data_df["uuid"]]
    thresholds = [0.9]

    # The fold id list and results file do not depend on the look angle, so
    # resolve them once per fold instead of once per (look angle, fold).
    fold_inputs = []
    for i in range(4):
        fold_path = f"../data/test_set_fold{i}.txt"
        with open(fold_path, "r") as fin:
            uuids = [line.strip('\n') for line in fin.readlines()[1:]]
        # NOTE(review): the values come out in the row order of results.csv,
        # not in the order of the ids in the fold file; the positional mask
        # below assumes the ``conf`` arrays share that order — confirm.
        in_fold = meta_data_df["uuid"].isin(uuids)
        look_angles = meta_data_df.loc[in_fold, "look_angle"].values

        candidates = [x for x in glob.glob(f"{path_name}{i}/*.csv") if f"fold{i}" in x]
        if not candidates:
            raise IndexError(f"no CSV containing 'fold{i}' found under '{path_name}{i}/'")
        fold_inputs.append((i, candidates[0], look_angles))

    frames = []
    for la in [0.0, 90.0, 180.0, 270.0]:
        for i, file_name, look_angles in fold_inputs:
            for t in thresholds:
                df = output_metrics_folds_mask(file_name, fold=i, label=f"threshold_{t}",
                                               metrics=['mcc', 'pr_auc'], threshold=t,
                                               indexes=np.where(look_angles == la))
                df["look_angle"] = la
                frames.append(df)
    # Concatenate once at the end instead of re-copying a growing frame.
    df_look_angle = pd.concat(frames)
    if filter is not None:
        # Boolean mask instead of a string-built query, so a quote inside
        # ``filter`` cannot corrupt the expression.
        df_look_angle = df_look_angle[df_look_angle["test_dir"] == filter]
    return df_look_angle

def read_all_dataframes_look_angle(path_names, filters):
    """Run ``read_dataframe_look_angle`` for each (path prefix, filter) pair and stack the results.

    Parameters
    ----------
    path_names : iterable of str
        Path prefixes forwarded to ``read_dataframe_look_angle``.
    filters : iterable
        ``filter`` argument for the prefix at the same position. The two
        iterables are paired with ``zip``, so extra items in the longer one
        are ignored.

    Returns
    -------
    pandas.DataFrame or None
        The concatenated frames in input order, or ``None`` when there are no
        pairs to process.
    """
    frames = [read_dataframe_look_angle(path_name, test_dir)
              for path_name, test_dir in zip(path_names, filters)]
    if not frames:
        return None
    # Single concat instead of re-copying the accumulated frame per input.
    return pd.concat(frames)

def output_metrics_folds_mask(file_path, fold=None, label=None, metrics=['pr_auc'], threshold=0.6,
                              indexes=None):
    """Compute metrics for one results CSV, optionally on a subset of each ``conf`` array.

    The ``conf`` column is parsed from its text form (surrounding brackets
    stripped, single quotes removed, split on ``", "``) into a NumPy array of
    strings. Rows with ``iteration == -1`` are discarded, the remaining rows
    are handed to ``output_metrics`` and missing values in its result are
    replaced by 0.

    Parameters
    ----------
    file_path : str
        CSV file with at least the columns ``iteration`` and ``conf``.
    fold : optional
        When not ``None``, appended to the result as a constant ``fold`` column.
    label : optional
        When not ``None``, appended to the result as a constant ``label`` column.
    metrics : list of str
        Forwarded to ``output_metrics``. The default list is shared between
        calls; it is only forwarded here, never modified.
    threshold : float
        Forwarded to ``output_metrics``.
    indexes : optional
        NumPy-style index (for example the tuple returned by ``np.where``)
        applied to every ``conf`` array before the metrics are computed.

    Returns
    -------
    pandas.DataFrame
        The ``output_metrics`` result without the ``tp``, ``tn``, ``fp`` and
        ``fn`` columns (when present) and with the optional ``fold`` /
        ``label`` columns appended last.
    """
    df = pd.read_csv(file_path,
            converters={"conf": lambda x: np.array(x.strip("[]").replace("'", "").split(", "))})
    # Rows with iteration == -1 are not used for the metrics, so drop them
    # before masking; copy so the column assignment below targets an
    # independent frame.
    df = df[df["iteration"] != -1].copy()
    if indexes is not None:
        # Column-wise map instead of a row-wise DataFrame.apply: no per-row
        # Series is built and an empty frame is handled.
        df["conf"] = df["conf"].map(lambda conf: conf[indexes])
    df = output_metrics(df, metrics=metrics, threshold=threshold).fillna(0)
    if fold is not None:
        df.insert(len(df.columns), "fold", fold)
    if label is not None:
        df.insert(len(df.columns), "label", label)
    # errors="ignore" skips whichever of these columns is absent.
    return df.drop(columns=['tp', 'tn', 'fp', 'fn'], errors="ignore")

In [None]:
# Path prefixes handed to read_all_dataframes, paired by position with the
# test_dir value that is kept from each of them.
path_names = [
    "../outputs_baseline_latent_rotation_0/fold",
    "../outputs_baseline_latent_rotation/fold",
]
filters = ["rotation_0", "rotation"]

df_all = read_all_dataframes(path_names=path_names, filters=filters)
df_all

In [None]:
# Same inputs as for df_all, with the metrics split by look angle.
df_look_angle = read_all_dataframes_look_angle(path_names=path_names, filters=filters)
df_look_angle

In [None]:
# MCC per test band for the 'unet' model, one bar per threshold label;
# one panel per test_dir value.
f, a = plt.subplots(2, 1, figsize=(13, 2*8))
for ax, test_dir in zip(a, ["rotation_0", "rotation"]):
    sns.barplot(data=df_all.query(f"model == 'unet' and test_dir == '{test_dir}'"),
                ax=ax, x="test_band", y="mcc", hue="label")
    # Title each panel so the two otherwise identical plots can be told apart.
    ax.set_title(f"model = unet, test_dir = {test_dir}")
plt.show()

In [None]:
# MCC per test band for the 'unet' model, one bar per look angle;
# one panel per test_dir value.
f, a = plt.subplots(2, 1, figsize=(13, 2*8))
for ax, test_dir in zip(a, ["rotation_0", "rotation"]):
    sns.barplot(data=df_look_angle.query(f"model == 'unet' and test_dir == '{test_dir}'"),
                ax=ax, x="test_band", y="mcc", hue="look_angle")
    # Title each panel so the two otherwise identical plots can be told apart.
    ax.set_title(f"model = unet, test_dir = {test_dir}")
plt.show()

In [None]:
# Look-angle rows for the 'unet' model on band X of the 'rotation_0' test_dir
# at the threshold_0.9 label.
df_look_angle.query(
    " and ".join([
        "model == 'unet'",
        "label == 'threshold_0.9'",
        "test_dir == 'rotation_0'",
        "test_band == 'X'",
    ])
)

In [None]:
# Look-angle rows for the 'unet' model on band X of the 'rotation' test_dir
# at the threshold_0.9 label.
df_look_angle.query(
    " and ".join([
        "model == 'unet'",
        "label == 'threshold_0.9'",
        "test_dir == 'rotation'",
        "test_band == 'X'",
    ])
)