# Analysis of A05 results

## Data loading & filtering

In [None]:
from dissect.utils.data_processing import load_curves, load_trait, filter_df
from dissect.visual.widgets import get_filtering_widgets, get_choices

trait_name = "a05"

# Curve metadata and every stored result for the chosen trait.
curves = load_curves()
df_trait = load_trait(trait_name)

# Right join on "curve": every trait result row is kept and gets the curve metadata attached.
df_trait = curves.merge(df_trait, how="right", on="curve")

# Widgets from which the filtering choices for this trait are read later on.
filtering_widgets = get_filtering_widgets(trait_name)

## Data inspection

In [None]:
# Read the current widget selections and keep only the matching rows.
selected_choices = get_choices(filtering_widgets)
df = filter_df(df_trait, selected_choices)
df

In [None]:
# Collapse every column to the set of its distinct values to see which values occur at all.
df.agg(set)

In [None]:
# Summary statistics of the filtered results (presumably a pandas DataFrame, so
# count/mean/std/quantiles of the numeric columns).
df.describe()

## Data visualization

In [None]:
from dissect.visual.visualization import normalized_barplot, normalized_bubbleplot

In [None]:
# One normalized bar chart for each of the "least" and "full" columns.
barplot_columns = ["least", "full"]
for column in barplot_columns:
    normalized_barplot(df, column, title=f"Normalized barplot of {column}")

In [None]:
def _bubble_layer(group):
    """Return x positions, y positions and bubble areas for one group of rows.

    Every distinct row of ``group`` (a two-column table) becomes one bubble
    whose area is its share of the group's rows, scaled so that all bubbles of
    the group together cover ``30 ** 2`` area units.
    """
    counts = group.value_counts()
    if counts.empty:
        # zip(*<empty index>) yields nothing, which would leave scatter()
        # without its x/y arguments; plot an empty layer instead.
        return [], [], []
    xs, ys = zip(*counts.index)
    areas = 30 ** 2 * counts.values / counts.values.sum()
    return xs, ys, areas


def normalized_bubbleplot(df, xfeature, yfeature, title="Normalized bubble plot", xlab=None, ylab=None):
    """Draw a bubble plot of two features for standard and simulated curves.

    Rows with a missing value in ``xfeature`` or ``yfeature`` are dropped. The
    remaining rows are split on the ``simulated`` column, and each distinct
    ``(xfeature, yfeature)`` pair is drawn as one bubble. Bubble areas are
    normalized within each group separately (by that group's row count).

    NOTE(review): defining this function in the notebook shadows the
    ``normalized_bubbleplot`` imported from ``dissect.visual.visualization``.
    NOTE(review): ``plt`` is expected to be ``matplotlib.pyplot`` and must
    already be available in the notebook namespace -- confirm.

    Args:
        df: table containing a ``simulated`` column and both feature columns.
        xfeature: name of the column plotted on the x axis.
        yfeature: name of the column plotted on the y axis.
        title: figure title.
        xlab: x axis label; falls back to ``xfeature`` when empty or None.
        ylab: y axis label; falls back to ``yfeature`` when empty or None.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    if not xlab:
        xlab = xfeature
    if not ylab:
        ylab = yfeature

    features = [xfeature, yfeature]
    df = df[["simulated", xfeature, yfeature]].dropna(subset=features)

    # Explicit comparisons (rather than ``~``) so that a non-boolean
    # ``simulated`` column is handled exactly as an ``== False`` / ``== True`` test.
    std = df.loc[df["simulated"].eq(False), features]
    sim = df.loc[df["simulated"].eq(True), features]

    std_x, std_y, std_area = _bubble_layer(std)
    sim_x, sim_y, sim_area = _bubble_layer(sim)

    plt.figure(figsize=(10, 6))
    # Both layers are always drawn, even when empty, so the legend entries and
    # the colour assigned to each group stay the same across plots.
    plt.scatter(std_x, std_y, s=std_area, alpha=0.5, label=f"Standard curves n={len(std)}")
    plt.scatter(sim_x, sim_y, s=sim_area, alpha=0.5, label=f"Simulated curves n={len(sim)}")
    plt.legend()
    plt.title(title)
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.show()

In [None]:
# One bubble plot of "least" against "full" for each value of the "l" column.
for l in (2, 3):
    rows_for_l = df[df["l"] == l]
    normalized_bubbleplot(rows_for_l, "least", "full", title=f"Normalized bubble plot for l={l}")

## Playground

In [None]:
# Scratch cell: display the currently filtered table.
df
# Disabled experiments that repeat, for "least"/"full", the column selection,
# NaN filtering and non-simulated row selection done inside normalized_bubbleplot.
# df = df[["simulated", "least", "full"]]
# df = df.dropna(subset=("least", "full"))
# df[df["simulated"]==False]

In [None]:
# Scratch cell: rows of the filtered table whose "l" column equals 2.
df[df["l"] == 2]