# DiSSECT Playground

In [None]:
# Import dependencies
from dissect.analysis.data_processing import get_curves, get_trait, find_outliers
import plotly.express as px
import pandas as pd

In [None]:
# Define your data source or use the default.
# SOURCE is passed as the first argument to get_curves() and get_trait() in the cells below.
# NOTE(review): presumably the base URL of a DiSSECT instance; confirm which other
# source forms the loaders accept.
SOURCE = 'https://dissect.crocs.fi.muni.cz/'

In [None]:
TRAIT = "a05"  # Select trait (identifier handed to get_trait() below)
# Define filtering query.
# The same dict is passed to both get_curves() and get_trait() in the cells below.
# NOTE(review): list values presumably mean "match any of these"; confirm against
# the dissect data_processing loaders.
QUERY = {
    # Curve filters
    "bits": 256,
    "cofactor": 1,
    "category": ["secg", "random"],
    # Trait params
    "l": [2, 3, 5]
}

In [None]:
# Load curves
curves = get_curves(SOURCE, QUERY)
curves

In [None]:
# Load trait results
trait = get_trait(SOURCE, TRAIT, QUERY)
trait

## Visualization

In [None]:
# Settings shared by the plotting cells below:
# FEATURE is the trait column plotted by the histogram and violin cells,
# PARAM is the trait column whose distinct values each get their own figure.
FEATURE = "full"  # Select feature for inspection
PARAM = "l"       # Select parameter for inspection

In [None]:
# Normalized count
#
# Draws one grouped histogram of FEATURE per distinct PARAM value, coloured by
# the "standard" column; histnorm="percent" plots percentages instead of raw counts.

# Iterate over sorted unique values (instead of a set) so the figures always
# appear in the same ascending order; missing values are skipped because they
# can never match the equality filter below.
for param_value in sorted(trait[PARAM].dropna().unique()):
    px.histogram(
        trait[trait[PARAM] == param_value],
        x=FEATURE,
        color="standard",
        histfunc="count",
        histnorm="percent",
        barmode="group",
        title=f"Normalized count {TRAIT}['{FEATURE}']: {PARAM}={param_value}"
    ).show()

In [None]:
# Normalized bubbles
#
# Draws one scatter plot per distinct PARAM value. Each bubble is a distinct
# (FEATURE_X, FEATURE_Y, standard) combination; its size is the share of rows
# with that combination among all rows that have the same "standard" value.

FEATURE_X = "least"
FEATURE_Y = "full"

# Sorted unique values give a stable, ascending figure order; missing values
# are skipped because they can never match the equality filter below.
for param_value in sorted(trait[PARAM].dropna().unique()):
    subset = trait[trait[PARAM] == param_value]
    value_counts = (
        subset[[FEATURE_X, FEATURE_Y, "standard"]]
        .value_counts()
        .to_frame("counts")
        .reset_index()
    )
    # Normalize within each "standard" group. Replacing the whole column
    # (rather than writing floats into slices of the integer column through
    # .loc) avoids the int -> float upcasting-on-assignment that newer pandas
    # rejects, and does not require "standard" to be a strictly boolean mask.
    group_totals = value_counts.groupby("standard")["counts"].transform("sum")
    value_counts["counts"] = value_counts["counts"] / group_totals
    px.scatter(
        value_counts,
        x=FEATURE_X,
        y=FEATURE_Y,
        size="counts",
        color="standard",
        title=f"Normalized bubbles {TRAIT}['{FEATURE_X}','{FEATURE_Y}']: {PARAM}={param_value}"
    ).show()

In [None]:
# Violin
#
# Draws one violin plot of FEATURE per distinct PARAM value.

# Sorted unique values give a stable, ascending figure order; missing values
# are skipped because they can never match the equality filter below.
for param_value in sorted(trait[PARAM].dropna().unique()):
    px.violin(
        trait[trait[PARAM] == param_value],
        y=FEATURE,
        # Title each figure so plots for different PARAM values can be told apart.
        title=f"Violin {TRAIT}['{FEATURE}']: {PARAM}={param_value}"
    ).show()

In [None]:
# Run find_outliers over the "least" and "full" columns of the trait results.
# The call is the cell's last expression, so the notebook displays its return value.
# NOTE(review): the exact form of the result depends on find_outliers; confirm in
# dissect.analysis.data_processing.
find_outliers(trait, ["least", "full"])