# Visualização dos Resultados

## Setup do Notebook

In [20]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Figure size, font and margins shared by the charts in this notebook;
# unpacked as keyword arguments into `update_layout`.
layout_settings = dict(
    width=720,
    height=480,
    font={"size": 16},
    margin={"l": 10, "r": 10, "t": 40, "b": 10},
)

# Qualitative palette from which each series picks its color by position.
all_colors = px.colors.qualitative.Plotly

In [54]:
def get_rgba(hex, opacity=0.3):
    """Convert an ``RRGGBB`` hex color into a CSS ``rgba(...)`` string.

    Any leading ``#`` characters are stripped, the next three pairs of hex
    digits are read as the red, green and blue channels, and ``opacity`` is
    written as the alpha component with two decimal places.
    """
    digits = hex.lstrip('#')
    red = int(digits[0:2], 16)
    green = int(digits[2:4], 16)
    blue = int(digits[4:6], 16)
    return "rgba(%d, %d, %d, %.2f)" % (red, green, blue, opacity)

def add_mean_std_trace(fig, df, name, idx, x_col, y_col):
    """Add a mean line with a shaded +/- one standard deviation band to ``fig``.

    Rows of ``df`` that share the same index value are aggregated together:
    their mean gives the plotted line and their standard deviation gives the
    half-width of the band around it.

    Args:
        fig: Plotly figure that receives the three traces (mutated in place).
        df: DataFrame containing ``x_col`` and ``y_col``.
        name: Legend label of the series.
        idx: Position of the series; picks the color from ``all_colors``,
            wrapping around when there are more series than colors.
        x_col: Column plotted on the x axis (its per-index mean is used).
        y_col: Column plotted on the y axis.

    Note:
        pandas' grouped ``std`` is the sample standard deviation (ddof=1), so
        an index value backed by a single row yields NaN and contributes no
        band width.
    """
    # Aggregate only the plotted columns: unrelated (possibly non-numeric)
    # columns would otherwise make mean()/std() fail on recent pandas.
    # dict.fromkeys de-duplicates while keeping order, in case x_col == y_col.
    plotted = df[list(dict.fromkeys((x_col, y_col)))]
    grouped = plotted.groupby(plotted.index)
    df_mean = grouped.mean()
    df_std = grouped.std()

    color = all_colors[idx % len(all_colors)]
    x_values = df_mean[x_col]

    fig.add_trace(go.Scatter(
        name=name,
        x=x_values,
        y=df_mean[y_col],
        mode="lines+markers",
        line=dict(color=color),
        # Shared legend group: toggling the series in the legend also
        # toggles its band instead of leaving it behind.
        legendgroup=name,
    ))
    # Invisible upper bound; it only serves as the fill target of the next trace.
    fig.add_trace(go.Scatter(
        name=name + " upper",
        x=x_values,
        y=df_mean[y_col] + df_std[y_col],
        mode="lines",
        line=dict(width=0),
        legendgroup=name,
        showlegend=False,
    ))
    # Lower bound; 'tonexty' fills the area up to the trace added just before
    # it (the upper bound), so the order of these two traces matters.
    fig.add_trace(go.Scatter(
        name=name + " lower",
        x=x_values,
        y=df_mean[y_col] - df_std[y_col],
        mode="lines",
        line=dict(width=0),
        fillcolor=get_rgba(color),
        fill="tonexty",
        legendgroup=name,
        showlegend=False,
    ))

def plot_line(runs, x_col, y_col, title, xaxis_title=None, yaxis_title=None):
    """Draw one mean/std series per entry of ``runs`` and display the figure.

    Args:
        runs: Iterable of ``(name, dataframes)`` pairs. The DataFrames of each
            pair are concatenated and drawn as a single series.
        x_col: Column used for the x axis.
        y_col: Column used for the y axis.
        title: Figure title.
        xaxis_title: Optional x axis label.
        yaxis_title: Optional y axis label.
    """
    figure = go.Figure()
    for position, (label, frames) in enumerate(runs):
        add_mean_std_trace(figure, pd.concat(frames), label, position, x_col, y_col)

    figure.update_layout(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        **layout_settings,
    )
    figure.show()

## Experimentos

In [63]:
# experiment -> series label -> list of run DataFrames (one TSV file each).
# The path table below is kept separate from the loading step; files are read
# in the order they are listed.
experiments = {
    experiment: {
        label: [pd.read_csv(path, sep="\t")]
        for label, path in files.items()
    }
    for experiment, files in {
        "inserts": {
            "Sondagem Linear": "outputs/run_inserts_linear.tsv",
            "Sondagem Quadrática": "outputs/run_inserts_quad.tsv",
        },
        "in_out_net": {
            "Sondagem Linear": "outputs/run_in_out_net_linear.tsv",
            "Sondagem Quadrática": "outputs/run_in_out_net_quad.tsv",
        },
        "in_max": {
            "Sondagem Linear": "outputs/run_in_max_linear.tsv",
            "Sondagem Quadrática": "outputs/run_in_max_quad.tsv",
        },
    }.items()
}

In [64]:
# Plot the `comparisons` column against `load_factor` for the "in_max" runs.
runs = experiments["in_max"].items()
plot_line(
    runs,
    x_col="load_factor",
    y_col="comparisons",
    title="Comparações por fator de carga",
    xaxis_title="Fator de carga",
    yaxis_title="Comparações",
)

In [67]:
# Plot the `collisions` column against `load_factor` for the "in_max" runs.
runs = experiments["in_max"].items()
plot_line(
    runs,
    x_col="load_factor",
    y_col="collisions",
    title="Colisões por fator de carga",
    xaxis_title="Fator de carga",
    yaxis_title="Colisões",
)

In [65]:
# Plot the `groups_mean` column against `load_factor` for the "in_max" runs.
runs = experiments["in_max"].items()
plot_line(
    runs,
    x_col="load_factor",
    y_col="groups_mean",
    title="Tamanho de agrupamentos por fator de carga",
    xaxis_title="Fator de carga",
    yaxis_title="Tamanho de agrupamentos",
)

In [69]:
# Plot the `groups_total` column against `load_factor` for the "in_max" runs.
runs = experiments["in_max"].items()
plot_line(
    runs,
    x_col="load_factor",
    y_col="groups_total",
    title="Número de agrupamentos por fator de carga",
    xaxis_title="Fator de carga",
    yaxis_title="Número de agrupamentos",
)

In [66]:
runs = experiments["in_max"].items()

# Bar chart of the highest `load_factor` reached in each series: one bar per
# series, averaged over its runs, with the spread across runs as error bar.
stats = []
for name, dfs in runs:
    # Largest load_factor value of every individual run of this series.
    max_load_factors = np.array([df["load_factor"].max() for df in dfs])
    stats.append(
        {
            "Sondagem": name,
            "Fator de Carga Máximo": max_load_factors.mean(),
            # ndarray.std() is the population standard deviation (ddof=0),
            # so a series with a single run gets 0 here rather than NaN.
            # Note this differs from the pandas std (ddof=1) used by
            # add_mean_std_trace.
            "Desvio Padrão": max_load_factors.std(),
        }
    )

fig = px.bar(stats, x="Sondagem", y="Fator de Carga Máximo", error_y="Desvio Padrão")
fig.update_layout(title="Fator de carga máximo por tipo de sondagem", **layout_settings)
fig.show()