In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
import plotly.express as px
from plotly.offline import init_notebook_mode

# Initialise plotly's offline notebook mode once, before any figure is shown.
# NOTE(review): connected=False presumably embeds plotly.js in the notebook
# instead of loading it from a CDN -- confirm against the plotly docs.
init_notebook_mode(connected=False)

In [None]:
import sys

# Put the relative "python" directory first on the module search path so the
# import below can find the project-local `experiments` module (presumably
# python/experiments.py -- confirm). The entry is relative, so it is resolved
# against the kernel's working directory: start the notebook from the
# directory that contains python/.
sys.path.insert(0, "python")
from experiments import shorten_protocol_names_for_df

In [None]:
def filter_estimator(
    df: pd.DataFrame, value: str, col: str = "estimator"
) -> pd.DataFrame:
    """Return an independent copy of the rows of ``df`` where ``df[col] == value``.

    Args:
        df: Frame to filter.
        value: Value that ``col`` must equal for a row to be kept.
        col: Name of the column to compare; defaults to ``"estimator"``.

    Returns:
        A copy of the matching rows, keeping their original index labels.
    """
    matches = df[col] == value
    selected = df.loc[matches]
    return selected.copy()


def extract_results_for_visualization(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape a wide results frame into long format for faceted plotting.

    The columns ``hit_ratio``, ``ndcg`` and ``message_spread_ratio`` are
    unpivoted into a ``metric`` column (the metric name) and a ``value``
    column (the measurement). The experiment descriptors listed in
    ``id_columns`` are repeated on every output row.

    Args:
        df: Wide frame containing at least the id columns and the three
            metric columns. Any other column (for example ``inverse_rank``
            or ``entropy``) is ignored and does not have to be present.

    Returns:
        A new long-format frame with the id columns followed by ``metric``
        and ``value``; ``df`` itself is left unchanged.

    Raises:
        KeyError: If one of the id or metric columns is missing from ``df``.
    """
    id_columns = [
        "graph_model",
        "protocol",
        "adversary_ratio",
        "adversary_type",
        "estimator",
        "broadcast_mode",
    ]
    metric_columns = ["hit_ratio", "ndcg", "message_spread_ratio"]
    # With both id_vars and value_vars given, melt() keeps only those columns,
    # so unused columns need not be dropped first (and need not exist at all).
    return df.melt(
        id_vars=id_columns,
        value_vars=metric_columns,
        var_name="metric",
    )


def update_figure(fig, width=1300, height=1000):
    """Apply the notebook's common size and legend placement to ``fig``.

    Args:
        fig: Object exposing ``update_layout(**kwargs)``; in this notebook,
            the figures built with ``px.box``.
        width: Value forwarded as the layout ``width``.
        height: Value forwarded as the layout ``height``.

    Returns:
        None. ``fig`` is changed through its ``update_layout`` method.
    """
    # TODO: forcing linear x-axis ticks here (tickmode="linear", tick0=0.0,
    # dtick=0.05) does not work in case of facet_col.. fix it!
    # Horizontal legend whose bottom-left corner is anchored at (x=0.0, y=1.02).
    legend_settings = dict(
        orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0.0
    )
    fig.update_layout(width=width, height=height, legend=legend_settings)

# 1. Results with passive adversary

In [None]:
# Load the random regular graph results and tag every row with its graph
# model and adversary type; the cell output is the frame's shape.
passive_random_df = pd.read_csv(
    "scripts/random_reg_1000_with_dandelions.csv"
).assign(
    graph_model="random_regular",
    adversary_type="passive",
)
passive_random_df.shape

In [None]:
# Load the Goerli testnet results and tag every row with its graph model and
# adversary type; the cell output is the frame's shape.
passive_goerli_df = pd.read_csv("scripts/goerli_with_dandelions.csv").assign(
    graph_model="goerli_testnet",
    adversary_type="passive",
)
passive_goerli_df.shape

#### Shorten protocol names before visualization

In [None]:
# Pass both passive-adversary frames through the helper imported from the
# project-local `experiments` module; presumably it abbreviates the protocol
# names -- confirm there. Each name is rebound to the helper's result, so
# re-running this cell feeds the helper its own previous output.
passive_random_df = shorten_protocol_names_for_df(passive_random_df)
passive_goerli_df = shorten_protocol_names_for_df(passive_goerli_df)

## i.) Estimator comparison: first reach vs. first sent

- random regular graph with 1000 nodes, each of degree 50
- 5% of all nodes send messages (~ number of messages)

In [None]:
# Long format: one row per (experiment row, metric) pair, ready for faceted plots.
passive_random_results = extract_results_for_visualization(passive_random_df)

In [None]:
# Peek at the first rows of the long-format frame (id columns plus "metric" and "value").
passive_random_results.head()

### Observations

- Adversary is more efficient with higher adversary ratio - **OK**
- Adversary can better deanonymize simple Broadcast than Dandelion(++) - **OK**
- Dandelion and Dandelion++ have comparable results with the same broadcast probability - **OK**
- Average message spread ratio (fraction of nodes receiving each message) is 1.0 regardless of the protocol - **OK**

In [None]:
# Box plots of every metric (one facet row each) against the adversary ratio,
# with one facet column per estimator and one colour per protocol.
fig = px.box(
    passive_random_results,
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="estimator",
    facet_row="metric",
)
update_figure(fig)
# Save a static PNG copy in addition to the interactive output.
# NOTE(review): presumably needs the figures/ directory to exist already -- confirm.
fig.write_image("figures/passive_estimator_check.png")
fig.show()

## ii.) Graph model comparison: random regular vs. Goerli testnet

- Goerli testnet has approximately 1.5K nodes and 20K edges
- 5% of all nodes send messages (~ number of messages)

In [None]:
# Same long-format reshaping as for the random regular graph results.
passive_goerli_results = extract_results_for_visualization(passive_goerli_df)

In [None]:
# Stack the "first_sent" estimator rows of both graph models into one long
# frame. filter_estimator (defined near the top of this notebook) replaces the
# hand-written boolean masks so that every comparison filters the same way.
passive_first_sent_results = pd.concat(
    [
        filter_estimator(passive_random_results, "first_sent"),
        filter_estimator(passive_goerli_results, "first_sent"),
    ]
)

### Observations (only first sent estimator results are shown!)

- Trends with respect to adversary ratio and protocol parameters are similar - **OK**
- In general, adversary is less effective for the Goerli testnet than for random regular graphs - **Interesting**
- Dandelion results have smaller deviation (because we use the same network structure 10 times) - **OK**
- Average message spread ratio (fraction of nodes receiving each message) is 1.0 regardless of the protocol and graph model - **OK**

In [None]:
# Box plots of every metric (one facet row each) against the adversary ratio,
# with one facet column per graph model and one colour per protocol.
fig = px.box(
    passive_first_sent_results,
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="graph_model",
    facet_row="metric",
)
update_figure(fig)
# NOTE(review): the output file name spells "comparision"; left unchanged here
# because other material may reference the existing file name.
fig.write_image("figures/graph_model_comparision.png")
fig.show()

## iii.) Broadcast to all nodes (instead of sqrt amount of them)

In [None]:
# Load the random regular graph results of the "all_bc" runs and tag every
# row with its graph model and adversary type; the cell output is the shape.
passive_random_all_bc_df = pd.read_csv(
    "scripts/random_reg_1000_with_dandelions_all_bc.csv"
).assign(
    graph_model="random_regular",
    adversary_type="passive",
)
passive_random_all_bc_df.shape

In [None]:
# Load the Goerli testnet results of the "all_bc" runs and tag every row with
# its graph model and adversary type; the cell output is the shape.
passive_goerli_all_bc_df = pd.read_csv(
    "scripts/goerli_with_dandelions_all_bc.csv"
).assign(
    graph_model="goerli_testnet",
    adversary_type="passive",
)
passive_goerli_all_bc_df.shape

#### Shorten protocol names before visualization

In [None]:
# Pass both "all_bc" frames through the helper imported from the project-local
# `experiments` module; presumably it abbreviates the protocol names --
# confirm there. Each name is rebound to the helper's result, so re-running
# this cell feeds the helper its own previous output.
passive_random_all_bc_df = shorten_protocol_names_for_df(passive_random_all_bc_df)
passive_goerli_all_bc_df = shorten_protocol_names_for_df(passive_goerli_all_bc_df)

In [None]:
# Reshape both "all_bc" frames to long format (one row per experiment row and
# metric) so they can be stacked with the earlier passive results.
passive_random_all_bc_results = extract_results_for_visualization(
    passive_random_all_bc_df
)
passive_goerli_all_bc_results = extract_results_for_visualization(
    passive_goerli_all_bc_df
)

In [None]:
# Stack the "first_sent" estimator rows of the original and the "all_bc" runs
# for both graph models, in that order. filter_estimator (defined near the top
# of this notebook) replaces four hand-written copies of the same boolean mask.
passive_bc_first_sent_results = pd.concat(
    [
        filter_estimator(results, "first_sent")
        for results in (
            passive_random_results,
            passive_goerli_results,
            passive_random_all_bc_results,
            passive_goerli_all_bc_results,
        )
    ]
)

### Observations (only first sent estimator results are shown!)

- Adversary can better deanonymize if message is sent to all neighbors - **OK**

In [None]:
# Hit ratio only: one facet column per broadcast mode, one facet row per
# graph model, one colour per protocol.
bc_hit_ratio_rows = passive_bc_first_sent_results["metric"] == "hit_ratio"
fig = px.box(
    passive_bc_first_sent_results[bc_hit_ratio_rows],
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="broadcast_mode",
    facet_row="graph_model",
    labels={"value": "hit_ratio"},
)
update_figure(fig, width=1200, height=500)
fig.write_image("figures/broadcast_mode_hit_ratio.png")
fig.show()

- Message spread is lower when messages are broadcast to all neighbors - **WHY?**

In [None]:
# Message spread ratio only: one facet column per broadcast mode, one facet
# row per graph model, one colour per protocol.
bc_spread_rows = passive_bc_first_sent_results["metric"] == "message_spread_ratio"
fig = px.box(
    passive_bc_first_sent_results[bc_spread_rows],
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="broadcast_mode",
    facet_row="graph_model",
    labels={"value": "message_spread_ratio"},
)
update_figure(fig, width=1200, height=500)
fig.write_image("figures/broadcast_mode_message_spread.png")
fig.show()

# 2. Active vs. passive adversary

In [None]:
# Load the active-adversary results for the random regular graph and tag
# every row with its graph model and adversary type; the output is the shape.
active_random_df = pd.read_csv(
    "scripts/random_reg_1000_with_dandelions_active_adversary.csv"
).assign(
    graph_model="random_regular",
    adversary_type="active",
)
active_random_df.shape

In [None]:
# Load the active-adversary results for the Goerli testnet and tag every row
# with its graph model and adversary type; the output is the shape.
active_goerli_df = pd.read_csv(
    "scripts/goerli_with_dandelions_active_adversary.csv"
).assign(
    graph_model="goerli_testnet",
    adversary_type="active",
)
active_goerli_df.shape

#### Shorten protocol names before visualization

In [None]:
# Pass both active-adversary frames through the helper imported from the
# project-local `experiments` module; presumably it abbreviates the protocol
# names -- confirm there. Each name is rebound to the helper's result, so
# re-running this cell feeds the helper its own previous output.
active_random_df = shorten_protocol_names_for_df(active_random_df)
active_goerli_df = shorten_protocol_names_for_df(active_goerli_df)

In [None]:
# Reshape both active-adversary frames to long format (one row per experiment
# row and metric) so they can be stacked with the passive results.
active_random_results = extract_results_for_visualization(active_random_df)
active_goerli_results = extract_results_for_visualization(active_goerli_df)

In [None]:
# Stack the "first_sent" estimator rows of the passive and active runs for
# both graph models, in that order. filter_estimator (defined near the top of
# this notebook) replaces four hand-written copies of the same boolean mask.
passive_active_results = pd.concat(
    [
        filter_estimator(results, "first_sent")
        for results in (
            passive_random_results,
            passive_goerli_results,
            active_random_results,
            active_goerli_results,
        )
    ]
)

### Observations

- Active adversary effect on message spread behaves as expected - **OK**

In [None]:
# Message spread ratio only: one facet column per adversary type, one facet
# row per graph model, one colour per protocol.
adv_spread_rows = passive_active_results["metric"] == "message_spread_ratio"
fig = px.box(
    passive_active_results[adv_spread_rows],
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="adversary_type",
    facet_row="graph_model",
    labels={"value": "message_spread_ratio"},
)
update_figure(fig, width=1200, height=500)
fig.write_image("figures/passive_vs_active_adversary_message_spread.png")
fig.show()

- **BUT** how can the adversary's performance be lower for an active adversary? **Only adversary nodes can stop message propagation, so they should receive the same information, right? Then how?**
   - Actually, the results are comparable (the difference is probably not significant).

In [None]:
# NDCG only: one facet column per adversary type, one facet row per graph
# model, one colour per protocol.
adv_ndcg_rows = passive_active_results["metric"] == "ndcg"
fig = px.box(
    passive_active_results[adv_ndcg_rows],
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="adversary_type",
    facet_row="graph_model",
    labels={"value": "ndcg"},
)
update_figure(fig, width=1200, height=500)
fig.write_image("figures/passive_vs_active_adversary_ndcg.png")
fig.show()

In [None]:
# Hit ratio only: one facet column per adversary type, one facet row per
# graph model, one colour per protocol.
adv_hit_ratio_rows = passive_active_results["metric"] == "hit_ratio"
fig = px.box(
    passive_active_results[adv_hit_ratio_rows],
    x="adversary_ratio",
    y="value",
    color="protocol",
    facet_col="adversary_type",
    facet_row="graph_model",
    labels={"value": "hit_ratio"},
)
update_figure(fig, width=1200, height=500)
fig.write_image("figures/passive_vs_active_adversary_hit_ratio.png")
fig.show()

### TODO: experiment with random regular degree
### TODO: experiment with weighted nodes: stake (for message source selection)
### TODO: experiment with weighted nodes: centrality (for adversary node selection)
### TODO: design experiment for contact time quantiles