In [None]:
import os
import pandas as pd
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

from plots import make_folder, SingleMetrics, CombinedMetrics
import plots

# Input and output locations used by every cell below (relative to the notebook).
# CSV_FOLDER is the root that is searched for result CSV files.
CSV_FOLDER = "./csvs/retrieval_3"
# IMG_FOLDER is handed to the plotting helpers as their image output folder.
IMG_FOLDER = "./imgs/retrieval_3"

# make sure that the output folder exists
# NOTE(review): make_folder comes from the local `plots` module; presumably it
# creates the folder when missing and the second argument is a description
# used for logging -- confirm against plots.py.
make_folder(IMG_FOLDER, "keeping track of the generated images")


In [None]:
# Read all the available csv files in the given folder
def read_files_with(target: str):
    """Collect the files under CSV_FOLDER whose file name contains `target`.

    The folder is walked recursively. Only the file name (not the directory
    part) is matched against `target`.

    Args:
        target: substring that a file name must contain to be selected.

    Returns:
        A sorted list of paths (directory joined with file name) for every
        matching file; an empty list when nothing matches or the folder does
        not exist.
    """
    matches = []
    # Use names distinct from the accumulator: reusing `files` for the walk
    # tuple would shadow the result list and append to the list being iterated.
    for root, _, names in os.walk(CSV_FOLDER):
        for name in names:
            if target in name:
                matches.append(os.path.join(root, name))
    # os.walk yields entries in arbitrary order; sort for reproducible runs.
    matches.sort()
    print(f"found {len(matches)} with {target} files in {CSV_FOLDER}")
    return matches


## Analysis of the data
#### Individual metrics

In [None]:
# Individual metrics
def print_avg_lookup(df):
    """Print the mean of each lookup metric column of `df`, one per line.

    Every line is the column name, a run of tab characters used for visual
    alignment, a space, and the column mean.
    """
    # (column name, number of tabs that follow it in the printed line)
    layout = (
        ("lookup_wallclock_time", 3),
        ("attempted_nodes", 5),
        ("finished_connection_attempts", 1),
        ("successful_connections", 3),
        ("failed_connections", 4),
        ("total_discovered_nodes", 3),
        ("retrievable", 6),
        ("accuracy", 6),
    )
    for column, tab_count in layout:
        padding = "\t" * tab_count
        average = getattr(df, column).mean()
        print(f"{column}{padding} {average}")


# Display the single metrics of each test run individually
files = read_files_with("retrieval_lookup_nn")
for file in files:
    df = pd.read_csv(file)

    # Show the full row with the largest, then the smallest, aggregated delay
    for label, pick in (("max", "idxmax"), ("min", "idxmin")):
        print(f"\n{label} simulated lookup delay")
        display(df.loc[getattr(df['lookup_aggregated_delay'], pick)()])

    print("\navg simulated lookup delay")
    print_avg_lookup(df)

    # Plot configuration for the "retrievable" column; rebuilt on every
    # iteration so each SingleMetrics call receives its own dict.
    # NOTE(review): "retriebable" looks like a typo of "retrievable"; it is
    # kept as-is because it is runtime text consumed by the plots module.
    retrievable_plot = {
        "retrievable": {
            "title_tag": "retriebable",
            "xlabel_tag": "retriebable",
            "ylabel_tag": "",
        },
    }
    metrics = SingleMetrics(file, IMG_FOLDER, "Retrievals", retrievable_plot)

#### Aggregated across samples

In [None]:
# Aggregate the metrics of all matching runs into combined plots
files = read_files_with("lookup_nn")
unifiedMetrics = CombinedMetrics(
    files=files,
    aggregator="fast_delay_range",
    operation="retrieval",
    # NOTE(review): presumably restricts the aggregation to runs whose
    # parameters match these tags -- confirm against plots.CombinedMetrics.
    filters=["y0.125", "cdr50-75"],
    output_image_folder=IMG_FOLDER,
    metrics={
        "retrievable": {
            "title_tag": "retriebable",
            "xlabel_tag": "retriebable",
            "ylabel_tag": "",
        },
    },
    legend=[
        plots.RETRIEVAL_NODES,
        plots.CONCURRENT_SAMPLES,
        plots.FAST_ERROR_RATE,
        plots.CONNECTION_DELAYS,
        plots.GAMMA,
    ],
)

In [None]:
# example to reproduce the network details
import seaborn as sns
import matplotlib.pyplot as plt


# Network trace (one row per connection between a "from" and a "to" node)
file = CSV_FOLDER+"/retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv"

df = pd.read_csv(file)

# Count the rows of every (from, to) pair. `.size()` counts rows directly, so
# the result does not depend on the CSV carrying an "Unnamed: 0" index column
# that has to be renamed afterwards.
data = (
    df.groupby(["from", "to"])
    .size()
    .reset_index(name="total_connections")
    .sort_values(by="total_connections", ascending=False)
)

# One row per "from" node, one column per "to" node; pairs without any
# connection are filled with 0.
pivoted_data = data.pivot(index="from", columns="to", values="total_connections").fillna(0)
display(pivoted_data)

# plot heatmap of connections
cmap = sns.cm.rocket_r

sns.set()
# "auto" lets seaborn derive the tick labels from the pivot's index/columns.
# The strings "to"/"from" are not documented tick-label values; a plain string
# would be handled as a list-like of labels.
g = sns.heatmap(data=pivoted_data, xticklabels="auto", yticklabels="auto", cmap=cmap)
g.set(xlabel="to", ylabel="from")
# Render only once the heatmap has actually been drawn.
plt.show()


In [None]:
# example to reproduce the network details
import networkx as nx
import plotly.graph_objects as go


# Network trace (one row per connection between a "from" and a "to" node)
file = CSV_FOLDER+"/retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv"

df = pd.read_csv(file)
# Number of rows for every (from, to) pair
df = df.groupby(["from", "to"]).size().reset_index(name="count")
top_interactions = df.sort_values('count', ascending=False).head(10000)  # top 10000 interactions
display(top_interactions)

# Undirected graph whose edges carry the interaction count as an attribute
G = nx.from_pandas_edgelist(top_interactions, 'to', 'from', ['count'])
# Fixed seed: spring_layout starts from random positions, so without it the
# figure would look different on every run.
pos = nx.spring_layout(G, seed=42)

# Keep the coordinates on the graph as well, for anyone inspecting G later
for node in G.nodes():
    G.nodes[node]['pos'] = list(pos[node])

# Plotly draws all edges as one line trace; a None entry breaks the line
# between consecutive segments.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        size=10,
        colorbar=dict(
            thickness=15,
            # `title=dict(text=..., side=...)` replaces the deprecated
            # `titleside` attribute, which plotly 6 no longer accepts.
            title=dict(text='Node Connections', side='right'),
            xanchor='left',
        ),
        line_width=2))

# Colour and label every node by its number of adjacent nodes
node_adjacencies = []
node_text = []
for node in G.nodes():
    num_connections = len(G.adj[node])

    node_adjacencies.append(num_connections)
    node_text.append(f'Node id: {node}<br># of connections: {num_connections}')

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    # `title.font.size` replaces the deprecated `titlefont_size`
                    title=dict(text='Network of Top Address Interactions',
                               font=dict(size=16)),
                    showlegend=False,
                    hovermode='closest',
                    # leave room at the top so the title is not drawn over the plot
                    margin=dict(b=0, l=0, r=0, t=40),
                    annotations=[dict(
                        text="Based on top interactions",
                        showarrow=False,
                        xref="paper", yref="paper",
                        x=0.005, y=-0.002)],
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )
# Final title shown on the figure (overrides the text set in the layout above)
fig.update_layout(title_text="DHT network's interactions")
fig.show()
