# Analysis of the CID pinging phase

  

In [None]:
## Import dependencies
import sqlalchemy as sa
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

## Plotting defaults shared by the figures in this notebook
fig_size = (7, 4)
sns.set_context("talk", font_scale=1)

## Connection settings for the PostgreSQL database queried below
HOST = "localhost"
PORT = "5433"
DB = "hoarder_test"
USER = "user"
PASSWD = "password"

# Connect to the DB: build the engine that every query cell reads through
engine = sa.create_engine(
    f'postgresql://{USER}:{PASSWD}@{HOST}:{PORT}/{DB}'
)



In [None]:
## Get the mean fetch time of each ping round, expressed as hours elapsed
## since the first ping round

sql_query="""
    SELECT 
        cid_hash,
        ping_round, 
        fetch_time
    FROM fetch_results
    ORDER BY ping_round;
"""
ping_rounds = pd.read_sql_query(sql_query, engine)

# Average only fetch_time. The query also returns cid_hash (presumably a text
# column), and averaging the whole frame makes pandas >= 2.0 raise a TypeError
# on non-numeric columns. The double brackets keep avg_fetcht a DataFrame
# indexed by ping_round with a single "fetch_time" column.
avg_fetcht = ping_rounds.groupby("ping_round")[["fetch_time"]].mean()
hours_dist = avg_fetcht["fetch_time"].to_numpy()

# Shift so the first ping round is 0 and convert to hours.
# NOTE(review): the / 3600 assumes fetch_time is a numeric timestamp in
# seconds -- confirm against the fetch_results schema.
# An empty result set is left untouched instead of failing on hours_dist[0].
if hours_dist.size:
    hours_dist = (hours_dist - hours_dist[0]) / 3600
print(hours_dist)

In [None]:
## Get the distribution of connection results (conn_error) per ping_round

# NOTE(review): count(conn_error) skips NULLs, so rows whose conn_error is
# NULL would be reported with a count of 0 -- confirm the column is never NULL.
sql_query = """
	SELECT 
		ping_round,
		conn_error,
		count(conn_error) as error_count
	FROM ping_results
	GROUP BY ping_round, conn_error
	ORDER BY ping_round, conn_error;
"""

error_dists = pd.read_sql_query(sql_query, engine)

# One row per ping_round and one column per conn_error value; a conn_error
# that never occurred in a given round counts as 0.
pv_table = error_dists.pivot(index=["ping_round"], columns="conn_error", values="error_count")
pv_table = pv_table.fillna(0)
aux = pd.DataFrame(pv_table.to_records())

print(aux)

# Keep ping_round as the x axis rather than dropping it: with the column
# dropped, the plot used the positional row index while the axis label
# claimed hours.
aux = aux.set_index("ping_round")
x_label = "Ping Round"

# hours_dist (computed from fetch_results in the earlier cell) holds one value
# per ping round. Use it as the x axis only when it lines up one-to-one with
# the rounds found in ping_results; otherwise keep the ping_round axis and a
# label that matches it.
if len(hours_dist) == len(aux):
    aux.index = hours_dist
    x_label = "Time Since Publication (Hours)"

## Line plot with the count of each conn_error value per ping round
aux.plot(figsize=fig_size)
plt.xlabel(x_label)
plt.ylabel("Error Count")
# Place the legend outside the axes, to the right of the plot
plt.legend(loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), prop={'size': 16})
plt.show()




In [None]:
# Dispose of the SQLAlchemy engine created in the setup cell now that all
# queries have run
engine.dispose()