In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import scienceplots

# Configuration
# TAG selects the experiment run: every input CSV is read from ../results/<TAG>/
# and its file name starts with the same tag.
TAG = '100_jobs'
FOLDER = f"../results/{TAG}/"
PROV_JOB = f"{FOLDER}{TAG}_provide_jobs_summary.csv"
RETRIEVE_JOB = f"{FOLDER}{TAG}_retrieval_jobs_summary.csv"
INDV_PROVS = f"{FOLDER}{TAG}_indv_provide_summary.csv"
INDV_RETRIEVALS = f"{FOLDER}{TAG}_indv_retrieval_summary.csv"

# plot style/configuration
# `plt.style.context(...)` only builds a context manager; called outside a `with`
# block it changes nothing. `plt.style.use(...)` applies the styles to every
# figure created after this cell.
# NOTE(review): the scienceplots 'science' style is understood to turn on LaTeX
# text rendering; if LaTeX is not installed, append 'no-latex' to the list - confirm.
plt.style.use(['science', 'ieee', 'std-colors'])

In [None]:
# plots
def plot_cdf(df: pd.DataFrame, column: str, hue=None, opts: dict = None, saveto: str = "", xlim: tuple = ()):
    """Draw the empirical CDF of one column of ``df`` and show the figure.

    Args:
        df: table holding the samples.
        column: name of the column whose distribution is plotted.
        hue: forwarded to seaborn's ``hue`` argument to draw one curve per
            group; ``None`` draws a single curve.
        opts: plot labels keyed by 't' (title), 'x' (x-axis label) and
            'y' (y-axis label). Keys that are absent leave that label as is.
        saveto: path the figure is written to; an empty string skips saving.
        xlim: ``(low, high)`` range for the x-axis; an empty tuple keeps the
            automatic range.
    """
    opts = opts or {}
    df = df.sort_values(column)
    # Forward the caller's hue (the previous code always passed None).
    ax = sns.ecdfplot(data=df, x=column, hue=hue)

    # Translate the short option keys into the matplotlib setter names.
    label_kwargs = {
        setter: opts[key]
        for key, setter in (('t', 'title'), ('x', 'xlabel'), ('y', 'ylabel'))
        if key in opts
    }
    ax.set(**label_kwargs)

    # Apply the axis range BEFORE saving so the file on disk matches what is shown.
    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if saveto:
        plt.savefig(saveto)
    plt.show()

def plot_pdf(df: pd.DataFrame, column: str, hue=None, opts: dict = None, saveto: str = "", xlim: tuple = ()):
    """Draw the distribution of one column of ``df`` with ``sns.displot`` (KDE off) and show it.

    Args:
        df: table holding the samples.
        column: name of the column whose distribution is plotted.
        hue: forwarded to seaborn's ``hue`` argument to split the plot per
            group; ``None`` draws a single distribution.
        opts: plot labels keyed by 't' (title), 'x' (x-axis label) and
            'y' (y-axis label). Keys that are absent leave that label as is.
        saveto: path the figure is written to; an empty string skips saving.
        xlim: ``(low, high)`` range for the x-axis; an empty tuple keeps the
            automatic range.
    """
    opts = opts or {}
    df = df.sort_values(column)
    # Forward the caller's hue (the previous code always passed None).
    grid = sns.displot(data=df, x=column, kde=False, hue=hue)

    # Translate the short option keys into the matplotlib setter names.
    label_kwargs = {
        setter: opts[key]
        for key, setter in (('t', 'title'), ('x', 'xlabel'), ('y', 'ylabel'))
        if key in opts
    }
    grid.set(**label_kwargs)

    # Apply the axis range BEFORE saving so the file on disk matches what is shown.
    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if saveto:
        plt.savefig(saveto)
    plt.show()


In [None]:
# Load the per-retrieval summary CSV and preview the table
retrievals = pd.read_csv(INDV_RETRIEVALS)
display(retrievals)

# --- Retrieval duration: CDF, then PDF, both limited to 0-2000 on the x-axis ---
plot_cdf(
    df=retrievals,
    column='retrieval_duration_ms',
    opts=dict(
        t='Retrieval time CDF distribution',
        x='Retrieval lookup time (ms)',
        y="Retrieval's CDF",
    ),
    saveto="",
    xlim=(0, 2_000),
)
plot_pdf(
    df=retrievals,
    column='retrieval_duration_ms',
    opts=dict(
        t='Retrieval time PDF distribution',
        x='Retrieval lookup time (ms)',
        y="Retrievals",
    ),
    saveto="",
    xlim=(0, 2_000),
)

# --- Total hops: CDF, then PDF, both limited to 0-100 on the x-axis ---
plot_cdf(
    df=retrievals,
    column='total_hops',
    opts=dict(
        t='Retrieval hops CDF distribution',
        x='Number of total hops',
        y="Hops's CDF",
    ),
    saveto="",
    xlim=(0, 100),
)
plot_pdf(
    df=retrievals,
    column='total_hops',
    opts=dict(
        t='Retrieval hops PDF distribution',
        x='Number of total hops',
        y="Hops",
    ),
    saveto="",
    xlim=(0, 100),
)


In [None]:
# Load the job-level retrieval summary CSV
retrievals = pd.read_csv(RETRIEVE_JOB)

# --- Job duration: CDF, then PDF, both limited to 0-3000 on the x-axis ---
plot_cdf(
    df=retrievals,
    column='duration_ms',
    opts=dict(
        t='Retrieval time CDF distribution',
        x='Retrieval lookup time (ms)',
        y="Retrieval's CDF",
    ),
    saveto="",
    xlim=(0, 3_000),
)
plot_pdf(
    df=retrievals,
    column='duration_ms',
    opts=dict(
        t='Retrieval time PDF distribution',
        x='Retrieval lookup time (ms)',
        y="Retrievals",
    ),
    saveto="",
    xlim=(0, 3_000),
)

