# Latency CDF

In [None]:
import pandas as pd
import math
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pycountry
import os

def country_name(country_iso):
    """Return the pycountry name for an ISO 3166-1 alpha-2 country code.

    Args:
        country_iso: Two-letter country code passed to pycountry as
            ``alpha_2``.

    Returns:
        The ``name`` attribute of the matching pycountry record, or
        ``country_iso`` itself when pycountry has no record for the code.
    """
    try:
        country = pycountry.countries.get(alpha_2=country_iso)
    except LookupError:
        # NOTE(review): older pycountry releases are believed to raise
        # KeyError on a miss instead of returning None — treated the same.
        country = None
    # Fall back to the raw code rather than failing with AttributeError on
    # None, so one unknown code does not abort a whole plotting run.
    return country_iso if country is None else country.name

def filter_for(df, name, value):
    """Select the rows of ``df`` whose ``name`` column equals ``value``.

    Args:
        df: DataFrame to filter.
        name: Label of the column to compare.
        value: Value each cell of that column is compared against.

    Returns:
        The matching rows, with their original index labels and columns.
    """
    is_match = df[name].eq(value)
    return df[is_match]

def filter_for_range(df, name, lower_bound, upper_bound):
    """Select the rows of ``df`` whose ``name`` value lies in a half-open range.

    Args:
        df: DataFrame to filter.
        name: Label of the column to compare.
        lower_bound: Inclusive lower limit.
        upper_bound: Exclusive upper limit.

    Returns:
        The rows where ``lower_bound <= df[name] < upper_bound``.
    """
    column = df[name]
    at_or_above_lower = column.ge(lower_bound)
    below_upper = column.lt(upper_bound)
    return df.loc[at_or_above_lower & below_upper]

In [None]:
# Locations of the two Parquet inputs, relative to the working directory.
tls_data_file = "./parquet/tls_data.parquet"
probes_data_file = "./parquet/ripe_atlas_probe_data.parquet"

# Load both tables; the probe table is re-indexed by its 'id' column so it
# can act as the right-hand side of the join below.
tls_data = pd.read_parquet(tls_data_file)
probes = pd.read_parquet(probes_data_file).set_index('id')
# Attach the probe columns to every TLS row by matching 'prb_id' against the
# probes index (DataFrame.join performs a left join by default).
df = tls_data.join(probes, on='prb_id')

# Bare expression — presumably here so the notebook renders the joined frame.
df

In [None]:
# Directory the per-country PDF figures are written to.
results_dir = './cdf'

# Year boundaries as POSIX timestamps. They are pinned to UTC explicitly: a
# naive datetime's .timestamp() is interpreted in the machine's local
# timezone, which would shift the year split by the local UTC offset and make
# the result depend on where the notebook runs.
# NOTE(review): assumes the 'timestamp' column holds epoch seconds — confirm.
start2022 = datetime.fromisoformat('2022-01-01T00:00:00+00:00').timestamp()
start2023 = datetime.fromisoformat('2023-01-01T00:00:00+00:00').timestamp()
start2024 = datetime.fromisoformat('2024-01-01T00:00:00+00:00').timestamp()
# Open-ended upper limit for the last bucket; rows at or after 2030 are dropped.
future = datetime.fromisoformat('2030-01-01T00:00:00+00:00').timestamp()

# One frame per year; each range is inclusive at the start, exclusive at the end.
df2022 = filter_for_range(df, 'timestamp', start2022, start2023)
df2023 = filter_for_range(df, 'timestamp', start2023, start2024)
df2024 = filter_for_range(df, 'timestamp', start2024, future)

def per_country_cdf(unfiltered_df2022, unfiltered_df2023, unfiltered_df2024, c, title):
    """Plot and save the empirical CDF of log-scaled 'rt' values for one country.

    One curve per year (2022, 2023, 2024) is drawn from the rows of the
    corresponding frame whose 'country' equals ``c``. Only strictly positive
    'rt' values are kept, since the natural log is taken of them. Years
    without any usable value are skipped, and nothing is drawn or written when
    no year has data.

    The figure is saved as ``<results_dir>/cdf_<slug>.pdf``, where the slug is
    ``title`` lower-cased with spaces replaced by underscores. ``results_dir``
    is a module-level name and the directory must already exist.

    Args:
        unfiltered_df2022: Frame with 'country' and 'rt' columns for 2022.
        unfiltered_df2023: Same, for 2023.
        unfiltered_df2024: Same, for 2024.
        c: Value compared against the 'country' column.
        title: Used only to derive the output file name.

    Returns:
        None.
    """
    curves = []
    for offset, frame in enumerate((unfiltered_df2022, unfiltered_df2023, unfiltered_df2024)):
        rts = filter_for(frame, 'country', c)['rt'].to_numpy(dtype=float)
        # NaN fails the > 0 test, so missing values are dropped here as well.
        positive = rts[rts > 0]
        if positive.size:
            curves.append((2022 + offset, np.log(positive)))

    if not curves:
        return

    fig, ax = plt.subplots()
    for year, log_rts in curves:
        ax.ecdf(log_rts, label=str(year))
    ax.legend()

    # Ticks sit every 0.5 in log space but are labelled with the un-logged
    # value; none are placed at or beyond `pred`.
    # NOTE(review): 1200 is presumably a latency in milliseconds — confirm.
    pred = 1200
    tick_limit = min(max(log_rts.max() for _, log_rts in curves), math.log(pred))
    xticks_locations = np.arange(2, tick_limit, 0.5)
    xticks_labels = [str(int(v)) for v in np.round(np.exp(xticks_locations))]

    ax.set_xticks(xticks_locations, xticks_labels)
    # linspace includes the 1.0 tick, which arange(0, 1, 0.1) leaves out.
    ax.set_yticks(np.linspace(0, 1, 11))
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.spines[['right', 'top']].set_visible(False)
    fig.savefig(results_dir + "/cdf_" + title.lower().replace(" ", "_") + ".pdf")
    plt.show()
    # Close rather than clear: this runs once per country, and a cleared
    # figure stays registered with pyplot and keeps its memory.
    plt.close(fig)

def per_country_histogram(unfiltered_df2022, unfiltered_df2023, unfiltered_df2024, c, title):
    """Plot and save per-year histograms of log-scaled 'rt' values for one country.

    One stair-step outline per year (2022, 2023, 2024) is drawn from the rows
    of the corresponding frame whose 'country' equals ``c``. Only strictly
    positive 'rt' values are kept, since the natural log is taken of them.
    Years without any usable value are skipped, and nothing is drawn or
    written when no year has data.

    All years share the same 150 bins, spanning the combined range of the
    plotted values, so the counts of different years refer to identical
    intervals and can be compared directly.

    The figure is saved as ``<results_dir>/<slug>.pdf``, where the slug is
    ``title`` lower-cased with spaces replaced by underscores. ``results_dir``
    is a module-level name and the directory must already exist.

    Args:
        unfiltered_df2022: Frame with 'country' and 'rt' columns for 2022.
        unfiltered_df2023: Same, for 2023.
        unfiltered_df2024: Same, for 2024.
        c: Value compared against the 'country' column.
        title: Used only to derive the output file name.

    Returns:
        None.
    """
    series = []
    for offset, frame in enumerate((unfiltered_df2022, unfiltered_df2023, unfiltered_df2024)):
        rts = filter_for(frame, 'country', c)['rt'].to_numpy(dtype=float)
        # NaN fails the > 0 test, so missing values are dropped here as well.
        positive = rts[rts > 0]
        if positive.size:
            series.append((2022 + offset, np.log(positive)))

    if not series:
        return

    # Binning each year over its own range gives every year a different bin
    # width; common edges keep the per-bin counts comparable across years.
    bin_edges = np.histogram_bin_edges(
        np.concatenate([log_rts for _, log_rts in series]), bins=150
    )

    fig, ax = plt.subplots()
    for year, log_rts in series:
        counts, _ = np.histogram(log_rts, bins=bin_edges)
        ax.stairs(counts, bin_edges, label=str(year))
    ax.legend()

    # Ticks sit every 0.5 in log space but are labelled with the un-logged
    # value; none are placed at or beyond `pred`.
    # NOTE(review): 1200 is presumably a latency in milliseconds — confirm.
    pred = 1200
    tick_limit = min(max(log_rts.max() for _, log_rts in series), math.log(pred))
    xticks_locations = np.arange(2, tick_limit, 0.5)
    xticks_labels = [str(int(v)) for v in np.round(np.exp(xticks_locations))]

    ax.set_xticks(xticks_locations, xticks_labels)
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.spines[['right', 'top']].set_visible(False)
    fig.savefig(results_dir + "/" + title.lower().replace(" ", "_") + ".pdf")
    plt.show()
    # Close rather than clear: this runs once per country, and a cleared
    # figure stays registered with pyplot and keeps its memory.
    plt.close(fig)

# Create the output directory (including missing parents) if needed;
# exist_ok avoids the separate check-then-create step.
os.makedirs(results_dir, exist_ok=True)

# Only string country codes are plotted; missing values (NaN/None) are
# skipped. Sorting makes the processing order reproducible between runs.
for country in sorted(code for code in df['country'].unique() if isinstance(code, str)):
    # Resolve the display name once and reuse it for both figure titles.
    name = country_name(country)

    per_country_histogram(df2022, df2023, df2024, country, 'Histogram of Latencies of Starlink Probes in ' + name)
    per_country_cdf(df2022, df2023, df2024, country, 'Latencies of Starlink Probes in ' + name)