# Latency CDF

In [None]:
import pandas as pd
import math
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pycountry
import os

def country_name(country_iso):
    """Return the country name pycountry has for an alpha-2 code.

    Args:
        country_iso: Two-letter country code, looked up via the
            ``alpha_2`` field of ``pycountry.countries``.

    Returns:
        The ``name`` attribute of the matching pycountry entry, or
        ``str(country_iso)`` when no entry is found, so that callers
        building plot titles still get a usable string.
    """
    try:
        country = pycountry.countries.get(alpha_2=country_iso)
    except LookupError:
        # NOTE(review): some pycountry releases raise (KeyError/LookupError)
        # instead of returning None for unknown or non-string codes --
        # treat both outcomes the same way.
        country = None

    if country is None:
        return str(country_iso)
    return country.name

def filter_for(df, name, value):
    """Select the rows of ``df`` whose ``name`` column equals ``value``.

    Args:
        df: Frame to filter.
        name: Label of the column to compare.
        value: Value the column must equal for a row to be kept.

    Returns:
        The matching rows of ``df`` (original index preserved).
    """
    is_match = df[name] == value
    selected = df.loc[is_match]
    return selected

def filter_for_range(df, name, lower_bound, upper_bound):
    """Keep the rows of ``df`` whose ``name`` value lies in a half-open range.

    Args:
        df: Frame to filter.
        name: Label of the column to compare.
        lower_bound: Inclusive lower limit.
        upper_bound: Exclusive upper limit.

    Returns:
        Rows with ``lower_bound <= df[name] < upper_bound``.
    """
    column = df[name]
    at_or_above_lower = column >= lower_bound
    below_upper = column < upper_bound
    return df[at_or_above_lower & below_upper]

In [None]:
# Input parquet files. Judging by the file names these hold TLS measurement
# results and RIPE Atlas probe metadata -- schema is not visible here,
# TODO confirm.
tls_data_file = "./parquet/tls_data.parquet"
probes_data_file = "./parquet/ripe_atlas_probe_data.parquet"

tls_data = pd.read_parquet(tls_data_file)
# Index the probe table by its 'id' column so it can act as the join key.
probes = pd.read_parquet(probes_data_file).set_index('id')
# Attach the probe columns to every measurement row by matching 'prb_id'
# against the probe index. DataFrame.join is a left join by default, so rows
# without a matching probe are kept with missing values in the probe columns.
df = tls_data.join(probes, on='prb_id')

# Bare expression: displays the joined frame when run as a notebook cell.
df

In [None]:
# Directory that receives the generated CDF plots.
results_dir = './cdf'

# Year boundaries as POSIX timestamps (seconds since the epoch).
# NOTE(review): these datetimes are naive, so .timestamp() interprets them in
# the local time zone of the machine running the notebook -- confirm that this
# is the intended boundary for the 'timestamp' column.
start2022 = datetime(2022, 1, 1).timestamp()
start2023 = datetime(2023, 1, 1).timestamp()
start2024 = datetime(2024, 1, 1).timestamp()
# Upper sentinel for the last bucket: everything from 2024 up to 2030.
future = datetime(2030, 1, 1).timestamp()

# One frame per bucket, each a half-open [start, end) timestamp range.
df2022 = filter_for_range(df, 'timestamp', start2022, start2023)
df2023 = filter_for_range(df, 'timestamp', start2023, start2024)
# The lower bound has to be start2024; the builtin `min` is a function and
# cannot be compared against the timestamp column.
df2024 = filter_for_range(df, 'timestamp', start2024, future)
def per_country_cdf(df, c, title):
    """Plot, save and show the empirical CDF of latencies for one country.

    The CDF is computed over the natural logarithm of the positive 'rt'
    values; the x ticks are placed in log space and labelled with the
    corresponding un-logged value.

    Args:
        df: Frame with a 'country' column and an 'rt' column.
        c: Value of the 'country' column whose rows are plotted.
        title: Plot title. Lower-cased and with spaces replaced by
            underscores it also becomes the PDF file name inside the
            module-level ``results_dir``.

    Returns:
        None. When the selected rows contain no positive 'rt' value, a
        message is printed and no figure is created.
    """
    # Restrict the data to the requested country; without this every country
    # would be plotted from the same, unfiltered frame.
    country_rows = filter_for(df, 'country', c)

    # Only strictly positive values can be log-transformed (missing values
    # compare False against 0 and are dropped as well).
    rts = country_rows['rt']
    positive_rts = rts[rts > 0].to_numpy()
    if positive_rts.size == 0:
        # np.max() and the ECDF are undefined for an empty sample.
        print(f'No positive latencies for "{title}", skipping plot')
        return

    log_rts = np.log(positive_rts)

    fig, ax = plt.subplots()
    ax.ecdf(log_rts)
    ax.set_xlabel('CDF of Latencies in ms')

    # Ticks every 0.5 in log space, labelled with the rounded original value.
    xticks_locations = np.arange(2, np.max(log_rts), 0.5)
    xticks_labels = np.round(np.exp(xticks_locations), 0)
    ax.set_xticks(xticks_locations, labels=xticks_labels)

    # 0.0, 0.1, ..., 1.0 -- linspace includes the upper end of the CDF range.
    ax.set_yticks(np.linspace(0, 1, 11))
    ax.tick_params(axis='both', which='major', labelsize=7)
    ax.set_title(title)

    file_name = title.lower().replace(" ", "_") + ".pdf"
    fig.savefig(results_dir + "/" + file_name)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Make sure the output directory exists before any plot is saved.
os.makedirs(results_dir, exist_ok=True)

# Year label paired with the frame that holds that year's rows.
yearly_frames = ((2022, df2022), (2023, df2023), (2024, df2024))

# dropna(): rows without a country value (possible after the left join with
# the probe table) cannot be resolved to a country name.
# sorted(): keeps the order of the generated plots the same on every run.
for country in sorted(df['country'].dropna().unique()):
    # Resolve the display name once per country instead of once per plot.
    country_display_name = country_name(country)
    for year, year_df in yearly_frames:
        per_country_cdf(
            year_df,
            country,
            f'Latencies in {year} of Starlink Probes in {country_display_name}',
        )