In [None]:
%load_ext autoreload
%autoreload 2
import csv

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib_inline
import numpy as np
import pandas as pd
import utils
from plots import color, time_bar_plot, cumulative_traffic_plot

# Notebook-wide display configuration.
# Inline figures are emitted as SVG.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# Default size for every figure created below.
plt.rcParams['figure.figsize'] = [15, 6]

# pandas options: opt in to the future "no silent downcasting" behaviour and to
# copy-on-write mode. Later cells assign columns on filtered frames
# (e.g. df = df[...]; df['throughput'] = ...), which relies on copy-on-write
# to leave `logs` untouched.
pd.set_option('future.no_silent_downcasting', True)
pd.options.mode.copy_on_write = True


In [None]:
# Load the stream results found under ../tests/results/1gbps into `logs`.
# The commented patterns are alternative selections; only the active one is passed.
# NOTE(review): the next cell assigns `logs` again from the 10gbps results, so on a
# top-to-bottom run this frame is replaced — run only the loader for the dataset
# you want to analyse.
# `utils.load_antler_df` is project code; presumably it returns a DataFrame with
# categorical 'date' and 'algorithm' columns (later cells use `.cat` on them) — TODO confirm.
logs = utils.load_antler_df(
    # '../tests/results/1gbps/*/*_streams.json*',
    # '../tests/results/1gbps/*/*_codel_streams.json*',
    '../tests/results/1gbps/6h-pareto1.2-heavy7/*_streams.json*',
)

In [None]:
# Load the stream results found under ../tests/results/10gbps into `logs`.
# NOTE(review): this overwrites the `logs` assigned by the previous (1gbps) cell;
# everything below then operates on the 10gbps data when the notebook is run
# top to bottom.
# The commented patterns are narrower selections of the same result tree.
logs = utils.load_antler_df(
    # '../tests/results/10gbps/*-2/*_streams.json*',
    # '../tests/results/10gbps/*-3/*_streams.json*',
    # '../tests/results/10gbps/dead/*_streams.json*',
    # '../tests/results/10gbps/newpie1/*pie_streams.json*',
    # '../tests/results/10gbps/newpie3/*pie_streams.json*',
    # '../tests/results/10gbps/4h-10g-*/*_codel_streams.json*',
    # '../tests/results/10gbps/4h-10g-*/*fifo*_streams.json*',
    # '../tests/results/10gbps/4h-10g-*/*cake_streams.json*',
    '../tests/results/10gbps/*/*_streams.json*',
)

In [None]:
# Each pair is (size-based variant, base algorithm). Later cells rely on this
# pairing: they walk the algorithms two at a time and compare neighbours.
_ALGO_PAIRS = (
    ('age-pfifofast', 'pfifo'),
    ('age-cake', 'cake'),
    ('age-prio-fqcodel', 'fqcodel'),
    ('age-prio-codel', 'codel'),
    ('age-prio-fqpie', 'fqpie'),
    ('age-prio-pie', 'pie'),
)
# Flatten the pairs into the display/category order used throughout the notebook.
algo_order = [algo for pair in _ALGO_PAIRS for algo in pair]
# Fix the categories (and their order) of the 'algorithm' column to that list.
logs['algorithm'] = logs['algorithm'].cat.set_categories(algo_order)

In [None]:
fig, ax = plt.subplots()

# Amount of data the link could carry over one run: 10 Gbit/s for 4 hours, which is
# what the plot title states. The normalisation previously used 1 Gbit/s, which made
# every bar ten times too tall relative to the capacity line drawn at 1.
# NOTE(review): for the 1gbps result set, change both this constant and the title.
LINK_CAPACITY_BYTES = 10_000_000_000 / 8 * 4 * 3600

df = logs

# Total transferred data per (run, algorithm).
df = df.groupby(['date', 'algorithm'], observed=False).agg(total_data=('size', 'sum'), n_exps=('date', 'nunique'))
# 'size' is presumably in bytes (hence the /8 in the capacity) — TODO confirm.
df['total_data'] = df['total_data'] / LINK_CAPACITY_BYTES

df.unstack().plot(kind='bar', y='total_data', rot=0, ax=ax, color=color)
ax.set_title('10gbps, 4 hours')
# Reference line: goodput equal to the link capacity.
ax.axhline(1, linestyle='-')
ax.grid(axis='y')
ax.set_axisbelow(True)
# Single legend call; an earlier ax.legend(ncol=2) was immediately replaced by this one.
ax.legend(loc='lower center', ncol=3)

fig.suptitle('Goodput divided by the link capacity')
fig.tight_layout()

In [None]:
fig, ax = plt.subplots()

# df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 6*3600 - 60)] # 1gbps
# 10gbps: keep connections that start after the first 1800 s and finish before the
# end of the 4 h run.
# NOTE(review): the tail margin here is 6 s while the other cells use 60 s — confirm
# which one is intended.
started_late_enough = logs['start_at'] > 1800
finished_in_time = logs['start_at'] + logs['time'] < 4 * 3600 - 6
# CAKE variants are left out of this figure, as are zero-duration records.
not_cake = ~logs['algorithm'].isin(['cake', 'age-cake'])
has_duration = logs['time'] > 0
df = logs[started_late_enough & finished_in_time & not_cake & has_duration]

per_run_and_algo = df.groupby(['date', 'algorithm'], observed=False)
time_bar_plot(per_run_and_algo, ax=ax)
ax.set_title('10gbps, 4 hours')
ax.set_axisbelow(True)
ax.grid(axis='y')
ax.legend(loc='lower center', ncol=3)

fig.suptitle('Mean completion time per connection')
fig.tight_layout()

In [None]:
from itertools import chain

# One display name per (size-based, base) pair; relies on algo_order listing the
# algorithms pair by pair in this same order.
display_names = ('FIFO', 'CAKE', 'FQ-CoDel', 'CoDel', 'FQ-PIE', 'PIE')
# Emit every display name twice so both members of a pair map to it.
algonames = chain.from_iterable(zip(display_names, display_names))
algotoname = {algo: label for algo, label in zip(algo_order, algonames)}

def rename_algos(x):
    """Return the display name for the internal algorithm identifier `x`.

    Identifiers starting with 'age-' get the display name of their family
    prefixed with 'Size-based '; all others get the bare display name.
    Looks the name up in the module-level `algotoname` mapping and raises
    KeyError for an identifier that is not in it.
    """
    if not x.startswith('age-'):
        return f'{algotoname[x]}'
    return f'Size-based {algotoname[x]}'


In [None]:
# df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 6*3600 - 60)]
# Keep connections that start after the first 1800 s and end at least 60 s before
# the 4 h mark.
in_window = (logs['start_at'] > 1800) & (logs['start_at'] + logs['time'] < 4 * 3600 - 60)
df = logs[in_window]

# Mean and standard error of the completion time, one row per algorithm.
data = df.groupby(['algorithm'], observed=False).agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
# data['interval'] = data['sem_time'] * 1.96 # 95% confidence interval


def _tab20_hex(algo):
    """tab20 colour of `algo` (indexed by its position in algo_order) as a '0x…' hex string."""
    rgb = mpl.colormaps['tab20'].colors[algo_order.index(algo)]
    return mpl.colors.to_hex(rgb).replace('#', '0x')


data['color'] = data.index.map(_tab20_hex)
# Relative difference between each row and the row right after it; for a size-based
# row the next row is its base algorithm.
next_row_mean = data['mean_time'].shift(-1)
data['gain'] = data['mean_time'] / next_row_mean - 1
data.index = data.index.map(rename_algos)
print(data.to_csv(columns=['mean_time', 'sem_time', 'gain'], header=False, sep='\t', quoting=csv.QUOTE_NONNUMERIC))

In [None]:
from itertools import batched

# Emit the summary in `data` as the rows of a LaTeX table: one row per algorithm
# family, with the size-based variant and the base version side by side.
print('\\hline')
print('\\multirow{2}{*}{Algorithm} & \\multicolumn{2}{c|}{Size-based version} & \\multicolumn{2}{c|}{Base version} & \\multirow{2}{*}{Gain} \\\\')
# print('\\hline')
print(' & {Mean FCT} & {95\\% CI} &  {Mean FCT} & {95\\% CI} &   \\\\')
print('\\hline\n\\hline')

# data.index alternates (size-based, base) labels, so consume it two at a time.
for sized, base in batched(data.index, 2):
    family = sized.removeprefix('Size-based ')
    # Values are scaled by 1000 before printing; the CI half-width is 1.96 * SEM.
    sized_mean = data.loc[sized, "mean_time"]*1000
    sized_ci = data.loc[sized, "sem_time"] * 1.96 *1000
    base_mean = data.loc[base, "mean_time"]*1000
    base_ci = data.loc[base, "sem_time"] * 1.96 *1000
    gain_pct = data.loc[sized, "gain"]*100
    print(
        f'{family} & '
        f'{sized_mean:.4g} & {sized_ci:.3g} &'
        f'{base_mean:.4g} & {base_ci:.3g} & '
        f'{gain_pct:.2g} \\% \\\\'
    )
    print('\\hline')

In [None]:
# Only referenced by the commented-out plotting code further down in this cell.
threshold = 6332751
# df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 6*3600 - 60)]
# Connections starting after 1800 s and ending at least 60 s before the 4 h mark.
in_window = (logs['start_at'] > 1800) & (logs['start_at'] + logs['time'] < 4*3600 - 60)
df = logs[in_window]
# df = df[~df['algorithm'].str.contains('pie') | df['date'].str.contains('newpie2')]
# df = df[~df['date'].str.contains('newpie1')]


# Mean completion time per (algorithm, size), pivoted to one column per algorithm.
mean_by_size = df.groupby(['algorithm', 'size'], observed=True).agg(mean_time=('time', 'mean'))
df = mean_by_size.unstack('algorithm')
# Columns are ('mean_time', <algorithm>) tuples; keep only the display name.
df.columns = df.columns.map(lambda column: rename_algos(column[1]))

print(df.to_csv(sep='\t', index=True, quoting=csv.QUOTE_NONNUMERIC))

# fig, ax = plt.subplots(figsize=(18, 8))
# df.plot(kind='line', y='mean_time', rot=0, ylabel='time (s)', color=color, ax=ax, linewidth=0.5)

# ax.axvline(x=threshold, linestyle='--', color='red', alpha=0.5)
# ax.set_xscale('log')
# ax.set_yscale('log')

# fig.suptitle('Completion time distribution')
# fig.tight_layout()

In [None]:
# thresholds = [90475270, 6332751, 13746699]
# df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 6*3600 - 60)]

# fig, axs = plt.subplots(len(df['date'].cat.categories),1, figsize=(18, 16), sharex=True)
# for (date, dfg), ax, thx in zip(df.groupby('date', observed=True), axs, thresholds):
#     dfg = dfg.groupby(['algorithm', 'size'], observed=False).agg(mean_time=('time', 'mean'), sem_time=('time', 'sem')).unstack('algorithm')
#     dfg = dfg.rolling(3, center=True).mean()
#     dfg.plot(kind='line', y='mean_time', rot=0, ylabel='time (s)', color=color, ax=ax, linewidth=0.5)

#     ax.set_title(date)
#     ax.axvline(x=thx, linestyle='--', color='red', alpha=0.5)
#     ax.set_xscale('log')
#     ax.set_yscale('log')

# fig.suptitle('Completion time distribution - no extra delay')
# fig.tight_layout()

# thresholds = [19500715, 19500715]
# df = logs[(logs['start_at'] > 360) & (logs['start_at'] + logs['time'] < 4*3600 - 6)]

# fig, [axs] = plt.subplots(len(df['date'].cat.categories),1, figsize=(18, 8), sharex=True, squeeze=False)
# for (date, dfg), ax, thx in zip(df.groupby('date', observed=True), axs, thresholds):
#     dfg = dfg.groupby(['algorithm', 'size'], observed=False).agg(mean_time=('time', 'mean'), sem_time=('time', 'sem')).unstack('algorithm')
#     dfg = dfg.rolling(3, center=True).mean()
#     dfg.plot(kind='line', y='mean_time', rot=0, ylabel='time (s)', color=color, ax=ax, linewidth=0.5)

#     ax.set_title(date)
#     ax.axvline(x=thx, linestyle='--', color='red', alpha=0.5)
#     ax.set_xscale('log')
#     ax.set_yscale('log')

# NOTE: every plotting block of this cell is commented out above, so `fig` here would
# be whatever figure an earlier cell happened to leave behind (or undefined on a
# fresh kernel). Retitling that figure is not intended, so these two lines stay
# disabled until one of the plotting blocks above is restored.
# fig.suptitle('Completion time distribution')
# fig.tight_layout()

In [None]:
# df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 6*3600 - 60)]
# Size classes as label -> upper edge; the lower edge is the previous upper edge
# (0 for the first class).
size_classes = {
    '<1M': 1_000_000,
    '1M-10M': 10_000_000,
    '10M-90M': 90475270,
    '90M-800M': 800_000_000,
    '800M-6G': 6_000_000_000,
    '6G+': np.inf,
}
bins = [0, *size_classes.values()]
labels = list(size_classes)

# `df` is the same object as `logs`, so the new column is added to `logs` itself.
df = logs
df['size_cat'] = pd.cut(df['size'], bins=bins, labels=labels)

# Number of connections per run, algorithm and size class (classes as columns).
counts = df.groupby(['date', 'algorithm', 'size_cat'], observed=True).size()
counts.unstack()

In [None]:
# df = logs[(logs['setup'] == setup_renames['long2']) & (logs['seed'] == '15867')]
# df = logs[logs['date'] == '6h-pareto1.2-heavy']
df = logs#[logs['date'] == '4h-10g-2']
# `df` is `logs` itself here, so 'ended_at' is added to `logs` as well.
df['ended_at'] = df['start_at'] + df['time']

# 200 evenly spaced instants from 0 to the last completion.
timepoints = np.linspace(0, df['ended_at'].max(), 200)
algos = df['algorithm'].cat.categories
n_algos = len(algos)
partial_sems = np.zeros((0, n_algos))

# For every instant, the mean completion time of all connections finished by then,
# one value per algorithm (in category order).
mean_rows = []
for t in timepoints:
    # finished = df[(df['ended_at'] <= t) & (df['ended_at'] > 3600) & (df['ended_at'] < 6*3600 - 20)]
    finished = df[df['ended_at'] <= t]
    stats = finished.groupby('algorithm', observed=False).agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
    mean_rows.append(stats.loc[algos, 'mean_time'])
    # partial_sems = np.vstack([partial_sems, stats.loc[algos, 'sem_time']])
# Stack once; the empty leading block pins the (n_timepoints, n_algos) float layout.
partial_means = np.vstack([np.zeros((0, n_algos)), *mean_rows])

fig, ax = plt.subplots(sharey=True)
for i, algo in enumerate(algos):
    line_color = color[i]
    means = partial_means[:, i]
    ax.plot(timepoints, means, label=algo, color=line_color)
    # lower, upper = means - 1.96 * partial_sems[:, i], means + 1.96 * partial_sems[:, i]
    # ax.fill_between(timepoints, lower, upper, alpha=0.3, color=line_color)
    # Dashed line at the final value, to see how quickly the running mean settles.
    ax.axhline(means[-1], linestyle='--', color=line_color, alpha=0.5)
ax.legend(loc='lower right')
ax.set_ylabel('mean completion time (s)')
ax.set_xlabel('elapsed time (s)')
ax.set_title('Expanding mean completion time per connection, for runs with the same seed')
fig.tight_layout()
# df.sort_values('start_at').groupby('algorithm', observed=True)['start_at', 'time'].expanding().mean().unstack().plot()

In [None]:
# Spot check: for the second run (by category order) and actor 33, list the sizes of
# connections 18400..18419 (ordered by start time) side by side for every algorithm.
second_run = logs['date'].cat.categories[1]
df = logs[logs['date'] == second_run]
df = df[df['actor'] == 33]
df = df.sort_values('start_at')
sizes_by_algo = df.groupby(['algorithm'], observed=True)['size']
g = sizes_by_algo.apply(lambda sizes: list(sizes)[18400:18420])
pd.DataFrame(dict(g))

In [None]:
# Size classes for this breakdown.
# NOTE(review): the 90475270 edge is labelled '86M' here but '90M' in the earlier
# size-class cell (90475270 bytes is about 86.3 MiB) — confirm which unit is intended.
bins = [0, 1_000_000, 10_000_000, 55000000, 90475270, 500_000_000, 800_000_000, np.inf]
labels = ['<1M', '1M-10M', '10M-55M', "55M-86M", '86M-500M', '500M-800M', '>800M']
# The column is written on `logs` itself (the original did the same through the
# alias `df = logs`); the filtered frame below picks it up from there.
logs['size_cat'] = pd.cut(logs['size'], bins=bins, labels=labels)
df = logs[logs['date'] == logs['date'].cat.categories[1]]

# Keep, for every (run, algorithm, actor), the 18000 connections that started first.
# A stable sort followed by groupby.head selects the same rows as a per-group
# nsmallest(18000, 'start_at'), but keeps the grouping columns in the frame:
# groupby.apply operating on the grouping columns is deprecated, and once they are
# excluded the reset_index(drop=True) that followed would discard 'date' and
# 'algorithm', breaking the groupby below.
df = (
    df.sort_values('start_at', kind='stable')
    .groupby(['date', 'algorithm', 'actor'], observed=True)
    .head(18000)
)
# Number of those connections per run, algorithm and size class.
df.groupby(['date', 'algorithm', 'size_cat'], observed=True).size().unstack()

In [None]:
# df = logs[logs['date'] == logs['date'].cat.categories[1]]
df = logs
# Number of distinct connection sizes (a missing value, if any, counts as one).
print(df['size'].nunique(dropna=False))
# Histogram of completion times, 30 bins, logarithmic count axis.
df['time'].plot.hist(bins=30, log=True)

In [None]:
# Per-connection throughput (size / completion time).
# Zero-duration records are dropped first: dividing by 0 gives an infinite
# throughput, which turns the fairness index of the whole group into NaN.
# Working on the filtered frame also avoids adding a column to the shared `logs`.
df = logs[logs['time'] > 0]
df = df.assign(throughput=df['size'] / df['time'])

def fairness_jaine(x):
    """Jain's fairness index of the values in `x`: (sum x)^2 / (n * sum x^2).

    `x` is any sized array-like supporting `x ** 2` (numpy array, pandas Series).
    The result is 1 when all values are equal and 1/n when a single value is
    non-zero. No guard is applied for empty or all-zero input.
    """
    count = len(x)
    total = np.sum(x)
    sum_of_squares = np.sum(x ** 2)
    return total ** 2 / (count * sum_of_squares)

# One fairness index per (run, algorithm), computed over per-connection throughput.
per_run_and_algo = df.groupby(['date', 'algorithm'], observed=True)
df = per_run_and_algo.agg(fairness=('throughput', fairness_jaine))
# Bars grouped by run, one bar per algorithm.
df.unstack('algorithm').plot(
    kind='bar',
    y='fairness',
    rot=0,
    ylabel='fairness index',
    color=color,
)

In [None]:
# Lorenz curve of per-connection throughput for one run and one algorithm.
df = logs
df = df[df['time'] > 0]  # zero-duration records would give an infinite throughput
df['throughput'] = df['size'] / df['time']

# fig, axs = plt.subplots(1, 2, sharey=True, width_ratios=(1,1))
# for (date, dfd), ax in zip(df.groupby('setup', observed=True), axs):
#     for (algorithm, dfg), c in zip(dfd.groupby('algorithm', observed=False), color):

fig, ax = plt.subplots()

dfg = df[(df['date'] == '4h-new') & (df['algorithm'] == 'age-prio-codel')]
if dfg.empty:
    # Without this the cell fails further down with an opaque indexing error.
    raise ValueError("no connections for date '4h-new' and algorithm 'age-prio-codel' in `logs`")
dfg = dfg.sort_values('throughput')
# Throughput histogram (drawn on its own figure by pandas).
dfg.hist(bins=100, column='throughput')

# Cumulative share of the total throughput, connections sorted from slowest to fastest.
lor = dfg['throughput'].cumsum()
lor /= lor.max()
# Share of connections covered by each cumulative value: 1/n, 2/n, ..., 1.
# (Starting at 0 would pair the share of the first k+1 connections with k/n.)
x = np.arange(1, len(lor) + 1) / len(lor)

# Export 100 evenly spaced points of the curve as tab-separated values.
idx = np.linspace(0, len(lor) - 1, 100).astype(int)
# idx = np.arange(len(lor))  # Use all points for the full curve
data = pd.DataFrame({'lorenz': lor.iloc[idx], 'x': x[idx]})

print(data.to_csv(sep='\t', index=False))

# The curve needs a label, otherwise the legend below has nothing to show.
ax.plot(x, lor, label='age-prio-codel')
ax.set_xlabel('fraction of connections')
ax.set_ylabel('cumulative share of throughput')
ax.legend(loc='upper left')
# ax.set_title(date)
# ax.set_title(date)


In [None]:
def lorenz_curve_gap(x):
    """Largest vertical gap between the line of equality and the Lorenz curve of `x`.

    Parameters
    ----------
    x : pandas.Series
        Values to analyse (throughput per connection in this notebook).

    Returns
    -------
    float
        max over k of (k/n - share of the total held by the k smallest values).
        0 for a perfectly equal distribution, approaching 1 when a single value
        holds everything. NaN for an empty series.
    """
    cumulative = x.sort_values().cumsum()
    n = len(cumulative)
    if n == 0:
        return np.nan
    cumulative = cumulative / cumulative.max()
    # The k-th cumulative value covers the k smallest observations, i.e. a
    # population share of k/n for k = 1..n. Starting the range at 0 would shift
    # the curve by one observation and yield -1/n for equal values.
    population = np.arange(1, n + 1) / n
    # ndarray - Series is evaluated positionally, so the shuffled index left by
    # sort_values does not matter.
    return (population - cumulative).max()

# for df in [logs, logs]:
#     df['throughput'] = df['size'] / df['time']
#     df = df[df['throughput'] > 0]

#     df = df.groupby(['date', 'algorithm'], observed=True).agg(lorenz_gap=('throughput', lorenz_curve_gap))
#     df.unstack('algorithm').plot(kind='bar', y='lorenz_gap', rot=0, ylabel='Lorenz curve gap', color=color)

In [None]:
# df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 6 * 3600 - 60)]
# Connections starting after 1800 s and ending at least 60 s before the 4 h mark.
in_window = (logs['start_at'] > 1800) & (logs['start_at'] + logs['time'] < 4*3600 - 60)
# df = df[~df['date'].str.contains('newpie1')]

# df = df[~df['algorithm'].str.contains('pie') | df['date'].str.contains('newpie2')]

df = logs[in_window]
# Throughput is only defined for connections with a positive duration.
df = df[df['time'] > 0]
df = df.assign(throughput=df['size'] / df['time'])
df = df[df['throughput'] > 0]

# One Lorenz-curve gap per algorithm, labelled with display names.
gap_by_algo = df.groupby(['algorithm'], observed=True).agg(lorenz_gap=('throughput', lorenz_curve_gap))
gap_by_algo.index = gap_by_algo.index.map(rename_algos)
df = gap_by_algo
# df.plot(kind='bar', color=color)
print(df.to_csv(sep='\t', index=True, header=False, quoting=csv.QUOTE_NONNUMERIC))