In [None]:
import glob
import gzip

import matplotlib.pyplot as plt
import matplotlib_inline
import matplotlib as mpl
import numpy as np
import orjson
import pandas as pd

# Notebook-wide display configuration.
# Inline figures are emitted as SVG.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# Default figure size in inches; individual cells override it via figsize=...
mpl.rcParams['figure.figsize'] = [14, 9]

# Opt in to the future pandas behaviour of not silently downcasting.
pd.options.future.no_silent_downcasting = True

In [None]:
%load_ext autoreload
%autoreload 2
import utils

In [None]:
# Load every gzipped JSON log of the 'long2' runs through the project helper.
# Later cells read the columns algorithm, setup, seed, date, size, start_at and time
# (presumably one row per flow — verify against utils.load_log_df).
logs = utils.load_log_df(['../logs/long2/*.json.gz'])
# Print the column/dtype/memory summary as a quick sanity check of the load.
logs.info()

In [None]:
# Algorithms the (currently disabled) generic reordering below would move to the end.
append_algos = ['cake', 'cakeage']
# Raw algorithm identifier -> label shown in the figures; unlisted names are kept as-is.
algorithm_renames = dict(
    age='age-prio-codel',
    pfifofast='age-pfifofast',
    cakeage='age-cake',
)
# Raw setup identifier -> human-readable description; unlisted names are kept as-is.
setup_renames = dict(long2='12 hours, 5 classes')

# logs['algorithm'] = logs['algorithm'].cat.reorder_categories([*logs['algorithm'].cat.categories.difference(append_algos), *append_algos])
# A dict passed to rename_categories leaves categories missing from the mapping unchanged.
logs['algorithm'] = logs['algorithm'].cat.rename_categories(algorithm_renames)
# Fixed display order; this must list exactly the categories present after renaming.
logs['algorithm'] = logs['algorithm'].cat.reorder_categories([
    'age-cake',
    'age-pfifofast',
    'age-prio-codel',
    'cake',
    'codel',
    'fqcodel',
    'pfifo',
])
logs['setup'] = logs['setup'].apply(lambda name: setup_renames.get(name, name))

In [None]:
# Bar/line palette shared by all figures: entries of matplotlib's 'tab20'
# sequence picked in a fixed order (indices 0, 2, 4, 6 first, then 1, 5, 7, 3).
cs = mpl.color_sequences['tab20']
color = [cs[idx] for idx in (0, 2, 4, 6, 1, 5, 7, 3)]

def time_bar_plot(data: 'pd.core.groupby.DataFrameGroupBy', **kwargs):
    """Draw a grouped bar chart of the mean ``time`` with 95% CI error bars.

    Parameters
    ----------
    data
        A *grouped* frame, e.g. ``df.groupby(['setup', 'algorithm'])``, whose
        underlying frame has a ``time`` column. A plain DataFrame does not
        work here: the named aggregation would put ``mean_time``/``sem_time``
        in the index, and the ``sem_time`` column lookup below would fail.
        The innermost group level is unstacked, so it becomes the bars within
        each cluster.
    **kwargs
        Forwarded to ``DataFrame.plot`` (``ax``, ``title``, ...). ``rot``
        defaults to 0 when not given.

    Returns
    -------
    None
        The chart is drawn as a side effect.
    """
    # Pull 'rot' out first so it is not passed twice to .plot().
    rotation = kwargs.pop('rot', 0)
    stats = data.agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
    # Half-width of the normal-approximation 95% confidence interval.
    stats['interval'] = stats['sem_time'] * 1.96
    # `color` is the palette defined at notebook level.
    stats.unstack().plot(
        kind='bar',
        y='mean_time',
        yerr='interval',
        rot=rotation,
        ylabel='time (s)',
        color=color,
        **kwargs,
    )

def cumulative_traffic_plot(data: pd.DataFrame, **kwargs):
    """Plot the running total of transferred bytes against flow size.

    Rows are grouped by ``size``, the sizes in each group are summed, and the
    sums are accumulated in the order the groups come out of the groupby. The
    result is drawn as a line. Extra keyword arguments go to
    ``DataFrame.plot``; ``rot`` defaults to 0.
    """
    rotation = kwargs.pop('rot', 0)
    bytes_per_size = data.groupby(['size']).agg(total_data=('size', 'sum'))
    running_total = bytes_per_size.cumsum()
    running_total.plot(
        kind='line',
        y='total_data',
        ylabel='total data (bytes)',
        rot=rotation,
        **kwargs,
    )

In [None]:
# Load the JSON results of all '6h-*' test runs through the project helper.
# Later cells read the columns date, algorithm, size, start_at and time
# (presumably one row per flow — verify against utils.load_antler_df).
logsa = utils.load_antler_df('../tests/results/6h-*/*.json')
# Print the column/dtype/memory summary as a quick sanity check of the load.
logsa.info()

In [None]:
# Side-by-side cumulative traffic curves for the two workloads.
fig, [ax0, ax1] = plt.subplots(1, 2, figsize=(14, 6))
cumulative_traffic_plot(logs, ax=ax0, title='5 classes')
cumulative_traffic_plot(logsa, ax=ax1, title='continuous pareto alpha=1.2 bounded 300Ki-1Gi')
for ax in [ax0, ax1]:
    # Place the legend explicitly on each panel; plt.legend() would only touch
    # the current axes and leave the other panel with a default-placed legend.
    ax.legend(loc='lower right')
    ax.set_ylim(0, None)
    # Marker at 55 MiB (presumably the flow-size threshold of interest — TODO confirm).
    # NOTE(review): the size buckets elsewhere in this notebook use 55000000 (decimal), not 55*1024**2.
    ax.axvline(55*1024**2, color='red', linestyle='--')
fig.suptitle('Total traffic of flows shorter than x bytes')
fig.tight_layout()

In [None]:
# Goodput as a fraction of what a 1 Gbps link can carry over the whole run.
fig, [ax0, ax1] = plt.subplots(1, 2, figsize=(14, 6), sharey=True)

# (axes, flows, outer grouping key, run length in hours, panel title)
panels = [
    (ax0, logs, 'setup', 12, '5 classes'),
    (ax1, logsa, 'date', 6, 'continuous pareto alpha=1.2 bounded 300Ki-1Gi'),
]
for ax, flows, outer_key, hours, panel_title in panels:
    df = flows.groupby([outer_key, 'algorithm'], observed=True).agg(
        total_data=('size', 'sum'),
        n_exps=('date', 'nunique'),
    )
    # Bytes per experiment divided by the bytes a 1e9 bit/s link moves in `hours` hours.
    df['total_data'] = df['total_data'] / df['n_exps'] / (1_000_000_000 / 8 * hours * 3600)
    df.unstack().plot(kind='bar', y='total_data', rot=0, ax=ax, color=color)
    ax.set_title(panel_title)
    # Reference lines: 1 = full link capacity; 0.7 presumably the target load — TODO confirm.
    ax.axhline(1, linestyle='-')
    ax.axhline(0.7, linestyle='--')

fig.suptitle('Goodput divided by the link capacity of 1 Gbps')
fig.tight_layout()

In [None]:
# fig, axes = plt.subplots(3,2, sharex=False)
# legend = False
# for ((size, group), ax) in zip(logsa[goodflows].groupby('size', observed=False), axes.flatten()):
#     time_bar_plot(group.groupby(['setup', 'algorithm'], observed=False), ax=ax)
#     ax.set_title(f'size={utils.prettyprint_bytes(size)}')
#     ax.get_legend().remove()
#     if not legend:
#         fig.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
#         legend = True
# fig.suptitle('Mean completion time per connection')
# plt.tight_layout()

In [None]:
# # df = logs[(logs['start_at'] > 3600)]
# dfa = logsa[(logsa['start_at'] > 20) & (logsa['start_at'] < 1800 - 20)]

# fig, (ax0, ax1) = plt.subplots(1,2, sharex=False, sharey=True)
# # time_bar_plot(df.groupby(['date', 'algorithm']), ax=ax0)
# time_bar_plot(dfa.groupby(['date', 'algorithm']), ax=ax1)
# ax0.set_title('python+iperf2 generator, 12h')
# ax1.set_title('antler generator, 30m')
# ax0.legend(loc='lower left')
# ax1.legend(loc='lower left')
# fig.suptitle('Mean completion time per connection')

In [None]:
# Mean flow completion time per algorithm, with 95% CI error bars, for both workloads.
fig, [ax0, ax1] = plt.subplots(1, 2, figsize=(14, 6), sharey=True)

# Keep flows that start after the first hour and finish at least 60 s before the 12 h mark.
df = logs[(logs['start_at'] > 3600) & (logs['start_at'] + logs['time'] < 12*3600 - 60)]
# observed=True matches the other groupbys on these frames and avoids the
# FutureWarning pandas raises for categorical groupers without an explicit `observed`.
time_bar_plot(df.groupby(['setup', 'algorithm'], observed=True), ax=ax0)
ax0.set_title('5 classes')
ax0.legend(loc='lower left')

# Same window for the 6 h runs.
df = logsa[(logsa['start_at'] > 3600) & (logsa['start_at'] + logsa['time'] < 6*3600 - 60)]
time_bar_plot(df.groupby(['date', 'algorithm'], observed=True), ax=ax1)
ax1.set_title('continuous pareto alpha=1.2 bounded 300Ki-1Gi')
ax1.legend(loc='lower left')

fig.suptitle('Mean completion time per connection')
fig.tight_layout()

In [None]:
# `df` is an alias of `logsa`, so the new column is added to `logsa` itself.
df = logsa
# Bucket edges in bytes; pd.cut uses right-closed intervals by default, so a
# flow of exactly 1_000_000 bytes lands in '<1M'.
bins = [0, 1_000_000, 10_000_000, 55_000_000, 100_000_000, 800_000_000, np.inf]
labels = ['<1M', '1M-10M', '10M-55M', '55M-100M', '100M-800M', '>800M']
df['size_cat'] = pd.cut(df['size'], bins=bins, labels=labels)

# Number of flows per run and algorithm, one column per size bucket.
(
    logsa
    .groupby(['date', 'algorithm', 'size_cat'], observed=True)
    .agg(total_flows=('date', 'count'))
    .unstack()
)

In [None]:
# logs.groupby(['date', 'size']).agg(mean_time=('time', 'mean'))
# Number of flows per setup, seed and algorithm, one column per flow size.
(
    logs
    .groupby(['setup', 'seed', 'algorithm', 'size'], observed=True)
    .agg(total_flows=('date', 'count'))
    .unstack()
)

In [None]:
# Expanding (running) mean of the completion time per algorithm for one run,
# evaluated at 200 evenly spaced points over the run.
# df = logs[(logs['setup'] == setup_renames['long2']) & (logs['seed'] == '15867')]
# .copy() makes `df` an independent frame, so adding a column below does not
# write to a filtered slice of `logsa` (avoids SettingWithCopyWarning).
df = logsa[logsa['date'] == '6h-pareto1.2-heavy'].copy()
df['ended_at'] = df['start_at'] + df['time']
timepoints = np.linspace(0, df['ended_at'].max(), 200)
algos = df['algorithm'].cat.categories

fig, ax = plt.subplots()

# Only flows that end after the first hour and at least 20 s before the 6 h
# mark are counted; this filter does not depend on t, so it is applied once.
in_window = df[(df['ended_at'] > 3600) & (df['ended_at'] < 6*3600 - 20)]

# Collect one row per timepoint and stack once at the end, instead of
# re-allocating the arrays with np.vstack on every iteration.
mean_rows = []
sem_rows = []
for t in timepoints:
    c = in_window[in_window['ended_at'] <= t]
    # observed=False keeps every algorithm category in the result (NaN when it
    # has no finished flows yet), so the .loc[algos, ...] lookups always succeed.
    c = c.groupby('algorithm', observed=False).agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
    mean_rows.append(c.loc[algos, 'mean_time'].to_numpy())
    sem_rows.append(c.loc[algos, 'sem_time'].to_numpy())

# Shape (len(timepoints), len(algos)).
partial_means = np.vstack(mean_rows)
partial_sems = np.vstack(sem_rows)

for i, a in enumerate(algos):
    ax.plot(timepoints, partial_means[:, i], label=a, color=color[i])
    # Shaded band: normal-approximation 95% confidence interval (1.96 * SEM).
    ax.fill_between(timepoints, partial_means[:, i] - 1.96 * partial_sems[:, i], partial_means[:, i] + 1.96 * partial_sems[:, i], alpha=0.3, color=color[i])
    # Dashed line at the final value, to show how quickly the mean settles.
    ax.axhline(partial_means[-1, i], linestyle='--', color=color[i], alpha=0.5)
ax.legend(loc='lower right')
ax.set_ylabel('mean completion time (s)')
ax.set_xlabel('elapsed time (s)')
ax.set_title('Expanding mean completion time per connection, for runs with the same seed')
fig.tight_layout()
# df.sort_values('start_at').groupby('algorithm', observed=True)['start_at', 'time'].expanding().mean().unstack().plot()

In [None]:
# fig, axs = plt.subplots(2, sharex=True, figsize=(14, 12))

# for algo, ax in zip(['age+prio+codel', 'cake'], axs):
#     df = logs[(logs['setup'] == setup_renames['long2']) & (logs['algorithm'] == algo)]
#     df['ended_at'] = df['start_at'] + df['time']
#     timepoints = np.linspace(0, df['ended_at'].max(), 200)
#     partial_means = np.zeros((0, 3))
#     partial_sems = np.zeros((0, 3))
#     seeds = df['seed'].unique()


#     for t in timepoints:
#         c = df[(df['ended_at'] <= t) & (df['ended_at'] > 3600)].groupby('seed', observed=False).agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
#         partial_means = np.vstack([partial_means, c.loc[seeds, 'mean_time']])
#         partial_sems = np.vstack([partial_sems, c.loc[seeds, 'sem_time']])

#     for i, a in enumerate(seeds):
#         ax.plot(timepoints, partial_means[:, i], label=a)
#         ax.fill_between(timepoints, partial_means[:, i] - 1.96 * partial_sems[:, i], partial_means[:, i] + 1.96 * partial_sems[:, i], alpha=0.3, label=f'{a} 95% CI')
#         ax.axhline(partial_means[-1, i], linestyle='--', color=f'C{i}', alpha=0.5)
#     ax.legend()
#     ax.set_ylabel('mean completion time (s)')
#     ax.set_xlabel('elapsed time (s)')
#     ax.set_title(algo)

# fig.suptitle('Expanding mean completion time per connection')
# fig.tight_layout()

In [None]:
# time_bins = np.linspace(0, 60, 150)

# for d in [logs['date'].cat.categories[-2], logs['date'].cat.categories[-1]]:
#     df = logs[(logs['date'] == d) & logs['algorithm'].isin(['age+prio+codel', 'cake'])]

#     for algorithm, df_group in df.groupby('algorithm', observed=True):
#         df_group['start_at'] = df_group['start_at'] - df_group['start_at'].min()
#         df_group['end_at'] = df_group['start_at'] + df_group['time']

#         active_counts = pd.DataFrame(index=time_bins, columns=df_group['size'].unique()).fillna(0).infer_objects(copy=False)

#         for time in time_bins:
#             active_at_t = df_group[(df_group['start_at'] <= time) & (df_group['end_at'] > time)]  # Instances active at `time`
#             counts = active_at_t['size'].value_counts()  # Count per size
#             active_counts.loc[time, counts.index] = counts.values  # Store in DataFrame

#         active_counts = active_counts.fillna(0)
#         active_counts = active_counts[active_counts.columns.sort_values()]
#         active_counts.columns = active_counts.columns.map(utils.prettyprint_bytes)  
#         active_counts.plot(kind='area', stacked=True)

#         plt.title(f'Active instances over time ({algorithm}), date: {d}')
#         plt.xlabel('time from experiment start (s)')
#         plt.ylabel('active connections')
#         plt.ylim(0, 30)

