In [None]:
import glob
import gzip

import matplotlib.pyplot as plt
import matplotlib_inline
import numpy as np
import orjson
import pandas as pd

# Emit inline figures as SVG.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# Default canvas size, in inches, for every figure created below.
plt.rcParams.update({'figure.figsize': [19, 8]})

# Opt in to pandas' future behaviour of never downcasting silently.
pd.options.future.no_silent_downcasting = True

In [None]:
# IPython magics: with autoreload mode 2, imported modules are reloaded before
# each cell runs, so edits to the project-local `utils` module are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
import utils

# Glob patterns for the gzipped JSON logs to analyse. Comment entries in or
# out to choose which experiment setups are loaded.
log_file_globs = [
    # '../logs/pow1/*.json.gz',
    # '../logs/pow2/*.json.gz',
    '../logs/pow3/*.json.gz',
    '../logs/pow4/*.json.gz',
]

# NOTE(review): `load_log_df` is defined in `utils`, not here. Later cells use
# the result as a DataFrame with (at least) the columns `setup`, `algorithm`,
# `date`, `size`, `time` and `start_at` — confirm against the helper.
logs = utils.load_log_df(log_file_globs)
# Print the column/dtype/memory summary as a quick sanity check of the load.
logs.info()

In [None]:
# Human-readable labels used in the plot legends and axes. Values without an
# entry in the mapping are kept unchanged.
algorithm_renames = {
    'age': 'age+prio+codel',
    'pfifofast': 'age+pfifofast',
}
setup_renames = {
    'pow1': '1. 40 actors, 0.2 sec sleep',
    'pow2': '2. 40 actors, 0.6 sec sleep',
    'pow3': '3. 60 actors, 0.6 sec sleep',
    'pow4': '4. 80 actors, 1.0 sec sleep',
}

# Apply each mapping to its column; the default argument pins the mapping that
# belongs to the current loop iteration.
for column, renames in (('algorithm', algorithm_renames), ('setup', setup_renames)):
    logs[column] = logs[column].apply(lambda label, renames=renames: renames.get(label, label))

In [None]:
def time_bar_plot(data: "pd.api.typing.DataFrameGroupBy", **kwargs) -> "plt.Axes":
    """Plot the mean of ``time`` per group as bars with 95% confidence intervals.

    Args:
        data: A grouped frame (the result of ``DataFrame.groupby(...)``) whose
            underlying frame has a ``time`` column. The innermost grouping key
            is unstacked into columns, so it becomes the bar series, while the
            remaining keys form the x axis.
        **kwargs: Forwarded to ``DataFrame.plot``. ``rot`` defaults to ``0``
            and ``ylabel`` to ``'time (s)'``; either may be overridden by the
            caller (e.g. ``ax=...`` to draw into an existing subplot).

    Returns:
        The object returned by ``DataFrame.plot`` (the Axes that was drawn on).
    """
    summary = data.agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
    # 1.96 standard errors on either side of the mean: 95% confidence interval
    # under the normal approximation.
    summary['interval'] = summary['sem_time'] * 1.96

    # Defaults come first so that caller-supplied values win. Passing `ylabel`
    # used to collide with the hard-coded keyword and raise TypeError.
    plot_kwargs = {'rot': 0, 'ylabel': 'time (s)', **kwargs}
    return summary.unstack().plot(kind='bar', y='mean_time', yerr='interval', **plot_kwargs)

In [None]:
# Show the `metadata` entry of the first pow3 log as a sample of what the
# experiment recorded. The `with` block closes the gzip handle, which the
# previous one-liner left open.
sample_log_path = next(glob.iglob('../logs/pow3/*.json.gz'))
with gzip.open(sample_log_path) as sample_log_file:
    sample_metadata = orjson.loads(sample_log_file.read())['metadata']
sample_metadata

In [None]:
# Bytes a 1 Gbps link can carry: 1e9 bits/s over 8 bits per byte, times 1200.
# NOTE(review): 1200 is presumably the experiment duration in seconds — confirm.
link_capacity_bytes = 1_000_000_000 / 8 * 1200

df = logs.groupby(['setup', 'algorithm'], observed=True).agg(
    total_data=('size', 'sum'),
    n_exps=('date', 'nunique'),
)
# Mean bytes per experiment (one experiment per distinct `date`), expressed as
# a fraction of the link capacity computed above.
df['total_data'] = df['total_data'] / df['n_exps'] / link_capacity_bytes
# df.loc[df.index.get_level_values(0) == '4way9', ['total_data']] = df.loc[df.index.get_level_values(0) == '4way9', ['total_data']] / 960 * 1200
df.unstack().plot(kind='bar', y='total_data', rot=0)
# Dashed reference lines at 100% and 70% of the link capacity.
for fraction in (1, 0.7):
    plt.axhline(fraction, linestyle='--')
plt.title('Throughput divided by the link capacity of 1 Gbps')

In [None]:
# Bytes sent per (setup, flow size), together with the number of distinct
# experiment dates that contributed to each cell.
per_flow_class = logs.groupby(['setup', 'size'])
df = per_flow_class.agg(total_data=('size', 'sum'), n_exps=('date', 'nunique'))
# Average over experiments, then divide by 5.
# NOTE(review): the constant 5 is not explained in this notebook — confirm
# what it normalises by.
df['total_data'] = df['total_data'].div(df['n_exps']).div(5)
df.unstack().plot(kind='bar', y='total_data', ylabel='total data (bytes)', rot=0)
plt.title('Total data sent per experiment by each flow class')

In [None]:
# fig, axes = plt.subplots(1, logs['setup'].nunique(), sharey=True)

# for (setup, data), ax in zip(logs.groupby('setup'), axes):
#     time_bar_plot(data.groupby(['size', 'algorithm']), ax=ax)
#     ax.set_title(setup)
#     plt.yscale('log')
# fig.suptitle('Mean completion time per connection')
# plt.tight_layout()

In [None]:
# One panel per flow size, two panels per row. The grid is sized from the
# number of size groups: with the previous fixed 3x2 grid, `zip` silently
# dropped every size class beyond the sixth.
size_groups = list(logs.groupby('size'))
n_cols = 2
n_rows = max(1, -(-len(size_groups) // n_cols))  # ceiling division, at least one row

# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(n_rows, n_cols, sharex=True, squeeze=False)
for (size, group), ax in zip(size_groups, axes.flat):
    time_bar_plot(group.groupby(['date', 'algorithm'], observed=False), ax=ax)
    ax.set_title(f'size={utils.prettyprint_bytes(size)}')
    ax.legend(loc='upper right')
fig.suptitle('Mean completion time per connection')
plt.tight_layout()

In [None]:
# Mean completion time for every setup, with one bar per algorithm.
by_setup_and_algorithm = logs.groupby(['setup', 'algorithm'])
time_bar_plot(by_setup_and_algorithm)
plt.title('Mean completion time per connection')

In [None]:
# One panel per setup, side by side on a shared y axis. squeeze=False makes
# `plt.subplots` always return a 2-D array of Axes; without it a single loaded
# setup yields a bare Axes object and the `zip` below raises TypeError.
n_setups = logs['setup'].nunique()
fig, axes = plt.subplots(1, n_setups, sharey=True, squeeze=False)
for (setup, data), ax in zip(logs.groupby('setup'), axes.flat):
    time_bar_plot(data.groupby(['date', 'algorithm'], observed=True), ax=ax)
    ax.set_title(setup)
fig.suptitle('Mean completion time per connection')
plt.tight_layout()

In [None]:
# fig, axes = plt.subplots(1, logs['setup'].nunique(), sharey=True)
# for (setup, data), ax in zip(logs.groupby('setup'), axes):
#     df = data.groupby(['date', 'algorithm'], observed=True).agg(mean_time=('time', lambda x: 2**np.mean(np.log2(x))))
#     df.unstack().plot(kind='bar', y='mean_time', ylabel='time (s)', rot=0, ax=ax)
#     ax.set_title(setup)
# fig.suptitle('Geometric mean completion time per connection')
# fig.tight_layout()

In [None]:
# logs.groupby(['date', 'size']).agg(mean_time=('time', 'mean'))
# logs.groupby(['date', 'algorithm']).count()

In [None]:
# Keep only the rows whose `date` is the largest category (the latest run,
# provided the categories sort chronologically).
df = logs[logs['date'] == logs['date'].cat.as_ordered().max()]
# 150 evenly spaced sample instants between 0 and 1200 on the time axis.
time_bins = np.linspace(0, 1200, 150)

for algorithm, df_group in df.groupby('algorithm', observed=True):
    # Work on derived Series instead of writing columns into the group slice,
    # which avoids SettingWithCopyWarning and leaves `logs` untouched.
    start_at = df_group['start_at'] - df_group['start_at'].min()  # relative to first start
    end_at = start_at + df_group['time']
    sizes = df_group['size']

    # Rows are sample instants, columns are flow sizes, all starting at zero.
    # Sorting the columns gives every algorithm's figure the same stacking
    # order and colours; first-appearance order could differ between groups.
    active_counts = pd.DataFrame(0, index=time_bins, columns=sorted(sizes.unique()))

    for t in time_bins:
        is_active = (start_at <= t) & (end_at > t)  # started by `t` and not yet finished
        counts = sizes[is_active].value_counts()  # active connections per size
        active_counts.loc[t, counts.index] = counts.values

    # active_counts.columns = active_counts.columns.sort_values().map(utils.prettyprint_bytes)
    ax = active_counts.plot(kind='area', stacked=True)

    ax.set_title(f'Active instances over time ({algorithm})')
    ax.set_xlabel('time from experiment start (s)')
    ax.set_ylabel('active connections')



In [None]:
# Five log-spaced values from 3e5 to 3e8, each printed rounded to the nearest
# 100,000 and as a multiple of 300,000.
candidates = np.logspace(5, 8, 5) * 3
for x in candidates:
    rounded = round(x, -5)
    multiple = x / 300000
    print(rounded, multiple)

In [None]:
52 * 1024 ** 2  # 52 * 2**20, i.e. 52 MiB when read as a byte count