In [None]:
import matplotlib.pyplot as plt
import matplotlib_inline
import numpy as np
import pandas as pd

# Notebook-wide plotting defaults: emit inline figures as SVG and use a wide
# 19x8 default figure size for every plot created below.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
plt.rcParams['figure.figsize'] = [19, 8]

In [None]:
import utils

# One glob per experiment directory; each matches that directory's gzipped JSON logs.
log_file_globs = [
    '../logs/4way3/*.json.gz',
    '../logs/4way4/*.json.gz',
    '../logs/4way5/*.json.gz',
    '../logs/4way6/*.json.gz',
]

# Load every matching file through the project helper (presumably into a single
# DataFrame - see utils.load_log_df) and print the column/dtype summary.
logs = utils.load_log_df(log_file_globs)
logs.info()

In [None]:
# Labels used in the plots for algorithms whose logged name should be shown
# differently; names missing from this mapping are displayed unchanged.
algorithm_renames = {
    'age': 'age+prio+codel',
    'pfifofast': 'age+pfifofast',
}


def _display_name(raw_name):
    """Return the plot label for a logged algorithm name (itself if not renamed)."""
    return algorithm_renames.get(raw_name, raw_name)


logs['algorithm'] = logs['algorithm'].apply(_display_name)

In [None]:
# Summary statistics for every column (numeric and non-numeric alike).
logs.describe(include='all')

In [None]:
def time_bar_plot(data: 'pd.core.groupby.DataFrameGroupBy', *, z_score: float = 1.96, **kwargs) -> None:
    """Draw a grouped bar chart of mean `time` with confidence-interval error bars.

    Args:
        data: A grouped frame (e.g. ``logs.groupby(['setup', 'algorithm'])``)
            whose underlying frame has a ``time`` column. The innermost
            grouping key is unstacked, so it becomes one bar series per value.
        z_score: Multiplier applied to the standard error of the mean to get
            the error-bar half width. The default 1.96 corresponds to a 95%
            confidence interval under a normal approximation.
        **kwargs: Forwarded to ``DataFrame.plot`` (for example ``ax=`` or
            ``title=``). ``rot`` and ``ylabel`` may be overridden here; they
            default to 45 and ``'time (s)'``.
    """
    summary = data.agg(mean_time=('time', 'mean'), sem_time=('time', 'sem'))
    summary['interval'] = summary['sem_time'] * z_score

    # Defaults first so that caller-supplied values win instead of raising a
    # "multiple values for keyword argument" TypeError.
    plot_options = {'rot': 45, 'ylabel': 'time (s)', **kwargs}
    summary.unstack().plot(kind='bar', y='mean_time', yerr='interval', **plot_options)

In [None]:
# Total bytes logged per (date, algorithm), drawn as grouped bars.
df = logs.groupby(['date', 'algorithm'])['size'].sum().to_frame('total_data')
df.unstack().plot(kind='bar', y='total_data', ylabel='total data (bytes)', rot=45)

# Dashed reference levels: 1e9 / 8 multiplied by 480 and by 960.
# NOTE(review): presumably a 1 Gbit/s link expressed in bytes/s, sustained
# for 480 s and 960 s respectively - confirm against the experiment setup.
line_rate = 1_000_000_000 / 8
plt.axhline(line_rate * 480, linestyle='--')
plt.axhline(line_rate * 960, linestyle='--')

In [None]:
# Mean `time` (with error bars) for each date, one bar series per algorithm.
time_bar_plot(logs.groupby(['date', 'algorithm']))

In [None]:
# Mean `time` (with error bars) for each setup, one bar series per algorithm.
time_bar_plot(logs.groupby(['setup', 'algorithm']))

In [None]:
# Same comparison broken down by (setup, size), one bar series per algorithm.
time_bar_plot(logs.groupby(['setup', 'size', 'algorithm']))
# Log-scaled y axis on the current axes.
# NOTE(review): presumably because mean times differ by orders of magnitude across sizes - confirm.
plt.yscale('log')

In [None]:
# One subplot per distinct `size`, each showing mean time per (setup, algorithm).
# The grid is sized from the data: a fixed 2x2 grid combined with zip() would
# silently drop every size after the fourth.
n_cols = 2
n_rows = max(1, -(-logs['size'].nunique() // n_cols))  # ceiling division, at least one row
_, axes = plt.subplots(n_rows, n_cols, sharex=True, squeeze=False)  # squeeze=False keeps `axes` 2-D for any grid
for (size, group), ax in zip(logs.groupby('size'), axes.flatten()):
    time_bar_plot(group.groupby(['setup', 'algorithm']), ax=ax)
    ax.set_title(f'size={utils.prettyprint_bytes(size)}')

In [None]:
# Restrict the comparison to the '4way4' and '4way5' setups.
df = logs.loc[logs['setup'].isin(['4way4', '4way5'])]

# One subplot per distinct `size`, each a box plot of `time` per algorithm
# (outliers hidden). The grid is sized from the data: a fixed 2x2 grid combined
# with zip() would silently drop every size after the fourth.
n_cols = 2
n_rows = max(1, -(-df['size'].nunique() // n_cols))  # ceiling division, at least one row
_, axes = plt.subplots(n_rows, n_cols, sharex=True, squeeze=False)  # squeeze=False keeps `axes` 2-D for any grid
for (size, group), ax in zip(df.groupby('size'), axes.flatten()):
    group.boxplot(column=['time'], by=['algorithm'], grid=False, rot=45, fontsize=8, showfliers=False, ax=ax)
    ax.set_title(f'size={utils.prettyprint_bytes(size)}')
    ax.set_ylabel('time (s)')

In [None]:
def _size_weighted_mean(times):
    """Arithmetic mean of a group's times, each row weighted by its `size` in `logs`."""
    weights = logs.loc[times.index, 'size']
    return np.average(times, weights=weights)


# Size-weighted mean time per (date, algorithm).
df = logs.groupby(['date', 'algorithm']).agg(mean_time=('time', _size_weighted_mean))
df.unstack().plot(kind='bar', y='mean_time', ylabel='time (s)', rot=45)

In [None]:
def _geometric_mean(times):
    """Geometric mean of a group's times, computed as 2 ** mean(log2(times))."""
    return 2 ** np.mean(np.log2(times))


# Geometric mean time per (date, algorithm).
df = logs.groupby(['date', 'algorithm']).agg(mean_time=('time', _geometric_mean))

df.unstack().plot(kind='bar', y='mean_time', rot=45, ylabel='time (s)')

In [None]:
def _size_weighted_geometric_mean(times):
    """Geometric mean of a group's times with each row weighted by its `size` in `logs`.

    Computed in log2 space: 2 ** average(log2(times), weights=size).
    """
    log_times = np.log2(times)
    weights = logs.loc[times.index, 'size']
    return 2 ** np.average(log_times, weights=weights)


# Size-weighted geometric mean time per (date, algorithm).
df = logs.groupby(['date', 'algorithm']).agg(mean_time=('time', _size_weighted_geometric_mean))

df.unstack().plot(kind='bar', y='mean_time', rot=45, ylabel='time (s)')

In [None]:
# Total bytes per (size, date): one bar group per size, one bar series per date.
rows_by_size_and_date = logs.groupby(['size', 'date'])
df = rows_by_size_and_date.agg(total_data=('size', 'sum'))
df.unstack().plot(kind='bar', y='total_data', ylabel='total data (bytes)', rot=45)

In [None]:
# Mean of the `size` column for each `date`.
logs.groupby(['date']).agg(mean_size=('size', 'mean'))

In [None]:
# Sum of `size` over all loaded log rows, formatted by the project's byte pretty-printer.
utils.prettyprint_bytes(logs['size'].sum())

In [None]:
# Number of concurrently active instances over time for a single run (`date`),
# drawn as one stacked-area figure per algorithm with one band per distinct `size`.
df = logs[logs['date'] == '1739456726']
bins = np.arange(0, 960, 1)  # NOTE(review): not read in this cell; kept in case another cell relies on it
time_bins = np.linspace(0, 960, 100)  # sampling instants; 960 is presumably the run length in seconds - TODO confirm

for algorithm, df_group in df.groupby('algorithm'):
    # Derive the shifted columns with assign() so a fresh frame is produced
    # instead of writing into the groupby slice (which can trigger
    # SettingWithCopyWarning). Times are relative to the group's first start.
    relative_start = df_group['start_at'] - df_group['start_at'].min()
    df_group = df_group.assign(start_at=relative_start, end_at=relative_start + df_group['time'])

    # Start from integer zeros rather than an empty (object-dtype) frame that is
    # fillna()'d afterwards: that relies on deprecated object downcasting and
    # would leave nothing numeric to plot once the downcast is removed.
    # Sorted columns give every algorithm's figure the same band order per size.
    sizes = np.sort(df_group['size'].unique())
    active_counts = pd.DataFrame(0, index=time_bins, columns=sizes)

    for t in time_bins:
        # Rows whose [start_at, end_at) interval contains the sampling instant.
        is_active = (df_group['start_at'] <= t) & (df_group['end_at'] > t)
        counts = df_group.loc[is_active, 'size'].value_counts()  # active rows per size
        active_counts.loc[t, counts.index] = counts.values

    active_counts.plot(kind='area', stacked=True)

    plt.title(f'Active instances over time ({algorithm})')

