In [None]:
%load_ext autoreload
%autoreload 2
import matplotlib as mpl  # noqa: F401
import matplotlib.pyplot as plt
import matplotlib_inline
import numpy as np
import pandas as pd
import phfit
import toolkit2lps
import utils
from kneed import KneeLocator

# Ask the inline backend to emit figures in 'svg' format.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# Default figure size for every plot in this notebook (matplotlib's figsize is width, height).
plt.rcParams['figure.figsize'] = [14, 9]

# Opt in to the pandas 'future.no_silent_downcasting' option for the whole session.
pd.set_option('future.no_silent_downcasting', True)

In [None]:
# Glob pattern handed to utils.load_antler_df; it selects the per-run stream logs
# of the '6h-pareto1.2-heavy2-fixed' result directory.
streams_glob = '../tests/results/6h-pareto1.2-heavy2-fixed/*_streams.json*'
logsa = utils.load_antler_df(streams_glob)

# Quick structural check of what was loaded (prints to stdout).
logsa.info()

In [None]:
# Summary statistics of the loaded log, shown as the cell's output
# (presumably `logsa` is a pandas DataFrame — confirm against utils.load_antler_df).
logsa.describe()

In [None]:
def kneedle(df):
    """Fit a Kneedle knee locator to the size-weighted cumulative curve of ``df``.

    Every distinct value of the ``size`` column is multiplied by the number of
    rows that carry it, and these products are accumulated in ascending size
    order. The curve (x = distinct size, y = running total) is handed to
    ``KneeLocator`` as a concave, increasing curve.

    Args:
        df: frame with a numeric ``size`` column.

    Returns:
        The fitted ``KneeLocator``; callers read ``.knee`` or use its plot helpers.
    """
    occurrences = df['size'].value_counts().sort_index()
    running_total = (occurrences * occurrences.index).cumsum()
    return KneeLocator(
        running_total.index,
        running_total.values,
        curve='concave',
        direction='increasing',
    )

# Fit once and reuse the locator: the original called kneedle(logsa) twice,
# repeating the value_counts/cumsum and the Kneedle fit for the same input.
knee_all = kneedle(logsa)
knee_all.plot_knee_normalized()
print(knee_all.knee)

In [None]:
def _knees_over_time(flows, instants, window=None):
    """Return the Kneedle knee of ``flows`` evaluated at each instant.

    For every ``x`` in ``instants`` the rows with ``start_at < x`` are kept;
    when ``window`` is given, rows must additionally satisfy
    ``x - window < start_at``. ``kneedle`` is fitted on each selection.

    Args:
        flows: frame with ``start_at`` and ``size`` columns.
        instants: iterable of ``start_at`` values at which to evaluate the knee.
        window: width of the trailing window, in ``start_at`` units; ``None``
            means "everything before x".

    Returns:
        ``np.ndarray`` with one knee per instant (``None`` where no knee was found).
    """
    started = flows['start_at']
    knees = []
    for x in instants:
        selected = started < x
        if window is not None:
            selected = selected & (x - window < started)
        knees.append(kneedle(flows[selected]).knee)
    return np.array(knees)


df = logsa[logsa['algorithm'] == 'pfifo']
# 100 evenly spaced evaluation instants; the first one is offset by 60 start_at
# units from the earliest flow (presumably seconds, so the first selection is
# not nearly empty — confirm).
t = np.linspace(df['start_at'].min() + 60, df['start_at'].max(), 100)

k0 = _knees_over_time(df, t)
k1 = _knees_over_time(df, t, window=300)
k2 = _knees_over_time(df, t, window=1800)

for knees, label in (
    (k0, 'knee on [0, x]'),
    (k1, 'knee on [x-300, x]'),
    (k2, 'knee on [x-1800, x]'),
):
    plt.plot(t, knees, label=label)

plt.legend()
plt.title('Kneedle knee')
plt.xlabel('start_at')
plt.ylabel('knee')
# Trailing semicolon keeps the (bottom, top) tuple out of the cell output.
plt.ylim(0, None);

In [None]:
# Hard-coded 7-entry vector `a` and 7x7 matrix `B` passed to
# toolkit2lps.optimal_threshold with body=4, tail=3. Presumably a phase-type
# representation (initial vector and generator) — confirm against toolkit2lps.
# The entries of `a` sum to 1 up to rounding.
a = np.array(
    [
        1.11713986e-01,
        6.38232636e-03,
        6.83096088e-01,
        2.76915143e-04,
        4.39688429e-03,
        2.30470844e-02,
        1.71086716e-01,
    ]
)

# Magnitudes of the diagonal of B; B carries them with a negative sign.
phase_rates = np.array(
    [
        1.97036236e-01,
        1.87675163e-01,
        1.84837913e-01,
        1.06602464e-02,
        2.87496206e-04,
        2.52611549e-03,
        1.54091661e-02,
    ]
)
B = np.diag(-phase_rates)
# The only non-zero off-diagonal entries sit on the sub-diagonal of rows 1-3;
# rows 4-6 are purely diagonal.
B[1, 0] = 1.87675163e-01
B[2, 1] = 1.84837913e-01
B[3, 2] = 1.06602464e-02

toolkit2lps.optimal_threshold(
    a,
    B,
    body=4,
    tail=3,
    thmin=10,
    thmax=2000,
    rho=0.7,
)

In [None]:
# Empirical CDF of the `size` column: relative frequency of each distinct size,
# accumulated in ascending size order.
size_share = logsa['size'].value_counts(normalize=True).sort_index()
cdf = size_share.cumsum()

# Fit to the empirical CDF; the positional 4 and 3 presumably mirror the
# body/tail arguments passed below — confirm against phfit.contfit.
a, b = phfit.contfit(cdf.index, cdf.values, 4, 3)

toolkit2lps.optimal_threshold(
    a,
    b,
    body=4,
    tail=3,
    thmin=3e5,
    thmax=1e9,
    rho=0.875,
)