In [1]:
from copy import deepcopy
import timeit
import json
from pathlib import Path
from functools import partial

from panflute import convert_text
import numpy as np
import plotly.express as px
import pandas as pd
import statsmodels.api as sm
from map_parallel import map_parallel

In [2]:
# pandoc commit 6b962e6b0e729e925c4313427926cc6bfc5d3d1f
pandoc_path = Path("~/.local/bin/pandoc").expanduser()
# input ipynb
path = Path("holoview-load-only-manual_filtered.ipynb").expanduser()

In [3]:
!$pandoc_path --version

pandoc 2.16.2
Compiled with pandoc-types 1.22.1, texmath 0.12.3.3, skylighting 0.12.1,
citeproc 0.6, ipynb 0.2
User data directory: /Users/kolen/.local/share/pandoc
Copyright (C) 2006-2021 John MacFarlane. Web:  https://pandoc.org
This is free software; see the source for copying conditions. There is no
warranty, not even for merchantability or fitness for a particular purpose.


In [4]:
with path.open("r") as f:
    data = json.load(f)

In [5]:
n_original = len(data["cells"][0]["outputs"][0]["data"]["application/vnd.holoviews_load.v0+json"])
n_original

1679380

In [6]:
data["cells"][0]["outputs"][0]["data"]["application/vnd.holoviews_load.v0+json"] = '"'

In [7]:
def cook_new_data(data, n):
    res = deepcopy(data)
    res["cells"][0]["outputs"][0]["data"]["application/vnd.holoviews_load.v0+json"] = '"' * int(n)
    return res

In [8]:
def timethis(
    data,
    n,
    number=1,
    repeat=1,
):
    return np.mean(
        timeit.repeat(
            'convert_text(text, input_format="ipynb", output_format="native", pandoc_path=pandoc_path)',
            setup='import json; from panflute import convert_text; text = json.dumps(cook_new_data(data, n))',
            globals={"data": data, "n": n, "cook_new_data": cook_new_data, "pandoc_path": pandoc_path},
            number=number,
            repeat=repeat,
        )
    )

In [9]:
N = 18
n = 2**np.arange(1, N)

In [10]:
res = map_parallel(partial(timethis, data), n, mode="multithreading", processes=len(n))

In [11]:
df = pd.DataFrame(
    {
        "n": n,
        "time (s)": res,
    }
)

In [12]:
df

Unnamed: 0,n,time (s)
0,2,0.073833
1,4,0.099905
2,8,0.051142
3,16,0.057677
4,32,0.087921
5,64,0.055302
6,128,0.107645
7,256,0.055969
8,512,0.0989
9,1024,0.102706


In [13]:
df_fit = df.iloc[10:]

$$t = A n^x$$

$$\log t = C + x \log n$$

In [14]:
X = sm.add_constant(np.log(df_fit["n"]))
y = np.log(df_fit["time (s)"])
model = sm.OLS(y, X)
results = model.fit()
results.summary()

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,time (s),R-squared:,0.994
Model:,OLS,Adj. R-squared:,0.993
Method:,Least Squares,F-statistic:,881.7
Date:,"Thu, 09 Dec 2021",Prob (F-statistic):,8.12e-07
Time:,18:03:06,Log-Likelihood:,1.856
No. Observations:,7,AIC:,0.2879
Df Residuals:,5,BIC:,0.1797
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-15.8036,0.587,-26.924,0.000,-17.312,-14.295
n,1.7780,0.060,29.693,0.000,1.624,1.932

0,1,2,3
Omnibus:,,Durbin-Watson:,1.198
Prob(Omnibus):,,Jarque-Bera (JB):,0.582
Skew:,0.601,Prob(JB):,0.748
Kurtosis:,2.26,Cond. No.,70.0


Predicted time

In [15]:
np.exp(results.params.const + results.params.n * np.log(n_original))

16027.401232530488