In [None]:
import re
from pathlib import Path
from dataclasses import dataclass

from tqdm.autonotebook import tqdm
import papermill as pm

from recsys4daos.utils.notebooks import run_dao_notebook

In [None]:
# Every notebook in the current directory; the ones that get executed are
# those whose file name starts with digits followed by an underscore,
# kept in sorted path order.
all_notebooks = Path('.').glob('*.ipynb')
run_notebooks = sorted(filter(lambda p: re.match(r'\d+_', p.name), all_notebooks))
run_notebooks

In [None]:
# Passed positionally (output path first, then execution id) to
# `run_dao_notebook` for every notebook that is executed.
EXECUTION_ID: str = '2024-07-01'
OUTPUT_PATH: str = '../nbout/'
# Default value of `DaoToRun.last_fold` (date of the last fold to use).
MAX_CUTOFF = '2023-07-29'

In [None]:
@dataclass
class DaoToRun:
    """Settings used when running the notebooks for a single DAO.

    Attributes:
        org_name: Name of the organization (see ./data/input).
        splits_freq: Frequency of the splits.
        splits_normalize: Whether to normalize the folds (start at 00:00).
        last_folds: Number of folds to use.
        last_fold: Date of the last fold to use.
        run_until_nb: Run until this notebook number.
    """

    org_name: str
    splits_freq: str = '7d'
    splits_normalize: bool = True
    last_folds: int = 10
    last_fold: str = MAX_CUTOFF
    run_until_nb: int = 100

# DAOs to process. Fields that are not given fall back to the defaults
# declared on `DaoToRun`.
ALL_DAOS: list[DaoToRun] = [
    DaoToRun(
        org_name='Decentraland',
        splits_freq='W-THU',
    ),
    DaoToRun(
        org_name='PancakeSwap',
        splits_freq='3d',
        # NOTE(review): this was `cutoff_date=`, which is not a `DaoToRun`
        # field and made the constructor raise TypeError. Mapped to
        # `last_fold` (whose default is MAX_CUTOFF) — confirm this is the
        # intended setting.
        last_fold='2023-07-01',
        run_until_nb=10,
    ),
]

In [None]:
def _nb_le(nb: Path, last_nb: int):
    return int(re.match(r'(\d+)_', nb.name)[1]) <= last_nb

# Sanity check: print each discovered notebook next to the result of
# `_nb_le` for a cutoff of 11.
for nb in run_notebooks:
    print(f'{nb} {_nb_le(nb, 11)}')

In [None]:
# For every configured DAO, run each numbered notebook whose number is at most
# the DAO's `run_until_nb`, with one progress bar per loop level.
for dao in (b1 := tqdm(ALL_DAOS, desc='org')):
    b1.set_postfix_str(dao.org_name)
    # `_nb_le` checks a single notebook, so the list is filtered one path at a
    # time (the previous line passed the whole list and had unbalanced
    # parentheses, which made the cell a SyntaxError).
    dao_notebooks = [p for p in run_notebooks if _nb_le(p, dao.run_until_nb)]
    for nb in (b2 := tqdm(dao_notebooks, desc='notebook')):
        b2.set_postfix_str(nb.name)
        # NOTE(review): `dao.last_fold` is never forwarded to the notebook —
        # confirm whether `run_dao_notebook` expects a parameter for it.
        run_dao_notebook(
            nb,
            OUTPUT_PATH,
            EXECUTION_ID,
            ORG_NAME=dao.org_name,
            SPLITS_FREQ=dao.splits_freq,
            LAST_FOLDS=dao.last_folds,
            SPLITS_NORMALIZE=dao.splits_normalize,
        )