In [None]:
import re
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
import papermill as pm
from tqdm.autonotebook import tqdm

from recsys4daos.utils.notebooks import run_dao_notebook

In [None]:
# Collect every notebook in the working directory, then keep only the numbered
# ones (file name starts with digits followed by "_"), sorted by path.
all_notebooks = Path('.').glob('*.ipynb')
run_notebooks = sorted(filter(lambda p: re.match(r'\d+_', p.name), all_notebooks))
run_notebooks

In [None]:
# Passed as the second positional argument to run_dao_notebook for every run.
# NOTE(review): presumably the directory where executed notebooks are written - confirm.
OUTPUT_PATH: str = '../nbout/'
# Passed as the third positional argument to run_dao_notebook for every run.
# NOTE(review): looks like a label identifying this batch of executions - confirm.
EXECUTION_ID: str = '2024-07-03'

In [None]:
@dataclass
class DaoToRun:
    """Per-organization settings for one pass over the numbered notebooks.

    The execution loop forwards ``org_name``, ``splits_freq``, ``last_folds``,
    ``splits_normalize`` and ``last_fold_date`` to ``run_dao_notebook`` as the
    ``ORG_NAME``, ``SPLITS_FREQ``, ``LAST_FOLDS``, ``SPLITS_NORMALIZE`` and
    ``LAST_FOLD_DATE_STR`` keyword arguments. ``run_until_nb`` is only used to
    select which notebooks are executed.
    """

    # Name of the organization (see ./data/input)
    org_name: str
    # Frequency of the splits (e.g. '7d', '3d', 'W-THU')
    # NOTE(review): presumably a pandas offset alias - confirm in the notebooks
    splits_freq: str = '7d'
    # Whether to normalize the folds (start at 00:00)
    splits_normalize: bool = True
    # Number of folds to use
    last_folds: int = 10
    # Date of the last fold to use; None is forwarded unchanged when no date is set
    last_fold_date: Optional[str] = None
    # Run until this notebook number (inclusive)
    run_until_nb: int = 100

# One entry per organization, in execution order. Keyword arguments follow a
# fixed order (org_name, splits_freq, last_folds, last_fold_date, run_until_nb);
# any field left out falls back to the DaoToRun default.
ALL_DAOS: list[DaoToRun] = [
    DaoToRun(org_name='Decentraland', splits_freq='W-THU', last_fold_date='2023-07-13', run_until_nb=19),
    DaoToRun(org_name='PancakeSwap', splits_freq='3d', last_fold_date='2023-06-27', run_until_nb=19),
    DaoToRun(org_name='Balancer', splits_freq='3d', run_until_nb=19),
    DaoToRun(org_name='DEAD FoundationsDAO', splits_freq='2d', last_folds=20, last_fold_date='2021-11-28', run_until_nb=19),
    DaoToRun(org_name='Aave - Aavegotchi', splits_freq='5d', last_fold_date='2023-05-05', run_until_nb=19),
    DaoToRun(org_name='DAOSquare Grants', splits_freq='W-FRI', last_fold_date='2021-07-02', run_until_nb=19),
    DaoToRun(org_name='Genesis Alpha', last_fold_date='2019-12-04', run_until_nb=19),
    DaoToRun(org_name='HUWA-DAO', splits_freq='W-THU', last_fold_date='2021-12-09', run_until_nb=19),
    DaoToRun(org_name='Index Coop', splits_freq='W-THU', last_fold_date='2023-07-13', run_until_nb=19),
    DaoToRun(org_name='Lido', splits_freq='W-WED', last_fold_date='2022-06-29', run_until_nb=19),
    DaoToRun(org_name='MetaCartel - MetaCartel Ventures', splits_freq='W-THU', last_fold_date='2022-01-06', run_until_nb=19),
    DaoToRun(org_name='Plaza', splits_freq='3d', last_folds=20, last_fold_date='2022-06-29', run_until_nb=19),
    DaoToRun(org_name='SharkDAO', splits_freq='3d', last_folds=20, last_fold_date='2022-05-30', run_until_nb=19),
    DaoToRun(org_name='dOrg', splits_freq='2d', last_folds=20, last_fold_date='2022-01-31', run_until_nb=19),
    DaoToRun(org_name='dxDAO - xDXdao', splits_freq='W-THU', last_fold_date='2022-05-05', run_until_nb=19),
]

# Show the configured DAOs as a table, ordered case-insensitively by name.
(
    pd.DataFrame([asdict(dao_cfg) for dao_cfg in ALL_DAOS])
    .sort_values('org_name', key=lambda names: names.str.lower())
)

In [None]:
def _nb_le(nb: Path, last_nb: int):
    return int(re.match(r'(\d+)_', nb.name)[1]) <= last_nb

def _filter_nb_le(nbs: list[Path], last_nb: int):
    return list(filter(lambda x: _nb_le(x, last_nb), nbs))

# Sanity check: list the discovered notebooks whose number is at most 11.
print(_filter_nb_le(run_notebooks, 11))

In [None]:
# Outer progress bar: one step per DAO, with the current name as postfix.
org_bar = tqdm(ALL_DAOS, desc='org')
for dao in org_bar:
    org_bar.set_postfix_str(dao.org_name)

    # Inner progress bar: only the notebooks up to this DAO's cutoff number.
    nbs_for_dao = _filter_nb_le(run_notebooks, dao.run_until_nb)
    nb_bar = tqdm(nbs_for_dao, desc='notebook', leave=False)
    for nb in nb_bar:
        nb_bar.set_postfix_str(nb)
        # The DAO settings are forwarded as upper-case keyword arguments.
        run_dao_notebook(
            nb,
            OUTPUT_PATH,
            EXECUTION_ID,
            ORG_NAME=dao.org_name,
            SPLITS_FREQ=dao.splits_freq,
            LAST_FOLDS=dao.last_folds,
            SPLITS_NORMALIZE=dao.splits_normalize,
            LAST_FOLD_DATE_STR=dao.last_fold_date,
        )