In [None]:
import re
from pathlib import Path
from typing import Any
from dataclasses import asdict

import numpy as np
import pandas as pd

from tqdm.autonotebook import tqdm
import papermill as pm

from recsys4daos.utils.notebooks import run_dao_notebook, DaoToRun

import paths

In [None]:
# Only notebooks whose file name starts with "<digits>_" belong to the pipeline;
# sorting the paths puts them in the order they are meant to run.
_numbered_name = re.compile(r'\d+_')
all_notebooks = Path('.').glob('*.ipynb')
run_notebooks = [nb_path for nb_path in all_notebooks if _numbered_name.match(nb_path.name)]
run_notebooks.sort()
run_notebooks

In [None]:
# Passed as the second positional argument of `run_dao_notebook` for every run
# (presumably the directory where executed notebooks are written — confirm in
# recsys4daos.utils.notebooks).
OUTPUT_PATH: str = '../nbout/'
# Passed as the third positional argument of `run_dao_notebook` for every run
# (presumably identifies this batch of executions — TODO confirm).
EXECUTION_ID: str = '2024-09-24T10:00'

First, we define which DAOs we want to run the analysis on, and for which of them the recommenders will be run.

In [None]:
# Every DAO to process, together with its per-DAO run configuration.
#
# How the fields are consumed in this notebook:
#   - org_name, splits_freq, last_folds, splits_normalize, last_fold_date and
#     extra_hparams are handed to `run_dao_notebook` as ORG_NAME, SPLITS_FREQ,
#     LAST_FOLDS, SPLITS_NORMALIZE, LAST_FOLD_DATE_STR and EXTRA_HPARAMS
#     (see `run_dao_with_consts`).
#   - run_until_nb: only notebooks whose numeric file-name prefix is <= this
#     value are executed for the DAO, so 99 selects every notebook with a
#     prefix up to 99.
#   - comment: free-text note on why the DAO is configured this way; it is
#     saved with the rest of the entry but not passed to `run_dao_notebook`.
# Fields that are omitted keep the defaults declared by `DaoToRun`.
#
# NOTE(review): the trailing `# 512` / `# 16` comments next to the batch-size
# hyper-parameters look like 2**value — confirm against the notebook that
# reads MAX_BATCH_SIZE / MIN_BATCH_SIZE.
ALL_DAOS: list[DaoToRun] = [
    DaoToRun(
        org_name='Decentraland',
        splits_freq='W-THU',
        run_until_nb=99,
        last_fold_date='2023-07-13',
    ),
    DaoToRun(
        org_name='HausDao Warcamp',
        run_until_nb=10,
        comment='There are no ten succesive folds with open proposals',
    ),
    DaoToRun(
        org_name='PancakeSwap',
        splits_freq='3d',
        last_fold_date='2023-06-27',
        run_until_nb=99,
        extra_hparams={
            'MAX_BATCH_SIZE': 9, # 512
        },
    ),
    DaoToRun(
        org_name='Balancer',
        run_until_nb=19,
        splits_freq='3d',
        comment='There are no ten succesive folds with open proposals, see 10_baseline.ipynb',
    ),
    DaoToRun(
        org_name='DEAD FoundationsDAO',
        run_until_nb=99,
        splits_freq='2d',
        last_fold_date='2021-11-28',
        last_folds=20,
        extra_hparams={
            'MAX_BATCH_SIZE': 9, # 512
            'GPUS': 32,
        },
    ),
    DaoToRun(
        org_name='MetaCartel - MetaCartel Ventures',
        run_until_nb=99,
        splits_freq='W-THU',
        last_fold_date="2022-01-06",
    ),
    # Literally two users
    # DaoToRun(
    #     org_name='DAOSquare Grants',
    #     # Wont run more because it has about 1vpp
    #     # and worked for about one week
    #     run_until_nb=10,
    #     splits_freq='W-FRI',
    #     last_fold_date='2021-07-02',
    # ),
    DaoToRun(
        org_name='Genesis Alpha',
        run_until_nb=99,
        last_fold_date='2019-12-04',
    ),
    DaoToRun(
        org_name='NFTX',
        run_until_nb=1,
        splits_freq='W-THU',
        last_fold_date="2021-08-26",
        comment="Some proposals have votes after they were closed",
    ),
    DaoToRun(
        org_name='HUWA-DAO',
        run_until_nb=99,
        splits_freq='2d',
        last_fold_date="2021-11-13",
        last_folds=6,
    ),
    DaoToRun(
        org_name='Index Coop',
        run_until_nb=99,
        splits_freq='W-THU',
        last_fold_date='2023-07-13',
    ),
    DaoToRun(
        org_name='Lido',
        run_until_nb=99,
        splits_freq='W-WED',
        last_fold_date="2022-07-06",
        comment="It is not in the top 20 DAOs and it has just 223 proposals. Will not run GNN on it.",
    ),
    # Too few users
    # DaoToRun(
    #     org_name='Plaza',
    #     run_until_nb=19,
    #     splits_freq='3d',
    #     last_folds=20,
    #     last_fold_date="2022-06-29",
    # ),
    DaoToRun(
        org_name='SharkDAO',
        run_until_nb=99,
        splits_freq='3d',
        last_folds=10,
        last_fold_date="2022-04-27", 
    ),
    DaoToRun(
        org_name='dOrg',
        run_until_nb=99,
        splits_freq='2d',
        last_folds=10,
        last_fold_date="2022-02-18",
    ),
    DaoToRun(
        org_name='dxDAO - xDXdao',
        run_until_nb=99,
        splits_freq='W-THU',
        last_fold_date="2022-05-05",
    ),
    DaoToRun(
        org_name='Bancor',
        run_until_nb=10,
        splits_freq="3d",
        last_fold_date="2022-01-26",
        comment="I could not find any folds with enough proposals in test",
    ),
    DaoToRun(
        org_name='Raid Guild',
        run_until_nb=10,
        splits_freq='W-THU',
        last_fold_date="2021-12-30",
        comment="I could not find any folds with enough proposals in test",
    ),
    # Not enough proposals
    DaoToRun(
        org_name='JuiceboxDAO',
        run_until_nb=1,
    ),
    DaoToRun(
        org_name='Frax',
        run_until_nb=99,
        splits_freq='5d',
        last_fold_date='2022-05-26',
    ),
    DaoToRun(
        org_name='Bent Finance',
        run_until_nb=10,
        last_fold_date='2023-04-30',
        splits_freq='2d',
    ),
    DaoToRun(
        org_name='Aave - Aavegotchi',
        run_until_nb=99,
        splits_freq='5d',
        last_fold_date='2023-05-01',
        extra_hparams={
            'WINDOW_SIZES': ['1d', '7d', '14d'], # TODO: Set to something that makes sense
            'PLN_MODEL_NAMES': ['all-MiniLM-L12-v2'],
        },
    ),
    DaoToRun(
        org_name='Aura Finance',
        run_until_nb=99,
        last_fold_date='2023-07-14',
        splits_freq='7d',
        extra_hparams={
            'MIN_BATCH_SIZE': 4, # 16
        }
    ),
    DaoToRun(
        org_name='Magic Square',
        run_until_nb=99,
        last_fold_date='2023-07-17',
        splits_freq='7d',
        extra_hparams={
            'MIN_BATCH_SIZE': 4, # 16
        }
    ),
    DaoToRun(
        org_name='Good Morning News',
        run_until_nb=10,
        comment="""
        A strange DAO used as a news portal. A proposal was created every day with four
        interesting links or headlines. People voted on them, but I still don't know if 
        it was a kind of "like", or if the winning news were fully developed the next
        day.
        """
    ),
]

# Persist the run configuration through the project's `paths` helper.
paths.save_daos_to_run(ALL_DAOS)


def _count_proposals_and_voters(org_name: str) -> tuple[int, int]:
    """Return ``(n_proposals, n_voters)`` for one DAO.

    The votes are loaded a single time and both distinct counts are taken from
    that one frame, instead of loading the same data once per statistic.
    """
    votes = paths.load_votes(org_name)
    return votes['proposal'].nunique(), votes['voter'].nunique()


# One row per configured DAO, with the dataclass fields as columns.
dfd = pd.DataFrame(map(asdict, ALL_DAOS))
# Both counts come from the votes data (not from paths.load_proposals), so
# `nprops` is the number of distinct proposals that appear in the votes.
_vote_counts = [_count_proposals_and_voters(name) for name in dfd['org_name']]
dfd['nprops'] = [counts[0] for counts in _vote_counts]
dfd['nvoters'] = [counts[1] for counts in _vote_counts]
# Case-insensitive alphabetical order, so e.g. "dOrg" is not pushed to the end.
dfd = dfd.sort_values('org_name', key=lambda c: c.str.lower())
dfd.style.background_gradient(cmap='YlGnBu', subset=['run_until_nb', 'nprops', 'nvoters'])

In [None]:
def run_dao_with_consts(nb: Path, dao: DaoToRun):
    """Run notebook ``nb`` for ``dao`` using this notebook's global settings.

    Thin wrapper around ``run_dao_notebook``: the module-level ``OUTPUT_PATH``
    and ``EXECUTION_ID`` are passed positionally, and the DAO's configuration
    is forwarded as upper-case keyword arguments.

    Args:
        nb: Path of the notebook to run.
        dao: Configuration entry of the DAO to run it for.
    """
    dao_params: dict[str, Any] = {
        'EXTRA_HPARAMS': dao.extra_hparams,
        'ORG_NAME': dao.org_name,
        'SPLITS_FREQ': dao.splits_freq,
        'LAST_FOLDS': dao.last_folds,
        'SPLITS_NORMALIZE': dao.splits_normalize,
        'LAST_FOLD_DATE_STR': dao.last_fold_date,
    }
    run_dao_notebook(nb, OUTPUT_PATH, EXECUTION_ID, **dao_params)

We want to run the explore notebook before anything else, and then show a kind of meta-analysis in a table.

In [None]:
# Run every "explore"
# The notebook path does not depend on the DAO, so build it once.
_explore_nb = Path(".")/"01_explore.ipynb"
for dao in (b1 := tqdm(ALL_DAOS, desc='org')):
    # Show which organisation is being processed, as the full-pipeline loop
    # further down does; previously the bound progress bar was never used.
    b1.set_postfix_str(dao.org_name)
    run_dao_with_consts(_explore_nb, dao)

In [None]:
# Per-DAO information loaded through the project's `paths` helper; the dict keys
# become the index (presumably organisation names, since the frame is joined on
# `org_name` below — confirm).
daos_info = paths.load_daos_data()
dfi = pd.DataFrame.from_dict(daos_info, orient='index')

# Columns that are left out of the overview table.
_hidden_columns = [
    'comment',
    'splits_normalize',
    'last_folds',
    'dfv_last_proposal',
    'max_open_freq',
    'max_proposals_ws',
]
_configured_with_info = dfd.set_index('org_name').join(dfi)
_table = (
    _configured_with_info
    .sort_values('nprops', ascending=False)
    .drop(columns=_hidden_columns)
)
def _color_row(row):
    c = ''
    if row['run_until_nb'] >= 99:
        c = 'background-color: green'
    elif row['run_until_nb'] >= 20:
        c = 'background-color: orange'
    elif row['run_until_nb'] >= 10:
        c = 'background-color: yellow'
    return [c] * len(row)
    
# _style = _table.style.apply(_color_row, axis=1)
# Shade the numeric overview columns and print numbers with two decimals and a
# space as thousands separator.
_gradient_columns = ['run_until_nb', 'nprops', 'nvoters']
_shaded = _table.style.background_gradient(cmap='YlGnBu', subset=_gradient_columns)
_style = _shaded.format(precision=2, thousands=' ')
_style

In [None]:
def _format_open_time(value) -> str:
    """Render a duration in days, rounded to the nearest 12 hours.

    A whole number of days is printed without decimals, e.g. ``"7 days"``
    rather than ``"7.0 days"``; half days keep one decimal, e.g. ``"3.5 days"``.
    """
    rounded_days = pd.Timedelta(value).round('12h').total_seconds() / (24*3600)
    return "{:.1f} days".format(rounded_days).replace(".0", "")


# Keep only the DAOs configured with run_until_nb == 99.
_fully_run = dfd.set_index('org_name').join(dfi).query('run_until_nb == 99')
_table = _fully_run.drop(
    columns=['comment', 'splits_normalize', 'last_folds', 'dfv_last_proposal', 'max_open_freq', 'max_proposals_ws']
).copy()
_table['mot_round'] = _table['median_open_time'].apply(_format_open_time)

# Columns exported to LaTeX, in the order they appear in the table.
_latex_columns = ['mot_round', 'splits_freq', 'last_fold_date', 'folds_avg_open_proposals']
_table = _table[_latex_columns]
display(_table)
print(_table.to_latex())

In [None]:
def _nb_le(nb: Path, last_nb: int):
    return int(re.match(r'(\d+)_', nb.name)[1]) <= last_nb

def _filter_nb_le(nbs: list[Path], last_nb: int):
    """Return the notebooks of ``nbs`` accepted by ``_nb_le`` for ``last_nb``, in their original order."""
    return [candidate for candidate in nbs if _nb_le(candidate, last_nb)]

# Quick check of the filter: list the pipeline notebooks numbered 11 or lower.
print(_filter_nb_le(run_notebooks, 11))

In [None]:
# Global cap on the notebook number, applied on top of each DAO's own
# `run_until_nb`; lower it to stop every DAO early.
MAX_RUN_UNTIL = 100

for dao in (b1 := tqdm(ALL_DAOS, desc='org')):
    b1.set_postfix_str(dao.org_name)
    # Notebooks this DAO should go through: its own limit, capped globally.
    _last_nb_for_dao = min(dao.run_until_nb, MAX_RUN_UNTIL)
    _dao_notebooks = _filter_nb_le(run_notebooks, _last_nb_for_dao)
    for nb in (b2 := tqdm(_dao_notebooks, desc='notebook', leave=False)):
        b2.set_postfix_str(nb)
        run_dao_with_consts(nb, dao)

In [None]:
import requests

# Push a "finished" notification to the ntfy.sh topic. Without a timeout,
# requests can wait forever on an unresponsive server and leave this last cell
# hanging, so the request is bounded to a few seconds.
requests.post(
    "https://ntfy.sh/grasia_notebooks",
    data="Finished running all notebooks",
    timeout=10,
)