In [1]:
from pathlib import Path
from IPython.display import Markdown
import re

import papermill as pm

from src import paths

import datetime as dt

In [2]:
# Parameters that will be passed to the other notebooks

ORG_NAME = 'Decentraland'
ORG_PLATFORM = 'snapshot'
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Whether or not to move everything to 00:00
LAST_SPLITS = 10 # Use just last 10 splits
USE_ORG_NAMES = False # See load_pandas_df

# Parameters of this notebook
DO_TRAIN = True
# The value is an ISO-8601 timestamp string (not a bool); it is compared against
# the id stored in previously written output notebooks.
EXECUTION_ID: str = dt.datetime.utcnow().isoformat() # Used to see whether to skip completed notebooks
CUTOFF_DATE_STR: str = None # Cutoff date for the dataset

In [3]:
# Display the diagram(s) from the README.md

README = Path('./README.md')

# The quantifier is non-greedy so every ```mermaid fence is captured on its own
# and each match ends at its own closing fence. A greedy match would run from
# the first mermaid fence to the last closing fence in the file, dragging any
# prose or other code blocks in between into the rendered output.
diagrams = re.findall(r'```mermaid[\s\S]*?```', README.read_text(encoding='utf-8'))

Markdown("\n".join(diagrams))

```mermaid
graph LR
10_baseline --> 20_results

10_baseline --> 11_pln-tune -.-> 12_hybrid
07_microsoft_tuning --> 09_analyze_results --> 12_hybrid
12_hybrid --> 20_results
10_baseline --> 09_analyze_results
```

## Define some common things

In [37]:
import nbformat

def is_increasing(l):
    """Return True when `l` is empty (or otherwise falsy) or strictly increasing.

    Each element is compared with its successor using `<`, so equal
    neighbours make the sequence non-increasing.
    """
    if not l:
        return True
    for current, following in zip(l, l[1:]):
        if not current < following:
            return False
    return True

def isCompleted(fname):
    """Tell whether a notebook file looks fully executed from top to bottom.

    The notebook is read with nbformat (version 4). Only code cells whose
    source is not blank are considered. The notebook counts as completed when
    none of those cells has an `execution_count` of None and the counts are
    strictly increasing in cell order.
    """
    notebook = nbformat.read(fname, as_version=4)

    counts = []
    for cell in notebook['cells']:
        if cell['cell_type'] != 'code':
            continue
        if not cell['source'].strip():
            continue
        counts.append(cell['execution_count'])

    # A cell that never ran has no execution count.
    if any(count is None for count in counts):
        return False
    return is_increasing(counts)

# print(isCompleted('nbout/Decentraland/04c_dao-census-onedao.ipynb'))
# print(isCompleted('nbout/Balancer/10_baseline_mp.ipynb'))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
True


In [20]:
def getOldExec(fname):
    """Return the EXECUTION_ID stored in a notebook's papermill metadata.

    The notebook is read with nbformat (version 4) and the value is looked up
    under ``metadata['papermill']['parameters']['EXECUTION_ID']``.

    Returns None when the notebook carries no papermill metadata, no recorded
    parameters, or no EXECUTION_ID entry. Callers comparing the result against
    the current id then see "different execution" instead of a KeyError.
    """
    nb = nbformat.read(fname, as_version=4)
    papermill_meta = nb.metadata.get('papermill') or {}
    recorded_params = papermill_meta.get('parameters') or {}
    return recorded_params.get('EXECUTION_ID')

# getOldExec(fname='nbout/Decentraland/04c_dao-census-onedao.ipynb')

In [21]:
def run_notebook(name):
    """Execute notebook `name` with papermill and store the result under ./nbout/.

    The output goes to ``./nbout/<ORG_NAME with " / " replaced by "_">/<name>``.
    The target notebook must declare every shared parameter (ORG_NAME,
    ORG_PLATFORM, SPLITS_FREQ, SPLITS_NORMALIZE, LAST_SPLITS, USE_ORG_NAMES,
    CUTOFF_DATE_STR); those values plus EXECUTION_ID are passed to it.

    When an output file already exists and EXECUTION_ID is truthy, the run is
    skipped if that file was produced with the same EXECUTION_ID and is
    completed according to isCompleted. Otherwise the notebook is re-run.

    Raises:
        AssertionError: if `name` does not exist or lacks a required parameter.

    Returns:
        None.
    """
    assert Path(name).exists(), f"No existe el fichero {name}"

    org_dir = Path(f'./nbout/{ORG_NAME.replace(" / ", "_")}')
    org_dir.mkdir(parents=True, exist_ok=True)
    target = org_dir / name

    # Every shared parameter has to be declared by the notebook we are about to run.
    declared = pm.inspect_notebook(name)
    required_names = (
        'ORG_NAME',
        'ORG_PLATFORM',
        'SPLITS_FREQ',
        'SPLITS_NORMALIZE',
        'LAST_SPLITS',
        'USE_ORG_NAMES',
        'CUTOFF_DATE_STR',
    )
    for required_name in required_names:
        assert required_name in declared, f'{required_name} is not in notebook params'

    # Decide whether a previous output can be reused.
    if target.exists() and EXECUTION_ID:
        previous_id = getOldExec(target)

        if previous_id != EXECUTION_ID:
            print(f"Different exec, re-running ({previous_id} != {EXECUTION_ID})")
        elif isCompleted(target):
            print(f"Skipping {target} with EXECUTION_ID {EXECUTION_ID}")
            return
        else:
            print("Was not complete, re-running")

    forwarded = {
        'ORG_NAME': ORG_NAME,
        'ORG_PLATFORM': ORG_PLATFORM,
        'SPLITS_FREQ': SPLITS_FREQ,
        'SPLITS_NORMALIZE': SPLITS_NORMALIZE,
        'LAST_SPLITS': LAST_SPLITS,
        'USE_ORG_NAMES': USE_ORG_NAMES,
        'EXECUTION_ID': EXECUTION_ID,
        'CUTOFF_DATE_STR': CUTOFF_DATE_STR,
    }
    pm.execute_notebook(
        name,
        target,
        autosave_cell_every=30,
        parameters=forwarded,
    )
# run_notebook('11_pln-tune.ipynb')

## Run data from dao

In [22]:
#papermill_description=dao-census
# First notebook executed by this pipeline; run_notebook writes its output
# under ./nbout/<ORG_NAME>/ and skips it when a completed run with the same
# EXECUTION_ID already exists.
run_notebook('04c_dao-census-onedao.ipynb')

Skipping nbout/Decentraland/04c_dao-census-onedao.ipynb with EXECUTION_ID 2024-03-01T18:59:51.742381


## Run baseline

In [23]:
#papermill_description=baseline
# Execute the baseline notebook (presumably the `10_baseline` node of the
# README diagram, which is upstream of 11_pln-tune, 09_analyze_results and
# 20_results).
run_notebook('10_baseline_mp.ipynb')

Passed unknown parameter: EXECUTION_ID


Was not complete, re-running


Executing:   0%|          | 0/54 [00:00<?, ?cell/s]

In [24]:
#papermill_description=check_train
# Gate for the stages below: when DO_TRAIN is falsy the notebook stops here,
# so none of the following cells are executed.
import sys
if not DO_TRAIN: 
    # Exit status 0 marks this as a deliberate stop rather than a failure.
    # NOTE(review): presumably papermill treats SystemExit(0) as a clean early
    # stop of the notebook — confirm against the papermill version in use.
    sys.exit(0)

## Run pln_tune

In [25]:
#papermill_description=pln_tune
# Disabled skip check kept for reference; skipping is now decided inside
# run_notebook via EXECUTION_ID and isCompleted.
# if paths.pln_mdf(ORG_NAME, SPLITS_FREQ, SPLITS_NORMALIZE).exists():
#     print(f"Skipping {ORG_NAME} because it has been done")    
run_notebook('11_pln-tune.ipynb')

Passed unknown parameter: EXECUTION_ID


Was not complete, re-running


Executing:   0%|          | 0/59 [00:00<?, ?cell/s]

2024-03-01 19:05:05.043528: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-01 19:05:05.043550: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-01 19:05:05.043566: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Run microsoft_tuning

In [None]:
#papermill_description=microsoft
# Execute the 07_microsoft_tuning notebook; in the README diagram it feeds
# 09_analyze_results.
run_notebook('07_microsoft_tuning.ipynb')

## Run analyze_results

In [None]:
#papermill_description=analyze
# run_notebook has no return value (it always yields None), so the call is
# made purely for its side effect of writing the executed notebook to ./nbout/.
run_notebook('09_analyze_results.ipynb')

## Run hybrid

In [None]:
#papermill_description=hybrid
# Execute the 12_hybrid notebook; in the README diagram it comes after
# 11_pln-tune and 09_analyze_results and feeds 20_results.
run_notebook('12_hybrid.ipynb')

## Run results

In [None]:
#papermill_description=results
# Final stage: execute the 20_results notebook, the sink node of the README
# diagram.
run_notebook('20_results.ipynb')