# Make backtesting commands

Generate the CLI commands that I need to perform backtesting on a series of months.

In [None]:
import os
import pathlib
import pandas as pd

In [None]:
# Root directory under which the run outputs referenced in this notebook live.
# Indexing os.environ fails fast with a KeyError naming DATA_DIR when the variable
# is unset, instead of the opaque TypeError raised by pathlib.Path(None).
DATA_DIR = pathlib.Path(os.environ['DATA_DIR'])

In [None]:
# Preview the month-start dates that the backtesting cells iterate over.
pd.date_range(start='2020-08-01', end='2021-12-01', freq='1MS')

Model version 7 was deployed in July of 2019, so if we begin backtesting in August 2020 we have a full year of training data.

In [None]:
# EMOS backtest: for every training-window length and every validation month,
# print one smc01_train command. The training window ends on the first day of
# the month, and validation spans from that day to the next month start.
training_lengths = ['30D', '90D', '180D', '270D', '365D']
dates = pd.date_range('2020-08-01', '2021-12-01', freq='1MS')
for training_length in training_lengths:
    window = pd.to_timedelta(training_length)
    for i in range(len(dates) - 1):
        month_start, next_month_start = dates[i], dates[i + 1]
        train_begin = (month_start - window).strftime('%Y-%m-%d')
        # Training stops on the same date validation starts.
        train_end = val_begin = month_start.strftime('%Y-%m-%d')
        val_end = next_month_start.strftime('%Y-%m-%d')
        command = (
            f'smc01_train experiment=emos_gdps_metar '
            f'experiment/dataset=gdps_metar_bootstrap '
            f'experiment.timeout_min=120 '
            f'logging.mlflow.run_name=emos_backtest_bootstrap_{training_length} '
            f'experiment.split.train_begin={train_begin} '
            f'experiment.split.train_end={train_end} '
            f'experiment.split.val_begin={val_begin} '
            f'experiment.split.val_end={val_end} '
            f'hydra/launcher=slurm '
            f'-m &'
        )
        print(command)


In [None]:
# Attention fine-tune backtest: one smc01_train multirun command per validation month.
# Only the 30-day training window is generated here; widen the list to sweep more
# lengths, e.g. ['30D', '90D', '180D', '270D', '365D'].
training_lengths = ['30D']

dates = pd.date_range('2020-08-01', '2021-12-01', freq='1MS')
for training_length in training_lengths:
    window = pd.to_timedelta(training_length)
    for i in range(len(dates) - 1):
        month_start, next_month_start = dates[i], dates[i + 1]
        train_begin = (month_start - window).strftime('%Y-%m-%d')
        # Training stops on the same date validation starts.
        train_end = val_begin = month_start.strftime('%Y-%m-%d')
        val_end = next_month_start.strftime('%Y-%m-%d')
        command = (
            f'smc01_train experiment=attention_gdps_metar_finetune_backtest '
            f'experiment/split=train_7_val_7 '
            f'experiment/dataset=gdps_metar_step,gdps_metar_step_bootstrap '
            f'logging.mlflow.run_name=attention_finetune_backtest_{training_length} '
            f'experiment.split.train_begin={train_begin} '
            f'experiment.split.train_end={train_end} '
            f'experiment.split.val_begin={val_begin} '
            f'experiment.split.val_end={val_end} '
            f'experiment.freeze_upper=True,False '
            f'hydra.launcher.mem_gb=30 hydra/launcher=slurm '
            f'-m &'
        )
        print(command)

In [None]:
# Raw-model baseline: print one smc01_validate command per validation month.
# Only the validation split is passed to the command, so no training window is
# computed here. This keeps the cell runnable on a fresh kernel: it no longer reads
# a `training_length` left over from an earlier cell's loop.
# NOTE(review): every printed line, including the last, ends with `&&` — presumably
# a final command is appended when pasting into the shell; confirm.
dates = pd.date_range('2020-08-01', '2021-12-01', freq='1MS')
for i in range(len(dates) - 1):
    val_begin = str(dates[i])[:10]
    val_end = str(dates[i+1])[:10]
    print(
        f'smc01_validate '
        f'experiment=raw_model_gdps_metar '
        f'experiment/dataset=gdps_metar_bootstrap '
        f'experiment.timeout_min=60 '
        f'logging.mlflow.run_name=raw_gdps_backtest_bootstrap '
        f'experiment.split.val_begin={val_begin} '
        f'experiment.split.val_end={val_end} '
        f'&&'
    )


# Progressive finetuning

In [None]:
# Progressive fine-tuning of the attention model: for each epoch budget and each
# training-window length, print one smc01_train command whose training window
# starts `training_length` before the fixed train_end date.
training_lengths = ['30D', '60D', '90D', '180D', '270D', '365D']
train_end = pd.to_datetime('2020-07-01')
for max_epochs in [1, 2]:
    for training_length in training_lengths:
        window_start = train_end - pd.to_timedelta(training_length)
        train_begin = window_start.strftime('%Y-%m-%d')
        command = (
            f'smc01_train '
            f'experiment=attention_gdps_metar_finetune_progressive '
            f'experiment.max_epochs={max_epochs} '
            f'logging.mlflow.run_name=attention_gdps_metar_finetune_progressive_{training_length}_{max_epochs}ep '
            f'experiment.timeout_min=1200 experiment.split.train_begin={train_begin} '
            f'hydra/launcher=slurm '
            f'-m &'
        )
        print(command)


In [None]:
# Progressive EMOS training: one single-epoch smc01_train command per
# training-window length, each window starting `training_length` before train_end.
training_lengths = ['30D', '60D', '90D', '180D', '270D', '365D']
train_end = pd.to_datetime('2020-07-01')
for training_length in training_lengths:
    window_start = train_end - pd.to_timedelta(training_length)
    train_begin = window_start.strftime('%Y-%m-%d')
    command = (
        f'smc01_train '
        f'experiment=emos_gdps_metar_progressive '
        f'experiment.max_epochs=1 '
        f'experiment/dataset=gdps_metar_bootstrap '
        f'logging.mlflow.run_name=emosgdps_metar_progressive_{training_length}_1ep '
        f'experiment.timeout_min=1200 '
        f'experiment.split.train_begin={train_begin} '
        f'hydra/launcher=slurm '
        f'-m &'
    )
    print(command)


## Progressive with val

In [None]:
# Progressive attention fine-tuning with a validation split. Each window of
# `training_length` days ends at test_begin; its last 20% (in whole days,
# truncated) is used for validation and the rest for training.
training_lengths = [30, 60, 90, 180, 270, 365, 700]
test_begin = pd.to_datetime('2020-12-01')
for training_length in training_lengths:
    val_days = int(training_length * 0.2)
    val_end = test_begin.strftime('%Y-%m-%d')
    val_begin = (test_begin - pd.to_timedelta(val_days, unit='D')).strftime('%Y-%m-%d')
    # Training stops on the same date validation starts.
    train_end = val_begin
    train_begin = (test_begin - pd.to_timedelta(int(training_length), unit='D')).strftime('%Y-%m-%d')
    command = (
        f'smc01_train '
        f'experiment=attention_gdps_metar_finetune '
        f'logging.mlflow.run_name=attention_gdps_metar_finetune_progressive_{training_length}_wval '
        f'experiment.freeze_upper=True experiment.timeout_min=1200 '
        f'experiment.split.train_begin={train_begin} '
        f'experiment.split.train_end={train_end} '
        f'experiment.split.val_begin={val_begin} '
        f'experiment.split.val_end={val_end} '
        f'hydra/launcher=slurm '
        f'-m &'
    )
    print(command)


In [None]:
# Progressive EMOS training with a validation split. Each window of
# `training_length` days ends at test_begin; its last 20% (in whole days,
# truncated) is used for validation and the rest for training.
training_lengths = [30, 60, 90, 180, 270, 365, 700]
test_begin = pd.to_datetime('2020-12-01')
for training_length in training_lengths:
    val_days = int(training_length * 0.2)
    val_end = test_begin.strftime('%Y-%m-%d')
    val_begin = (test_begin - pd.to_timedelta(val_days, unit='D')).strftime('%Y-%m-%d')
    # Training stops on the same date validation starts.
    train_end = val_begin
    train_begin = (test_begin - pd.to_timedelta(int(training_length), unit='D')).strftime('%Y-%m-%d')
    command = (
        f'smc01_train '
        f'experiment=emos_gdps_metar_progressive '
        f'experiment/dataset=gdps_metar_bootstrap '
        f'logging.mlflow.run_name=emosgdps_metar_progressive_{training_length}_wval '
        f'experiment.timeout_min=1200 experiment.split.train_begin={train_begin} '
        f'experiment.split.train_end={train_end} '
        f'experiment.split.val_begin={val_begin} '
        f'experiment.split.val_end={val_end} '
        f'hydra/launcher=slurm '
        f'-m &'
    )
    print(command)


## Re-evaluations

In [None]:
# Run directory (under DATA_DIR) used as the checkpoint for each EMOS progressive
# re-validation; keys are presumably the training length in days — confirm.
tags = {
    duration: DATA_DIR / run_dir
    for duration, run_dir in [
        (30, 'runs/postprocessing/multirun/2022-05-28/23-02-34/4846114'),
        (60, 'runs/postprocessing/multirun/2022-05-28/23-02-34/4846110'),
        (90, 'runs/postprocessing/multirun/2022-05-28/23-07-57/4846117'),
        (180, 'runs/postprocessing/multirun/2022-05-28/23-02-34/4846112'),
        (270, 'runs/postprocessing/multirun/2022-05-28/23-02-34/4846113'),
        (365, 'runs/postprocessing/multirun/2022-05-28/23-02-34/4846111'),
    ]
}

In [None]:
# Print one smc01_validate command per entry in `tags`, logging each run to the
# SMC01_CMOS_REVAL MLflow experiment.
for duration in tags:
    checkpoint = tags[duration]
    command = (
        f'smc01_validate '
        f'experiment=emos_gdps_metar_progressive '
        f'checkpoint_path={checkpoint} '
        f'logging.mlflow.experiment_name=SMC01_CMOS_REVAL '
        f'logging.mlflow.run_name=emos_gdps_metar_progressive_wval_{duration}_1feature'
    )
    print(command)

### Attention

In [None]:
# Run directory (under DATA_DIR) used as the checkpoint for each attention
# re-validation; keys are presumably the training length in days — confirm.
tags = {
    duration: DATA_DIR / run_dir
    for duration, run_dir in [
        (30, 'runs/postprocessing/multirun/2022-05-28/19-22-25/4845695'),
        (60, 'runs/postprocessing/multirun/2022-05-28/19-22-25/4845694'),
        (90, 'runs/postprocessing/multirun/2022-05-28/19-22-25/4845693'),
        (180, 'runs/postprocessing/multirun/2022-05-28/19-22-25/4845691'),
        (270, 'runs/postprocessing/multirun/2022-05-28/19-22-25/4845690'),
        (365, 'runs/postprocessing/multirun/2022-05-28/19-22-25/4845692'),
        (700, 'runs/postprocessing/multirun/2022-05-29/11-17-30/4847953'),
    ]
}

In [None]:
# Print one smc01_validate command per entry in `tags`, logging each run to the
# SMC01_CMOS_REVAL MLflow experiment.
for duration in tags:
    checkpoint = tags[duration]
    command = (
        f'smc01_validate '
        f'experiment=attention_gdps_metar_finetune_progressive '
        f'checkpoint_path={checkpoint} '
        f'logging.mlflow.experiment_name=SMC01_CMOS_REVAL '
        f'logging.mlflow.run_name=attention_gdps_metar_finetune_progressive_{duration}'
    )
    print(command)

### Attention with freeze upper

In [None]:
# Run directory (under DATA_DIR) used as the checkpoint for each freeze-upper
# attention re-validation; keys are presumably the training length in days — confirm.
tags = {
    duration: DATA_DIR / run_dir
    for duration, run_dir in [
        (30, 'runs/postprocessing/multirun/2022-05-28/20-15-18/4845783'),
        (60, 'runs/postprocessing/multirun/2022-05-28/20-12-07/4845779'),
        (90, 'runs/postprocessing/multirun/2022-05-28/20-15-18/4845784'),
        (180, 'runs/postprocessing/multirun/2022-05-28/20-15-18/4845786'),
        (270, 'runs/postprocessing/multirun/2022-05-28/20-12-07/4845777'),
        (365, 'runs/postprocessing/multirun/2022-05-28/20-12-07/4845780'),
    ]
}

In [None]:
# Print one smc01_validate command per entry in `tags`, logging each run to the
# SMC01_CMOS_REVAL MLflow experiment with a `_freeze_upper` run-name suffix.
for duration in tags:
    checkpoint = tags[duration]
    command = (
        f'smc01_validate '
        f'experiment=attention_gdps_metar_finetune_progressive '
        f'checkpoint_path={checkpoint} '
        f'logging.mlflow.experiment_name=SMC01_CMOS_REVAL '
        f'logging.mlflow.run_name=attention_gdps_metar_finetune_progressive_{duration}_freeze_upper'
    )
    print(command)

### Attention with epoch limit

In [None]:
# Run directories (under DATA_DIR) of the epoch-limited attention runs to re-validate.
runs = [
    DATA_DIR / run_dir
    for run_dir in (
        'runs/postprocessing/multirun/2022-05-29/11-41-06/4847998',
        'runs/postprocessing/multirun/2022-05-29/11-40-55/4847997',
        'runs/postprocessing/multirun/2022-05-29/11-40-34/4847996',
    )
]

In [None]:
# Print one smc01_validate command per run directory in `runs`.
# NOTE(review): every command uses the same run name ending in `_30_freeze_upper`,
# so the runs are not distinguishable by name in MLflow — confirm this is intended.
for r in runs:
    command = (
        f'smc01_validate '
        f'experiment=attention_gdps_metar_finetune_progressive '
        f'checkpoint_path={r} '
        f'logging.mlflow.experiment_name=SMC01_CMOS_REVAL '
        f'logging.mlflow.run_name=attention_gdps_metar_finetune_progressive_30_freeze_upper'
    )
    print(command)