In [None]:
!pip3 install -r code/requirements.txt

In [None]:
import os
import sys
# Directory holding the project's Python modules; it is appended to sys.path so that
# later cells can import them by bare module name.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
module_path = "/home/ec2-user/SageMaker/code"
# Guard against adding the same entry again when this cell is re-run.
if module_path not in sys.path:
    sys.path.append(module_path)
    
from sagemaker.mxnet import MXNet
from sagemaker import get_execution_role

# Execution role returned by the SageMaker SDK for this session.
# NOTE(review): `role` is not referenced by any other visible cell — confirm it is still needed.
role = get_execution_role()

In [None]:
# Recursively copy the local GluonTS dataset directory to the S3 prefix below.
!aws s3 cp --recursive /home/ec2-user/.mxnet/gluon-ts/datasets/  s3://causal-generalization/gluon-datasets/

In [None]:
# NOTE(review): `dataset_recipes` is imported but not used in any visible cell.
from gluonts.dataset.repository.datasets import get_dataset, dataset_recipes

# Load the "electricity" dataset; its `.train` and `.test` splits are inspected below.
# presumably regenerate=False reuses an already downloaded copy — confirm against the GluonTS docs
ds_e = get_dataset("electricity", regenerate=False)

In [None]:
import numpy as np
from gluonts.dataset import util

def report_data(ds):
    """Print the number of time series in `ds` and their average length.

    Every entry yielded by `ds` is converted with `util.to_pandas`, and the length
    of the resulting series is recorded.

    Args:
        ds: Iterable of dataset entries accepted by `util.to_pandas`.
    """
    # One length per entry, in iteration order.
    series_lengths = [len(util.to_pandas(entry)) for entry in ds]
    series_count = len(series_lengths)
    print(f"dataset has {series_count} timeseries examples with on average {np.mean(series_lengths):.2f} many time steps.")
    
# Summarise both splits of the electricity dataset, training split first.
for split in (ds_e.train, ds_e.test):
    report_data(split)

In [None]:
# Load the "traffic" dataset and summarise its training and test splits, in that order.
ds_t = get_dataset("traffic", regenerate=False)
for split in (ds_t.train, ds_t.test):
    report_data(split)

In [None]:
# Load the "m4_hourly" dataset and summarise its training and test splits, in that order.
ds_m4 = get_dataset("m4_hourly", regenerate=False)
for split in (ds_m4.train, ds_m4.test):
    report_data(split)

# Test locally

In [None]:
# Project module, aliased as `try_gluon` for the local run below.
import eval_model_dataset as try_gluon
import importlib
# Reload so that edits made to the module after its first import are picked up
# without restarting the kernel.
importlib.reload(try_gluon)

In [None]:
# Local smoke run of the project's train-and-evaluate entry point.
# presumably the arguments are (estimator name, dataset directory, output directory) —
# verify against eval_model_dataset.train_and_eval
try_gluon.train_and_eval(
    'simple_ff',
    '/home/ec2-user/.mxnet/gluon-ts/datasets/m4_hourly/',
    'results/simple_ff_run_1_test',
)

In [None]:
# Print the results file found under the directory passed as the last argument of the local run.
!cat results/simple_ff_run_1_test/results.json

# Unit test

In [None]:
# The tests directory has to come first on sys.path so that `test_eval_model_dataset`
# is resolved there before any other `tests` module is found.
tests_path = module_path + "/tests"
# Drop entries left over from earlier executions of this cell, then prepend exactly once.
# Mutating sys.path in place keeps the cell idempotent instead of stacking a duplicate
# entry on every re-run.
sys.path[:] = [entry for entry in sys.path if entry != tests_path]
sys.path.insert(0, tests_path)
import test_eval_model_dataset

In [None]:
# Reload the test module so that edits made since it was first imported take effect
# (only needed if you edited the module).
importlib.reload(test_eval_model_dataset)

# Call the test function directly; a failure surfaces as an exception in this cell.
test_eval_model_dataset.test_interventional_dataset()

# Launch remote jobs

In [None]:
# Run the project's launch script; what it submits is defined in the script itself (not shown here).
!./scripts/launch_remote_jobs.sh

# Analyze results

In [None]:
# Create the local download directory; `-p` makes this a no-op if it already exists.
!mkdir -p gluon-experiments

In [None]:
# Download only the `*model.tar.gz` objects below the experiment prefix (everything else is
# excluded first, then the archives are re-included) into gluon-experiments/.
!aws s3 cp --recursive --exclude "*" --include "*model.tar.gz" s3://causal-generalization/gluon-experiments gluon-experiments/

In [None]:
import tarfile
import glob
import json

ESTIMATORS = ["deep_ar", "wavenet", "transformer"]
# Number of repeated runs per estimator; run ids are 0 .. NUM_REPITITION - 1.
NUM_REPITITION = 5

# all_results[dataset][estimator_name][iteration] -> parsed results.json of that run
all_results = {}
for dataset in [
    "electricity",
    "traffic", #"m4_hourly"
               ]:
    all_results[dataset] = {}
    # The experiment prefix differs per dataset: "v2" for electricity, "v3" otherwise.
    version = "v2" if dataset == "electricity" else "v3"
    for estimator_name in ESTIMATORS:
        all_results[dataset][estimator_name] = {}
        for iteration in range(NUM_REPITITION):
            pattern = f"gluon-experiments/{version}{dataset}/{estimator_name}/runid-{iteration}/*/output/model.tar.gz"
            # Sorted so the archive that gets picked is deterministic if several match.
            files = sorted(glob.glob(pattern))
            print(files)
            if not files:
                # Fail with an actionable message instead of a bare IndexError on files[0].
                raise FileNotFoundError(
                    f"no model archive matches {pattern!r}; download the experiment outputs first"
                )
            # Extract next to the archive (the directory that contains model.tar.gz).
            new_dir = files[0].split('model.tar.gz')[0]
            print(new_dir)
            # The context manager closes the archive even if extraction fails.
            # NOTE(review): extractall trusts the member paths stored in the archive;
            # only use it on archives produced by our own training jobs.
            with tarfile.open(files[0], "r:gz") as tar:
                tar.extractall(path=new_dir)
            with open(f"{new_dir}/results.json", 'r') as results_file:
                all_results[dataset][estimator_name][iteration] = json.load(results_file)
            

In [None]:
from collections import defaultdict
# Aggregate runs:
# agg_results[dataset][estimator_name][metric] -> list holding one value per repetition,
# in run-id order.
agg_results = {}
for dataset in [
    "electricity", "traffic", #"m4_hourly"
               ]:
    agg_results[dataset] = {}
    for estimator_name in ESTIMATORS:
        # defaultdict(list) lets every metric start as an empty list on first use.
        per_metric = defaultdict(list)
        agg_results[dataset][estimator_name] = per_metric
        for iteration in range(NUM_REPITITION):
            res = all_results[dataset][estimator_name][iteration]
            for (k, v) in res.items():
                per_metric[k].append(v)


In [None]:
import numpy as np
# Build a printable "mean +/- std" summary per dataset, estimator and metric.
agg_mean = {}
# Metrics stored once per predictor (key suffix "1" / "2"); both lists are pooled.
combine_for_predictors = ["RMSE", "80_CI_width_", "80_CI_width_intervened1_pred", "80_CI_width_intervened2_pred"]
# Metrics that already compare the two predictors; used as stored.
do_not_combine = ["RMSE1vs2", "interventional1_RMSE_pred1vs2", "interventional2_RMSE_pred1vs2"]
for dataset in ["electricity", "traffic"]:  # "m4_hourly" is left out
    agg_mean[dataset] = {}
    for estimator_name in ESTIMATORS:
        runs = agg_results[dataset][estimator_name]
        summary = {}
        for metric in do_not_combine:
            values = runs[metric]
            summary[metric] = f"{np.mean(values):.3f} +/- {np.std(values):.3f}"
        for metric in combine_for_predictors:
            first = runs[metric + "1"]
            second = runs[metric + "2"]
            values = first + second  # list concatenation, not element-wise addition
            summary[metric] = f"{np.mean(values):.3f} +/- {np.std(values):.3f}"
        agg_mean[dataset][estimator_name] = summary

In [None]:
import pandas as pd

# One table per dataset: columns are the estimator names, rows are the metric names
# (pandas turns the outer dict keys into columns and the inner keys into the index).
results_electricity = pd.DataFrame(agg_mean["electricity"])
results_traffic = pd.DataFrame(agg_mean["traffic"])

In [None]:
# Display the traffic summary table.
results_traffic

In [None]:
# Display the electricity summary table.
results_electricity

## For LaTeX

In [None]:
# Print one table row per estimator with the three confidence-interval widths for electricity.
# NOTE(review): each row ends with backslash, space, backslash when printed — confirm this is
# the intended LaTeX row terminator.
for model in results_electricity.columns:
    column = results_electricity[model]
    width = column['80_CI_width_']
    width_intervened1 = column['80_CI_width_intervened1_pred']
    width_intervened2 = column['80_CI_width_intervened2_pred']
    print(f"{model} & {width} & {width_intervened1} & {width_intervened2} \\ \\")

In [None]:
# Print the three confidence-interval widths per estimator for traffic.
# The rows start with "&" and carry neither the model name nor a row terminator;
# presumably they are meant to be appended to the electricity rows by hand — TODO confirm.
results_traffic = pd.DataFrame(agg_mean["traffic"])
for model in results_traffic.columns:
    column = results_traffic[model]
    width = column['80_CI_width_']
    width_intervened1 = column['80_CI_width_intervened1_pred']
    width_intervened2 = column['80_CI_width_intervened2_pred']
    print(f"& {width} & {width_intervened1} & {width_intervened2}")
          

# Graphs

In [None]:
import numpy as np
# Numeric mean and standard deviation per dataset, estimator and metric, used for plotting.
# Note: this rebinds `agg_mean`, which previously held formatted strings.
agg_mean = {}
agg_std = {}
# Metrics stored once per predictor (key suffix "1" / "2"); both lists are pooled.
combine_for_predictors = ["RMSE", "80_CI_width_", "80_CI_width_intervened1_pred", "80_CI_width_intervened2_pred"]
# Metrics that already compare the two predictors; used as stored.
do_not_combine = ["RMSE1vs2", "interventional1_RMSE_pred1vs2", "interventional2_RMSE_pred1vs2"]
for dataset in ["electricity", "traffic"]:
    means_by_estimator = {}
    stds_by_estimator = {}
    agg_mean[dataset] = means_by_estimator
    agg_std[dataset] = stds_by_estimator
    for estimator_name in ESTIMATORS:
        runs = agg_results[dataset][estimator_name]
        means = {}
        stds = {}
        means_by_estimator[estimator_name] = means
        stds_by_estimator[estimator_name] = stds
        for metric in do_not_combine + combine_for_predictors:
            if metric not in combine_for_predictors:
                values = runs[metric]
            else:
                first = runs[metric + "1"]
                second = runs[metric + "2"]
                values = first + second  # list concatenation
            means[metric] = np.mean(values)
            stds[metric] = np.std(values)

In [None]:
def plot_dataset(dataset, agg_mean, agg_std):
    """Draw a grouped, log-scale bar chart of error/disagreement scores for one dataset.

    For each of the three estimators ("deep_ar", "transformer", "wavenet") one bar per
    metric is drawn, with error bars taken from `agg_std`.

    Args:
        dataset: Key into `agg_mean` / `agg_std`; also used as the plot title.
        agg_mean: Nested mapping `agg_mean[dataset][estimator][metric]` giving the bar heights.
        agg_std: Mapping with the same nesting as `agg_mean` giving the error-bar sizes.

    Side effects:
        Changes the global matplotlib rc font-size settings and shows the figure.
    """
    # pyplot is not imported by any visible cell of this notebook, so import it here;
    # otherwise every `plt.` call below raises NameError on a fresh kernel.
    import matplotlib.pyplot as plt

    SMALL_SIZE = 11
    MEDIUM_SIZE = 13
    BIGGER_SIZE = 15

    plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
    plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
    plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
    plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
    plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

    barWidth = 0.3
    metrics = ["RMSE", "RMSE1vs2", "interventional1_RMSE_pred1vs2", "interventional2_RMSE_pred1vs2"]
    tick_labels = ["RMSE", "Statistical\nDisagreement", "Causal\nDisagreement\nacross ts", "Causal\nDisagreement\nwithin ts"]

    positions = np.arange(4)
    for estimator_name in ["deep_ar", "transformer", "wavenet"]:
        vals = [agg_mean[dataset][estimator_name][k] for k in metrics]
        err = [agg_std[dataset][estimator_name][k] for k in metrics]
        # Shift by one bar width per estimator so the three bars of a metric sit side by side.
        positions = [x + barWidth for x in positions]
        plt.bar(positions, vals, yerr=err, width=barWidth, label=estimator_name)

    # general layout: ticks sit under the middle (second) bar of each group
    plt.xticks([r + barWidth*2 for r in range(4)], tick_labels)
    plt.ylabel('score')
    plt.yscale('log')
    plt.legend()
    plt.title(dataset)
    # Show graphic
    plt.show()

In [None]:
# Bar chart of the aggregated scores for the electricity dataset.
plot_dataset("electricity", agg_mean, agg_std)

In [None]:
# Bar chart of the aggregated scores for the traffic dataset.
plot_dataset("traffic", agg_mean, agg_std)

# Appendix

### Inspect results per run

In [None]:
import numpy as np

def run(idx):
    """Summarise the metrics of a single repetition for every dataset and estimator.

    Reads the module-level `agg_results` and `ESTIMATORS`.

    Args:
        idx: Position of the repetition inside each per-metric list of `agg_results`.

    Returns:
        Nested dict `result[dataset][estimator_name][metric]` of formatted strings.
        Metrics compared across predictors hold the single value of that run; metrics
        stored per predictor hold "mean +/- std" over the two predictors' values.
    """
    per_predictor_metrics = ["RMSE", "80_CI_width_", "80_CI_width_intervened1_pred", "80_CI_width_intervened2_pred"]
    cross_predictor_metrics = ["RMSE1vs2", "interventional1_RMSE_pred1vs2", "interventional2_RMSE_pred1vs2"]

    summary = {}
    for dataset in ["electricity", "traffic"]:  # "m4_hourly" is left out
        summary[dataset] = {}
        for estimator_name in ESTIMATORS:
            runs = agg_results[dataset][estimator_name]
            cells = {}
            for metric in cross_predictor_metrics:
                values = runs[metric]
                cells[metric] = f"{values[idx]:.1f} "
            for metric in per_predictor_metrics:
                first = runs[metric + "1"]
                second = runs[metric + "2"]
                pair = [first[idx], second[idx]]
                cells[metric] = f"{np.mean(pair):.1f} +/- {np.std(pair):.1f}"
            summary[dataset][estimator_name] = cells
    return summary

In [None]:
# Per-run summary for the repetition at index 0, shown for the traffic dataset.
agg_run0 = run(0)
pd.DataFrame(agg_run0["traffic"])

In [None]:
# Per-run summary for the repetition at index 1, shown for the traffic dataset.
agg_run1 = run(1)
pd.DataFrame(agg_run1["traffic"])

In [None]:
# Aggregate over all runs, for comparison with the per-run tables.
# When the notebook is run top to bottom, `agg_mean` holds the numeric means computed in
# the Graphs section at this point, not the earlier "mean +/- std" strings.
pd.DataFrame(agg_mean["traffic"])

In [None]:
# NOTE(review): scratch expression that does not use any result from this notebook —
# candidate for removal.
np.arange(10).reshape([2,5])