In [1]:
import os
# Change the working directory two levels up; the relative paths used later
# in this notebook (e.g. "./cross_validation/...") are resolved from there.
# NOTE(review): this is not idempotent - re-running the cell climbs another
# two levels. Presumably the target is the project root; confirm.
os.chdir("../..")

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell execution so edits to project code are picked up.
%load_ext autoreload
%autoreload 2

In [3]:
import itertools
import pandas as pd

from scripts.data_preparation import merge_station_data 
from scripts.models.cheng_model import fit_and_predict
from scripts.metrics import (
    macro_averaged_mean_squared_error,
    macro_averaged_mean_absolute_error
)

from fastai.tabular.all import * 

from torch.nn import (
    ReLU,
    LeakyReLU,
    Sigmoid
)

  return torch._C._cuda_getDeviceCount() > 0


In [16]:
def do_cv(parameters, data_subset: str='2015-01-01', val_split: str='2010-01-01'):
    """Evaluate each hyper-parameter combination and return the best one.

    Every combination is forwarded to ``fit_and_predict`` together with
    ``data_subset`` and ``test_split=val_split``. The returned predictions are
    scored with ``macro_averaged_mean_squared_error`` (MAMSE, lower is better).
    Each combination and its score are printed as they are evaluated.

    Args:
        parameters: Iterable of dicts; each dict is unpacked as keyword
            arguments into ``fit_and_predict``.
        data_subset: Forwarded unchanged to ``fit_and_predict``.
        val_split: Forwarded to ``fit_and_predict`` as ``test_split``.

    Returns:
        Tuple ``(best_mamse, best_params)``: the lowest MAMSE observed and the
        combination that produced it. If several combinations tie, the first
        one evaluated is returned.

    Raises:
        ValueError: If ``parameters`` contains no combinations.
    """
    best_mamse = None
    best_params = None
    for comb in parameters:
        print(str(comb) + ":")
        y_pred, y_true, _ = fit_and_predict(
            data_subset=data_subset,
            test_split=val_split,
            verbose=False,
            **comb
        )
        mamse = macro_averaged_mean_squared_error(
            y_pred=y_pred,
            y_true=y_true
        )
        print(mamse)
        # Track the running minimum directly instead of keying a dict by the
        # float score, which would let tied combinations overwrite each other.
        if best_mamse is None or mamse < best_mamse:
            best_mamse, best_params = mamse, comb

    if best_params is None:
        raise ValueError("parameters must contain at least one combination")

    # Return the best score, not the score of the last combination evaluated.
    return best_mamse, best_params

In [37]:
# Search space: every key is a keyword argument of the model-fitting routine,
# every value the list of candidate settings to try for it.
parameters = {
    'layers': [[40, 40], [100, 40], [100, 100]],
    'lr': [1e-2, 1e-3],
    'actfn': [ReLU(), LeakyReLU()],
    'bs': [32, 64, 128],
    'epochs': [5, 10, 15],
    'engineer_date': [False, True],
}

# Expand the grid into one dict per combination (Cartesian product of all
# candidate lists, with the last key varying fastest).
keys = tuple(parameters)
values = tuple(parameters.values())
experiments = [
    dict(zip(keys, combination))
    for combination in itertools.product(*values)
]

In [38]:
# Size of the full grid: 3 * 2 * 2 * 3 * 3 * 2 combinations.
len(experiments)

216

Randomly select 100 of the 216 experiments.

In [41]:
import random

# Fixed seed so the same subset of experiments is drawn on every run.
SAMPLE_SEED = 42
N_SAMPLED_EXPERIMENTS = 100

random.seed(SAMPLE_SEED)
combs = random.sample(experiments, k=N_SAMPLED_EXPERIMENTS)

In [42]:
# Evaluate the sampled combinations; do_cv prints each combination followed
# by its score while it runs.
mamse, best_params = do_cv(combs)

{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.17550016937282686
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 15, 'engineer_date': False}:


1535


0.1826119402184168
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': False}:


1535


0.19148934961029615
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 10, 'engineer_date': True}:


1535


0.1463844974885795
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': False}:


1535


0.32436575681457325
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 10, 'engineer_date': False}:


1535


0.32109073417461675
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.1694573986363146
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.07458472436621293
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 10, 'engineer_date': False}:


1535


0.18673038782669882
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 10, 'engineer_date': False}:


1535


0.1910257354010087
{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 15, 'engineer_date': True}:


1535


0.06264653530125826
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 10, 'engineer_date': False}:


1535


0.31574831422327826
{'layers': [100, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.415831842052424
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.18521412052281566
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.17530134167474992
{'layers': [100, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 5, 'engineer_date': False}:


1535


0.327324712783297
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 10, 'engineer_date': False}:


1535


0.1825500645002165
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.17816980611299313
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': True}:


1535


0.0923664857226317
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.3848126674093314
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': True}:


1535


0.0911605336336774
{'layers': [100, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.10430076730963947
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 15, 'engineer_date': False}:


1535


0.17781804528927922
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.17439864597352203
{'layers': [100, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.14695922846774764
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': False}:


1535


0.3841924077264449
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.10291754450768607
{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.18658414278129304
{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.07036184806828144
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': True}:


1535


0.09516284963146776
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.06549046702215447
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': False}:


1535


0.31860357811804546
{'layers': [100, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': False}:


1535


0.3846523095877703
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': False}:


1535


0.19029231983989975
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.16465848886656603
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.1794149695724058
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.18598136186680075
{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': False}:


1535


0.10987621148890228
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': False}:


1535


0.19054520830028318
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.08467477392727779
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.18717077181128822
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.1128243767694631
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 15, 'engineer_date': False}:


1535


0.18920881669469708
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': False}:


1535


0.18025893602643933
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 10, 'engineer_date': False}:


1535


0.186021309852952
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.1439203765267214
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.17213873829732307
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 5, 'engineer_date': False}:


1535


0.3151486797797919
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.18700336004077436
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 15, 'engineer_date': False}:


1535


0.17983155745534107
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.34178238606506534
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.4446063486711356
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 15, 'engineer_date': True}:


1535


0.07813345583593784
{'layers': [100, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 10, 'engineer_date': True}:


1535


0.16543315490647517
{'layers': [100, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 15, 'engineer_date': True}:


1535


0.08326089620470228
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.17636291708790353
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 5, 'engineer_date': False}:


1535


0.2451275506172059
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': False}:


1535


0.1881923992382906
{'layers': [100, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.18398884568711024
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.09946803175701477
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': False}:


1535


0.18521542301546465
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.0900092033057361
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.5178232459912008
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.07647281185625399
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.07947542815909103
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.19060206917787248
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 10, 'engineer_date': False}:


1535


0.18772079445612297
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': False}:


1535


0.18279683075732484
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.2344964007298642
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.17774613988985694
{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.17639283539283276
{'layers': [100, 100], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.17084861201114815
{'layers': [100, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 10, 'engineer_date': False}:


1535


0.1954866507674755
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 10, 'engineer_date': True}:


1535


0.1014661286542724
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 15, 'engineer_date': True}:


1535


0.0875276279667319
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.19524730920629121
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 5, 'engineer_date': False}:


1535


0.19239595278947094
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 15, 'engineer_date': True}:


1535


0.35687730753402724
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 10, 'engineer_date': False}:


1535


0.3824592924703827
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 5, 'engineer_date': False}:


1535


0.1941290680455332
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.33105572980663206
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 5, 'engineer_date': False}:


1535


0.4009655470693525
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.1758305439501997
{'layers': [100, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 15, 'engineer_date': False}:


1535


0.18337787906975292
{'layers': [100, 100], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.17378266451555371
{'layers': [40, 40], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.3725322057161768
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.18052020531967267
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 64, 'epochs': 15, 'engineer_date': True}:


1535


0.07704624034277313
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': False}:


1535


0.19115877990293845
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 15, 'engineer_date': True}:


1535


0.07749684913856941
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.08412066818750977
{'layers': [40, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 15, 'engineer_date': False}:


1535


0.19034898558750418
{'layers': [100, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 5, 'engineer_date': True}:


1535


0.16983851188800164
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 5, 'engineer_date': True}:


1535


0.18916255137528587
{'layers': [40, 40], 'lr': 0.001, 'actfn': ReLU(), 'bs': 128, 'epochs': 10, 'engineer_date': True}:


1535


0.33096821400842064
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 128, 'epochs': 15, 'engineer_date': False}:


1535


0.18214441853118246
{'layers': [100, 100], 'lr': 0.001, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 32, 'epochs': 5, 'engineer_date': True}:


1535


0.17377374219699582
{'layers': [40, 40], 'lr': 0.01, 'actfn': LeakyReLU(negative_slope=0.01), 'bs': 64, 'epochs': 10, 'engineer_date': True}:


1535


0.15253968748762145
{'layers': [100, 100], 'lr': 0.001, 'actfn': ReLU(), 'bs': 64, 'epochs': 15, 'engineer_date': True}:


1535


0.08252561227398826
{'layers': [100, 40], 'lr': 0.01, 'actfn': ReLU(), 'bs': 32, 'epochs': 5, 'engineer_date': False}:


1535


0.1911344423155763


In [43]:
# Show the combination that do_cv selected as having the lowest MAMSE.
best_params

{'layers': [100, 100],
 'lr': 0.01,
 'actfn': LeakyReLU(negative_slope=0.01),
 'bs': 64,
 'epochs': 15,
 'engineer_date': True}

In [44]:
# Save the selected hyper-parameters to file.
fname = "./cross_validation/cheng/opt_par_cv2.p"
# The context manager closes (and therefore flushes) the file even if
# pickling fails; a bare open() inside the call would leak the handle.
with open(fname, "wb") as fh:
    pickle.dump(best_params, fh)

In [None]:
# Retrain the model 1000 times with one fixed configuration, collect the
# metrics of every run and persist them as pickled DataFrames.
# NOTE(review): 'epochs' is 7 here while the best_params displayed earlier in
# this notebook has 15 epochs - confirm this difference is intentional.
# NOTE(review): tqdm, precision_score and recall_score are not imported
# explicitly in this notebook; presumably they arrive through the star import
# (or need e.g. scikit-learn's metrics) - verify on a fresh kernel.
scores = []
recalls = []
precisions = []

for _ in tqdm(range(1000)):
    y_pred, y_true, _ = fit_and_predict(
        layers=[100, 100],
        lr=0.01,
        actfn=LeakyReLU(negative_slope=0.01),
        bs=64,
        epochs=7,
        engineer_date=True,
        verbose=False,
    )

    mamae = macro_averaged_mean_absolute_error(y_true, y_pred)
    mamse = macro_averaged_mean_squared_error(y_true, y_pred)
    # average=None is passed so that the scores are not averaged
    # (presumably one value per class - confirm against the metric's docs).
    precision = precision_score(y_true, y_pred, average=None)
    recall = recall_score(y_true, y_pred, average=None)

    scores.append({"mamae": mamae, "mamse": mamse})
    recalls.append(recall)
    precisions.append(precision)

# One row per run; build each frame once from the collected records.
scores_df = pd.DataFrame(scores)
recalls_df = pd.DataFrame(recalls)
precisions_df = pd.DataFrame(precisions)
# Shift the default 0-based column labels to 1-based labels.
recalls_df.columns = recalls_df.columns + 1
precisions_df.columns = precisions_df.columns + 1

fname = "./cross_validation/cheng/"

# Context managers guarantee every file is flushed and closed after writing.
with open(fname+"scores_df2.p", "wb") as fh:
    pickle.dump(scores_df, fh)
with open(fname+"recalls_df2.p", "wb") as fh:
    pickle.dump(recalls_df, fh)
with open(fname+"precisions_df2.p", "wb") as fh:
    pickle.dump(precisions_df, fh)

In [5]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened inside a context manager so the handle is always
    closed. Only use this on files produced by this project: unpickling
    untrusted data can execute arbitrary code.
    """
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Reload the per-run metrics saved by the repeated-evaluation cell.
precisions_df = _load_pickle("./cross_validation/cheng/precisions_df2.p")
recalls_df = _load_pickle("./cross_validation/cheng/recalls_df2.p")
scores_df = _load_pickle("./cross_validation/cheng/scores_df2.p")

In [28]:
import scipy.stats as st

N = 100        # number of runs (rows) used for the estimates
alpha = 0.05   # two-sided significance level, i.e. a 95 % confidence interval
# Two-sided critical value of Student's t distribution with N-1 degrees of freedom.
c_crit = st.t.ppf(1-alpha/2, N-1)

# Slice every frame once, driven by N, so the sample used for the means and
# standard deviations always matches the N in sqrt(N) and in the t quantile.
precisions_n = precisions_df.iloc[:N, :]
recalls_n = recalls_df.iloc[:N, :]
scores_n = scores_df.iloc[:N, :]
assert len(precisions_n) == len(recalls_n) == len(scores_n) == N, (
    "fewer than N runs available; the confidence intervals would be wrong"
)

# Column-wise mean precision / recall over the N runs.
results = pd.concat([precisions_n.mean(), recalls_n.mean()], axis=1)
results.columns = ["Precision","Recall"]
# CI_* hold the half-width of the interval: t_crit * s / sqrt(N).
results["CI_prec"] = c_crit * precisions_n.std()/np.sqrt(N)
results["CI_rec"] = c_crit * recalls_n.std()/np.sqrt(N)

avg_mamse = scores_n["mamse"].mean()
ci_mamse = c_crit * scores_n["mamse"].std() / np.sqrt(N)

In [30]:
# Report the mean MAMSE and the half-width of its confidence interval,
# both rounded to three decimals.
summary_template = """
MAMSE: {mamse}
+-CI: {ci_mamse}
"""
rounded_mamse = round(avg_mamse, 3)
rounded_ci = round(ci_mamse, 3)
print(summary_template.format(mamse=rounded_mamse, ci_mamse=rounded_ci))


MAMSE: 0.182
+-CI: 0.004



In [24]:
# Display the precision / recall summary rounded to four decimals.
results.round(4)

Unnamed: 0,Precision,Recall,CI_prec,CI_rec
1,0.9495,0.9298,0.0017,0.0025
2,0.8708,0.873,0.002,0.002
3,0.8597,0.8838,0.0019,0.0026
4,0.7167,0.6103,0.0101,0.0133
