In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up before each cell runs without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import chaospy
import numpoly
import yaml
import numpy as np
import pandas as pd
import sys, os

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use(["bmh", "../matplotlibrc"])

from sklearn.model_selection import train_test_split

In [None]:
# Put "<current working directory>/../scripts" at the front of sys.path before
# importing the modules below. The path is relative to the kernel's working
# directory, so these imports depend on where the notebook is started.
# NOTE(review): the modules below presumably live in ../scripts — confirm.
sys.path.insert(0, os.getcwd() + "/../scripts")
import _helpers as h
import _plotters as p
from surrogate import build_surrogate
from neural_network import build_neural_network
from sobol import calculate_sobol

## PCE Surrogate Modelling

In [None]:
# Parse ../config.yaml (relative to the working directory) into `config`.
# yaml.safe_load only constructs plain Python objects. Later cells read the
# "uncertainties", "train_test_split" and "neural_network" keys from it.
with open("../config.yaml", 'r') as stream:
    config = yaml.safe_load(stream)

In [None]:
# Tunable inputs for the PCE section:
# - `datafile` is the CSV passed to h.load_dataset.
# - `order` is the first argument passed to build_surrogate
#   (presumably the polynomial order of the expansion — confirm in surrogate.py).
datafile = "../results/capacities-50halton.csv"
order = 7
# scale = False

In [None]:
# Load the dataset stored at `datafile` and build the input distribution from
# the "uncertainties" section of the config.
# NOTE(review): h.NamedJ presumably wraps a chaospy joint distribution — confirm in _helpers.
dataset = h.load_dataset(datafile)
distribution = h.NamedJ(config["uncertainties"])

In [None]:
# Split the dataset into a training and a test subset. All split options are
# forwarded as keyword arguments from the "train_test_split" config section.
# NOTE(review): the split is only reproducible across runs if that section
# sets `random_state` (or disables shuffling) — confirm in config.yaml.
train_set, test_set = train_test_split(dataset, **config["train_test_split"])

In [None]:
# Build the surrogate from `order`, the input distribution and the training
# subset only; test_set is not passed in and is used by the error cells below.
surrogate = build_surrogate(order, distribution, train_set)

In [None]:
# Stage 1: convert the index of each subset into a DataFrame of samples
# (helper name suggests MultiIndex -> DataFrame).
train_samples = h.multiindex2df(train_set.index)
test_samples = h.multiindex2df(test_set.index)

# Stage 2: evaluate the PCE surrogate on the training and the test samples.
train_predictions = h.build_pce_prediction(surrogate, train_samples)
test_predictions = h.build_pce_prediction(surrogate, test_samples)

### Evaluation

In [None]:
# Pass the full dataset together with the train and test predictions to the
# histogram plotter for a visual comparison.
p.plot_histograms(dataset, [train_predictions, test_predictions])

In [None]:
# Display the mean of the dataset (presumably per column, as a reference for
# the error figures in the next cells — confirm).
dataset.mean()

In [None]:
# Display the mean of the errors between the predictions and the training
# subset, i.e. data that was used to build the surrogate.
h.calculate_errors(train_predictions, train_set).mean()

In [None]:
# Display the mean of the errors between the predictions and the test subset,
# which was not passed to build_surrogate.
h.calculate_errors(test_predictions, test_set).mean()

### Sensitivity Analysis

In [None]:
# Compute the Sobol sensitivity result from the surrogate and the input
# distribution. The bare `sobol` on the last line displays it as cell output.
sobol = calculate_sobol(surrogate, distribution)
sobol

In [None]:
# Plot the `sobol` result returned by calculate_sobol.
p.plot_sobol(sobol)

## Pure Machine Learning with `sklearn`

In [None]:
# Re-read ../config.yaml and rebind `config`. This is the same file the PCE
# section already loaded.
# NOTE(review): presumably repeated so edits to the "neural_network" settings
# take effect without rerunning the notebook from the top — confirm.
with open("../config.yaml", 'r') as stream:
    config = yaml.safe_load(stream)

In [None]:
# Build the neural network from the same training subset used for the PCE
# surrogate and the "neural_network" section of the config.
neural_network = build_neural_network(train_set, config["neural_network"])

In [None]:
# Predict with the neural network on the same train/test samples that were
# used for the PCE surrogate.
# NOTE: this rebinds `train_predictions` and `test_predictions`, replacing the
# PCE predictions assigned earlier. Re-running the PCE evaluation cells after
# this cell would silently evaluate the neural-network results instead.
train_predictions = h.build_ann_prediction(neural_network, train_samples, train_set)
test_predictions = h.build_ann_prediction(neural_network, test_samples, test_set)

### Evaluation

In [None]:
# Same histogram comparison as in the PCE section; in top-to-bottom execution
# the prediction variables now hold the neural-network predictions.
p.plot_histograms(dataset, [train_predictions, test_predictions])

In [None]:
# Display the mean of the errors between the current `train_predictions`
# (neural network, when run top to bottom) and the training subset.
h.calculate_errors(train_predictions, train_set).mean()

In [None]:
# Display the mean of the errors between the current `test_predictions`
# (neural network, when run top to bottom) and the test subset.
h.calculate_errors(test_predictions, test_set).mean()

## Multi-fidelity approach

- many more samples from a very low-resolution model

## Another Easy Benchmark to Beat:

- The surrogate is obtained from MC sampling at 37 nodes and 6-hourly resolution

In [None]:
# TODO