In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import chaospy
import numpoly
import yaml
import numpy as np
import pandas as pd
import sys, os
from itertools import combinations
from numpy.random import random

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use(["bmh", "../matplotlibrc"])

from sklearn.model_selection import train_test_split

In [None]:
sys.path.insert(0, os.getcwd() + "/../scripts")
import _helpers as h
import _plotters as p
from surrogate import build_surrogate
from neural_network import build_neural_network
from sobol import calculate_sobol

## PCE Surrogate Modelling

In [None]:
# Load the project configuration. Later cells read its "uncertainties",
# "train_test_split" and "neural_network" entries.
with open("../config.yaml", 'r') as stream:
    config = yaml.safe_load(stream)

In [None]:
# Colour lookup taken from plotting.tech_colors of the PyPSA-Eur config;
# the plotting cells index it by column / parameter name.
with open("../config.pypsaeur.yaml", 'r') as stream:
    TECH_COLORS = yaml.safe_load(stream)["plotting"]["tech_colors"]

In [None]:
# Run settings for the PCE surrogate.
datafile = "../results/capacities-50halton.csv"
# Polynomial order passed to build_surrogate for the baseline fit and the
# "Error vs Samples" study.
order = 1
# Named joint distribution of the uncertain inputs; later cells use its
# .names, .mapping and .J attributes.
distribution = h.NamedJ(config["uncertainties"])

In [None]:
# Read the results table through the project helper. Later cells use it like
# a pandas DataFrame (.columns, .mean(), .reset_index(), .tsc).
dataset = h.load_dataset(datafile)

In [None]:
# Split into training and test subsets; every keyword argument of
# train_test_split comes from config["train_test_split"].
# NOTE(review): the split is only reproducible if that entry fixes
# random_state -- confirm.
train_set, test_set = train_test_split(dataset, **config["train_test_split"])

In [None]:
# Fit the surrogate of polynomial order `order` on the training subset.
surrogate = build_surrogate(order, distribution, train_set)

In [None]:
# Input samples are recovered from the index of each subset and fed to the
# surrogate to obtain predictions for the training and the test subset.
train_samples = h.multiindex2df(train_set.index)
train_predictions = h.build_pce_prediction(surrogate, train_samples)

test_samples = h.multiindex2df(test_set.index)
test_predictions = h.build_pce_prediction(surrogate, test_samples)

### Evaluation

In [None]:
# Histograms of the full dataset together with the train/test predictions;
# `fn` is handed to the project plotter as the output file name.
p.plot_histograms(dataset, [train_predictions, test_predictions], fn="graphics/histograms.pdf")

In [None]:
# Column-wise mean of the full dataset, shown for reference.
dataset.mean()

In [None]:
# Error measures on the training subset, reduced with .mean() and rounded to
# three decimals.
h.calculate_errors(train_predictions, train_set).mean().round(3)

In [None]:
# Same error summary for the held-out test subset.
h.calculate_errors(test_predictions, test_set).mean().round(3)

## Error vs Order

In [None]:
# NOTE(review): scratch cell -- this displays orders 1..6, whereas the loop in
# the next cell iterates range(1,8), i.e. orders 1..7.
list(range(1,7))

In [None]:
# Refit the surrogate for polynomial orders 1..7 on the full training subset
# and collect the test-set errors per order.
#
# The fitted models and predictions use loop-specific names so this study does
# not overwrite the notebook-level `surrogate` / `test_predictions` that other
# cells rely on.

# The test inputs do not depend on the order, so compute them once.
test_samples = h.multiindex2df(test_set.index)

results = {}
for o in range(1,8):

    print(o, end=" ")

    order_surrogate = build_surrogate(o, distribution, train_set)
    order_predictions = h.build_pce_prediction(order_surrogate, test_samples)

    results[o] = h.calculate_errors(order_predictions, test_set)

# Dict keys (the orders) become the outer column level.
df = pd.concat(results, axis=1)

In [None]:
# Plot one error measure against the polynomial order, one line per output
# column, coloured via TECH_COLORS.
measure = "mape"
data = df.T.unstack(level=0).loc[measure].unstack().T

colors = [TECH_COLORS[c] for c in data.columns]

fig, ax = plt.subplots(figsize=(4.5,3.5))
data.plot(ax=ax, color=colors)
ax.legend(bbox_to_anchor=(1.02,1))
ax.set_xlabel("order of polynomial")
ax.set_ylabel(measure.capitalize())
# The order study fits on the whole training subset, so report its actual
# size instead of a hard-coded number.
ax.set_title(f"{len(train_set)} training samples")
# Fixed y-ranges keep figures for the same measure comparable.
if measure == "mape":
    ax.set_ylim([0,30])
elif measure == "r2":
    ax.set_ylim([0.7,1.05])
fig.savefig(f"graphics/error-{measure}-vs-order.pdf", bbox_inches='tight')

## Error vs Samples

In [None]:
# Refit the surrogate (fixed polynomial `order`) on the first i training rows
# for i = 100, 125, ..., 800 and collect the test-set errors per size.
#
# The fitted models and predictions use loop-specific names so this study does
# not overwrite the notebook-level `surrogate` / `test_predictions` that other
# cells rely on.
# NOTE(review): slices beyond len(train_set) all return the full training
# subset -- confirm that it holds at least 800 rows.

# The test inputs do not depend on the training size, so compute them once.
test_samples = h.multiindex2df(test_set.index)

results = {}
for i in np.arange(100, 801, 25):

    print(i, end=" ")

    subset_surrogate = build_surrogate(order, distribution, train_set[:i])
    subset_predictions = h.build_pce_prediction(subset_surrogate, test_samples)

    results[i] = h.calculate_errors(subset_predictions, test_set)

# Dict keys (the training sizes) become the outer column level.
df = pd.concat(results, axis=1)

In [None]:
# Plot the "r2" error measure against the number of training samples, one
# line per output column, coloured via TECH_COLORS.
data = df.T.unstack(level=0).loc["r2"].unstack().T

colors = [TECH_COLORS[c] for c in data.columns]

fig, ax = plt.subplots(figsize=(4.5,3.5))
data.plot(ax=ax, color=colors)
ax.legend(bbox_to_anchor=(1.02,1))
ax.set_xlabel("training samples")
ax.set_ylabel("R2")
# Report the order that was actually used for the fits instead of a
# hard-coded value.
ax.set_title(f"Order = {order}")
#fig.savefig(f"graphics/error-r2-vs-samples-order-{order}.pdf", bbox_inches='tight')

## Sensitivity Analysis

In [None]:
# Sobol indices from the project helper with its default mode.
# NOTE(review): presumably total-order indices, given the `_t` suffix --
# confirm in scripts/sobol.py.
sobol_t = calculate_sobol(surrogate, distribution)
sobol_t

In [None]:
# Sobol indices with sobol='m'.
# NOTE(review): presumably main (first-order) indices -- confirm in
# scripts/sobol.py.
sobol_m = calculate_sobol(surrogate, distribution, sobol='m')
sobol_m

In [None]:
def calculate_sobol_m2(surrogate, distribution, decimals=3):
    """Return the result of ``chaospy.Sens_m2`` as a labelled, rounded table.

    Parameters
    ----------
    surrogate
        Polynomial passed as first argument to ``chaospy.Sens_m2``.
    distribution
        Object exposing ``.J`` (handed to chaospy) and ``.names`` (used as
        both row and column labels).
    decimals : int
        Number of decimals the indices are rounded to.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by ``distribution.names``.
    """
    labels = distribution.names
    indices = chaospy.Sens_m2(surrogate, distribution.J)
    rounded = indices.round(decimals)
    return pd.DataFrame(rounded, index=labels, columns=labels)

In [None]:
# Pairwise (Sens_m2) indices for the "tsc" output of the surrogate.
sobol_m2_tsc = calculate_sobol_m2(surrogate["tsc"], distribution)

In [None]:
# Pairwise (Sens_m2) indices for the "transmission" output.
sobol_m2_transmission = calculate_sobol_m2(surrogate["transmission"], distribution)

In [None]:
# Pairwise (Sens_m2) indices for the "solar" output.
sobol_m2_solar = calculate_sobol_m2(surrogate["solar"], distribution)

In [None]:
def plot_sobol_m2(sobol, title="", fn=None):
    """Draw a square table of pairwise indices as an annotated heatmap.

    Only the strict lower triangle is shown; the diagonal and the upper
    triangle are masked out.

    Parameters
    ----------
    sobol : pandas.DataFrame
        Square table of indices (rows and columns are the input parameters).
    title : str
        Axes title.
    fn : str or None
        If given, the figure is saved to this path.
    """
    fig, ax = plt.subplots(figsize=(3,3))
    # `np.bool` was removed from NumPy; the builtin `bool` is the supported
    # dtype spelling and yields the same mask.
    mask = np.triu(np.ones(sobol.shape, dtype=bool))
    sns.heatmap(
        sobol,
        mask=mask,
        square=True,
        cmap="Blues",
        vmax=.2,
        vmin=0,
        annot=True,
        fmt=".2f",
        cbar=False,
        ax=ax,
    )
    ax.set_title(title)
    if fn is not None:
        fig.savefig(fn, bbox_inches='tight')

In [None]:
# Heatmap of the pairwise indices for "tsc", saved as PDF.
plot_sobol_m2(sobol_m2_tsc, "TSC", "graphics/sobol-tsc-m2.pdf")

In [None]:
# Heatmap of the pairwise indices for "transmission", saved as PDF.
plot_sobol_m2(sobol_m2_transmission, "transmission", "graphics/sobol-transmission-m2.pdf")

In [None]:
# Heatmap of the pairwise indices for "solar", saved as PDF.
plot_sobol_m2(sobol_m2_solar, "solar", "graphics/sobol-solar-m2.pdf")

In [None]:
# Project plotter applied to the default-mode indices (sobol_t).
p.plot_sobol(sobol_t, fn='graphics/sobol-t.pdf')

In [None]:
# Project plotter applied to the sobol='m' indices.
p.plot_sobol(sobol_m, fn='graphics/sobol-m.pdf')

In [None]:
# Element-wise difference between the two index tables.
# NOTE(review): presumably the share attributable to interactions -- confirm.
p.plot_sobol(sobol_t - sobol_m, fn='graphics/sobol-diff.pdf')

In [None]:
def plot_sobol_bar(sobol, relative=True, fn=None):
    """Draw a table of indices as stacked bars, one bar per column of `sobol`.

    Parameters
    ----------
    sobol : pandas.DataFrame
        Index table; each row becomes one stacked segment and is coloured via
        ``TECH_COLORS[row label]``.
    relative : bool
        If True, every column is divided by its sum before plotting.
    fn : str or None
        If given, the figure is saved to this path.
    """
    fig, ax = plt.subplots(figsize=(5,3))

    # Colours must follow the rows of the table being plotted, not those of
    # the notebook-level `sobol_t`.
    colors = [TECH_COLORS[s] for s in sobol.index]

    if relative:
        sobol = sobol / sobol.sum()

    sobol.T.plot.bar(ax=ax, stacked=True, color=colors)
    ax.legend(bbox_to_anchor=(1,1.01), ncol=1)
    ax.set_ylim([0,1])

    if fn is not None:
        fig.savefig(fn, bbox_inches='tight')

In [None]:
# Stacked bars of the sobol='m' indices in absolute terms (no normalisation).
plot_sobol_bar(sobol_m, relative=False, fn="graphics/sobol-m-bar.pdf")

In [None]:
# Stacked bars of the default-mode indices, normalised so each bar sums to 1.
plot_sobol_bar(sobol_t, relative=True, fn="graphics/sobol-t-bar.pdf")

In [None]:
# Stacked bars of the difference between the two index tables, in absolute
# terms.
plot_sobol_bar(sobol_t - sobol_m, relative=False, fn="graphics/sobol-diff-bar.pdf")

## 2D-Plots

In [None]:
def plot_2D(surrogate, distribution, variable, xname, yname, xsamples=(0.5,1.5,20), ysamples=(0.5,1.5,20),
            fixed=1, dataset=None, contour_handles=None, vmin=130, vmax=270, levels=25, fn=None):
    """Filled contour plot of one surrogate output over two input parameters.

    The polynomial ``surrogate[variable]`` is evaluated on a grid spanned by
    `xname` and `yname` while all remaining inputs are held at fixed values.

    Parameters
    ----------
    surrogate
        Mapping from output name to a callable polynomial with a ``.names``
        attribute listing its symbols ("q0", "q1", ...).
    distribution
        Only ``distribution.mapping`` is used, to translate a parameter name
        into the index of its "q<index>" symbol.
    variable : str
        Output to plot; also used as colorbar label.
    xname, yname : str
        Parameter names on the x- and y-axis.
    xsamples, ysamples : tuple
        Arguments forwarded to ``np.linspace`` to build the grid axes.
    fixed : number or dict
        A single number applied to every remaining input, or a dict mapping
        parameter name to value.
    dataset : optional
        If given, its rows are scattered on top; after ``reset_index()`` it
        must provide the columns "<xname>-cost" and "<yname>-cost".
    contour_handles : optional
        Existing contour set whose levels, colormap and norm define the colour
        scale. If None, a reference scale is built from `vmin`, `vmax` and
        `levels`.
    vmin, vmax : float
        Colour limits of the reference scale.
    levels : int or sequence
        Passed to ``contourf`` when building the reference scale.
    fn : str or None
        If given, the figure is saved to this path.

    Raises
    ------
    NotImplementedError
        If `fixed` is neither a number nor a dict.
    AssertionError
        If `fixed`, `xname` and `yname` together do not cover exactly the
        symbols of the polynomial.
    """
    xs = np.linspace(*xsamples)
    ys = np.linspace(*ysamples)

    surrogate_var = surrogate[variable]

    # TODO substitute distribution since only used for variable mapping
    to_qindex = distribution.mapping
    all_q = set(surrogate_var.names)

    qx = "q" + str(to_qindex[xname])
    qy = "q" + str(to_qindex[yname])

    # np.number is accepted too, so NumPy integer scalars work like Python ints.
    if isinstance(fixed, (float, int, np.number)):
        fixed = {qo: fixed for qo in all_q - {qx, qy}}
    elif isinstance(fixed, dict):
        fixed = {"q" + str(to_qindex[k]): v for k, v in fixed.items()}
    else:
        raise NotImplementedError("Fixed input parameters not properly specified.")

    assert set(fixed.keys()).union({qx, qy}) == all_q, "Not all input parameters specified!"

    # Partially evaluate the polynomial, leaving only qx and qy free.
    zpoly = surrogate_var(**fixed)

    # One row per y value -> shape (len(ys), len(xs)), as contourf expects.
    z = np.array([zpoly(**{qx: xs, qy: y}) for y in ys])

    if contour_handles is None:
        # Reference contour set that only serves to define a shared colour
        # scale. The dummy grid deterministically spans [vmin-5, vmax+5] and
        # has shape (len(ys), len(xs)), so it also works when the two axes use
        # different numbers of samples.
        dummy_row = np.linspace(vmin - 5, vmax + 5, xs.size)
        zdummy = np.tile(dummy_row, (ys.size, 1))

        scratch_fig, scratch_ax = plt.subplots()
        contour_handles = scratch_ax.contourf(xs, ys, zdummy, levels=levels, vmin=vmin, vmax=vmax)
        plt.close(scratch_fig)

    fig, ax = plt.subplots(figsize=(6,5))

    # Reuse levels, colormap and norm of the reference so the filled colours
    # agree with the colorbar drawn from it.
    ax.contourf(
        xs, ys, z,
        levels=contour_handles.levels,
        cmap=contour_handles.cmap,
        norm=contour_handles.norm,
    )

    # Pass `ax` explicitly so the colorbar is attached to this figure rather
    # than to the axes the reference contour set was drawn on.
    fig.colorbar(contour_handles, ax=ax, label=variable)

    ax.set_xlabel(f"{xname}-cost")
    ax.set_ylabel(f"{yname}-cost")

    if dataset is not None:
        df = dataset.reset_index().astype(float)
        x = df[f"{xname}-cost"]
        y = df[f"{yname}-cost"]
        ax.scatter(x, y, marker='.', s=5, alpha=0.2, color='grey')

    if fn is not None:
        fig.savefig(fn, bbox_inches='tight')

    # Close explicitly: this function is called in loops producing many figures.
    plt.close(fig)

In [None]:
# TODO use multiprocessing
# One 2D plot of the "tsc" output per pair of input parameters, with all
# remaining inputs fixed at 0.5 and at 1.0.
var = "tsc"
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("graphics/2D", exist_ok=True)
for i, j in combinations(distribution.names, 2):
    for fixed in [0.5,1.0]:
        plot_2D(surrogate, distribution, var, i, j, fixed=fixed, dataset=dataset, fn=f"graphics/2D/2D-{var}-{i}-{j}-{fixed}.pdf")

## 1D Plots

In [None]:
def plot_1D(surrogate, variable, parameter, coords, distribution, sample=10000, dataset=None, color_by_var=True, fn=None):
    """Plot percentile bands of one surrogate output along one input parameter.

    For each value in `coords` the parameter is fixed in the polynomial and
    the 5/25/50/75/95 percentiles over the remaining inputs are obtained with
    ``chaospy.Perc``.

    Parameters
    ----------
    surrogate
        Mapping from output name to a callable polynomial.
    variable : str
        Output to plot; selects the polynomial, the y-label, the y-limits and
        (if `color_by_var`) the colour.
    parameter : str
        Input parameter on the x-axis.
    coords : sequence of float
        Values of `parameter` at which the percentiles are evaluated.
    distribution
        Provides ``.mapping`` (parameter name -> symbol index) and ``.J``
        (passed to ``chaospy.Perc``).
    sample : int
        Forwarded to ``chaospy.Perc``.
    dataset : optional
        If given, its rows are scattered on top; after ``reset_index()`` it
        must provide the columns "<parameter>-cost" and `variable`.
    color_by_var : bool
        Colour by ``TECH_COLORS[variable]`` if True, otherwise by
        ``TECH_COLORS[parameter]``.
    fn : str or None
        If given, the figure is saved to this path.
    """
    poly = surrogate[variable]
    symbol = f"q{distribution.mapping[parameter]}"
    # Use the `variable` argument here; the function must not depend on a
    # notebook-level `var` that happens to exist.
    color = TECH_COLORS[variable] if color_by_var else TECH_COLORS[parameter]
    percentiles = [5,25,50,75,95]

    # Rows follow `coords`, columns follow `percentiles`.
    P = np.array([
        chaospy.Perc(poly(**{symbol: coord}), percentiles, distribution.J, sample=sample)
        for coord in coords
    ])

    fig, ax = plt.subplots(figsize=(3,3))
    ax.plot(coords, P[:,2], linewidth=1, label="Q50", color=color)
    ax.fill_between(coords, P[:,1], P[:,3], alpha=0.2, label="Q25/Q75", color=color)
    ax.fill_between(coords, P[:,0], P[:,4], alpha=0.2, label="Q5/Q95", color='grey')
    ax.set_ylabel(variable)
    ax.set_xlabel(f"{parameter}-cost")
    ax.legend(frameon=False)

    # Fixed y-ranges per output keep figures of the same output comparable.
    if variable == "tsc":
        ax.set_ylim([130,270])
    elif variable in ["H2", "battery"]:
        ax.set_ylim([0,250])
    elif variable == "transmission":
        ax.set_ylim([0,900])
    else:
        ax.set_ylim([0,1400])

    if dataset is not None:
        df = dataset.reset_index().astype(float)
        ax.scatter(df[f"{parameter}-cost"], df[variable], marker='.', s=5, alpha=0.1, color='grey')

    if fn is not None:
        fig.savefig(fn, bbox_inches='tight')

    # Close explicitly: this function is called in loops producing many figures.
    plt.close(fig)

In [None]:
# TODO use multiprocessing
# One 1D percentile plot per (output column, input parameter) pair. Figures
# that already exist on disk are skipped.
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("graphics/1D", exist_ok=True)
# .unique() de-duplicates like set() did, but keeps the column order stable
# between runs.
for var in dataset.columns.unique():
    for param in distribution.names:
        print(param, var)
        fn = f"graphics/1D/1D-{var}-{param}.pdf"
        if os.path.isfile(fn):
            continue
        plot_1D(surrogate, var, param, np.linspace(0.5,1.5,25), distribution, sample=20000, dataset=dataset, fn=fn)

## Pure Machine Learning with `sklearn`

In [None]:
# Fit the neural-network alternative on the training subset; its settings
# come from config["neural_network"].
neural_network = build_neural_network(train_set, config["neural_network"])

In [None]:
# Rebinds train_predictions / test_predictions (previously the PCE
# predictions) to the neural-network predictions. Relies on train_samples and
# test_samples defined in the PCE section.
train_predictions = h.build_ann_prediction(neural_network, train_samples, train_set)
test_predictions = h.build_ann_prediction(neural_network, test_samples, test_set)

In [None]:
# Same histogram comparison as for the PCE surrogate; no file name is passed
# here.
p.plot_histograms(dataset, [train_predictions, test_predictions])

In [None]:
# Error measures of the neural network on the training subset, reduced with
# .mean() (unrounded).
h.calculate_errors(train_predictions, train_set).mean()

In [None]:
# Same error summary for the held-out test subset.
h.calculate_errors(test_predictions, test_set).mean()

## Multi-fidelity approach

- Use many more samples from a very low-resolution model.

## Another Easy Benchmark to Beat:

- Surrogate is obtained from MC sampling for 37 nodes and 6-hourly resolution

In [None]:
# TODO

In [None]:
# Plot the `tsc` column sorted in ascending order. reset_index() moves the
# original index into columns and leaves a positional index, so the x-axis is
# the rank of each sample.
fig, ax = plt.subplots(figsize=(4,3))
dataset.tsc.sort_values().reset_index().plot(ax=ax)

In [None]:
# Cost duration curve: for every sample, the percentage by which `tsc`
# exceeds the smallest `tsc` in the dataset, sorted ascending and plotted
# against the percentage of samples.
fig, ax = plt.subplots(figsize=(4,2.5))
df = (
    (dataset.tsc / dataset.tsc.min() * 100 - 100)
    .sort_values()
    .reset_index()
)
# Rescale the positional index 0..n-1 to a 0..100 (%) axis.
df.index = df.index / len(df.index) * 100
df.plot(ax=ax)
ax.set_xlabel("% of samples")
ax.set_ylabel("% more than least-cost")
fig.savefig("graphics/cost-duration-curve.pdf", bbox_inches='tight')

In [None]:
# Flat table with the former index levels and `tsc` as columns, all cast to
# float. plot_kde below reads the "<parameter>-cost" columns from it.
df = dataset.tsc.reset_index()
df = df.astype("float")

In [None]:
# `tsc` relative to the smallest `tsc` in the dataset (1.0 = cheapest sample).
df["tscrel"] = df.tsc/df.tsc.min()

In [None]:
def plot_kde(df, lower, upper, parameter, fn=None):
    """Compare the distribution of one input parameter across cost classes.

    Draws a density histogram of ``df[parameter]`` for all rows and kernel
    density estimates for the rows with ``tscrel < lower`` (low),
    ``tscrel > upper`` (high) and everything in between (medium).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `parameter` and "tscrel".
    lower, upper : float
        Thresholds on ``df.tscrel`` separating the three classes.
    parameter : str
        Column to plot; also used as x-label.
    fn : str, path-like, bool or None
        None disables saving. A string or path-like is used as the output
        file. Any other value (e.g. ``True``, as used by existing callers)
        saves to the default "graphics/kde-<parameter>.pdf".
    """
    fig, ax = plt.subplots(figsize=(4,2.5))

    df[parameter].plot.hist(ax=ax, label='all', color='lightgray', density=True, alpha=0.8)

    low = df.loc[df.tscrel < lower, parameter]
    high = df.loc[df.tscrel > upper, parameter]
    medium = df.loc[(df.tscrel >= lower) & (df.tscrel <= upper), parameter]

    # Plot order determines the line colours, so keep low / high / medium.
    low.plot.kde(ax=ax, label=f'low (<{lower})')
    high.plot.kde(ax=ax, label=f"high (>{upper})")
    medium.plot.kde(ax=ax, label='medium')

    ax.set_xlabel(parameter)
    ax.set_ylim([0,3])
    ax.legend(frameon=False)

    if fn is not None:
        # Honour an explicit path; fall back to the default name when `fn` is
        # only used as an on/off flag.
        if isinstance(fn, (str, os.PathLike)):
            target = fn
        else:
            target = f"graphics/kde-{parameter}.pdf"
        fig.savefig(target, bbox_inches='tight')

In [None]:
# Onshore wind cost: classes split at tscrel 1.2 and 1.6; fn=True triggers
# saving.
plot_kde(df, 1.2, 1.6, "onwind-cost", fn=True)

In [None]:
# Offshore wind cost, same thresholds.
plot_kde(df, 1.2, 1.6, "offwind-cost", fn=True)

In [None]:
# Solar cost, same thresholds.
plot_kde(df, 1.2, 1.6, "solar-cost", fn=True)

In [None]:
# Battery cost, same thresholds.
plot_kde(df, 1.2, 1.6, "battery-cost", fn=True)

In [None]:
# Hydrogen cost, same thresholds.
plot_kde(df, 1.2, 1.6, "H2-cost", fn=True)