In [None]:
# Reload imported modules automatically before each cell is executed, so that
# edits to the local helper scripts take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import chaospy
import numpoly
import yaml
import numpy as np
import pandas as pd
import sys, os
from itertools import combinations, product
from numpy.random import random
import multiprocessing as mp
from sklearn import linear_model as lm

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use(["bmh", "../matplotlibrc"])

from sklearn.model_selection import train_test_split

In [None]:
sys.path.insert(0, os.getcwd() + "/../scripts")
import _helpers as h
import _plotters as p
from surrogate import build_surrogate, apply_multifidelity
from sobol import calculate_sobol, calculate_sobol_m2

## PCE Surrogate Modelling

In [None]:
# Run configuration for this notebook.

# Sample files; loaded below as `dataset` (low) and `hf_dataset` (high).
lf_datafile = "../results/dataset_low.csv"
hf_datafile = "../results/dataset_high.csv"
# `dimension`, `sense` and `eps` are forwarded unchanged to h.load_dataset
# (and `dimension`/`sense` also to apply_multifidelity).
dimension = "cost"
sense = "min"
eps = None
# Polynomial order handed to build_surrogate for the main surrogate.
order = 3
# Mode string handed to apply_multifidelity.
multifidelity = "additive"

In [None]:
# Project settings; the "uncertainties" and "train_test_split" sections are
# read further down.
with open("../config.yaml") as config_file:
    config = yaml.safe_load(config_file)

In [None]:
# Colours and display names used by every plot come from the "plotting"
# section of ../config.pypsaeur.yaml.
with open("../config.pypsaeur.yaml") as plotting_file:
    peur_cf = yaml.safe_load(plotting_file)["plotting"]

TECH_COLORS = peur_cf["tech_colors"]
NAMES = peur_cf["nice_names"]

In [None]:
import pypsa

n = pypsa.Network("../data/elec_s_37_ec.nc")

# Mean capital cost per carrier of generators and storage units, divided by
# 1e3 and rounded to one decimal.
# NOTE(review): the axis labels below read "EUR/kW/a", so the division
# presumably converts from EUR/MW/a - confirm against the network file.
ANNUITIES = pd.concat(
    [
        component.groupby("carrier").capital_cost.mean().div(1e3).round(1)
        for component in (n.generators, n.storage_units)
    ]
)

# A single "offwind" entry is the plain average of the AC and DC variants.
ANNUITIES["offwind"] = 0.5 * ANNUITIES["offwind-ac"] + 0.5 * ANNUITIES["offwind-dc"]

# Remove the two merged carriers and the carriers not needed below.
ANNUITIES = ANNUITIES.drop(["offwind-ac", "offwind-dc", "ror", "hydro", "PHS"])

In [None]:
# Wrap the configured uncertainties in h.NamedJ; later cells use its `.names`,
# `.mapping` and `.J` attributes and index it by parameter name.
uncertainties = config["uncertainties"]
distribution = h.NamedJ(uncertainties)

In [None]:
# Load both sample files with identical settings. `dataset` (low) feeds the
# train/test split; in the visible part of this notebook `hf_dataset` (high)
# is only passed to the plotting functions.
dataset = h.load_dataset(lf_datafile, dimension, sense, eps)
hf_dataset = h.load_dataset(hf_datafile, dimension, sense, eps)

In [None]:
# Remove the "gini" output column from both datasets, in place.
# errors="ignore" keeps the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
dataset.drop(columns="gini", inplace=True, errors="ignore")
hf_dataset.drop(columns="gini", inplace=True, errors="ignore")

In [None]:
# Split the low dataset into training and test rows; the split options come
# from the "train_test_split" section of config.yaml.
# NOTE(review): the split is only reproducible if those options fix
# random_state - confirm in the config file.
train_set, test_set = train_test_split(dataset, **config["train_test_split"])

In [None]:
# Regression model handed to build_surrogate for the main surrogate.
# NOTE(review): the variable is called `sklearn` like the package and is
# rebound to a Lasso model in the error-plot cells further down.
sklearn = lm.Lars(verbose=True, fit_intercept=False)

In [None]:
# Build the main (low) surrogate from the training rows, using the configured
# polynomial order and the regression model defined above.
surrogate = build_surrogate(order, distribution, train_set, sklearn)

In [None]:
# For each subset, convert its index into a table of input samples
# (presumably one column per uncertain parameter - see h.multiindex2df) and
# evaluate the surrogate on those samples.
train_samples = h.multiindex2df(train_set.index)
train_predictions = h.build_pce_prediction(surrogate, train_samples)

test_samples = h.multiindex2df(test_set.index)
test_predictions = h.build_pce_prediction(surrogate, test_samples)

## Validation

In [None]:
# Histograms of the dataset next to the train and test predictions; `fn` is
# forwarded as the output file name (presumably where the figure is saved).
p.plot_histograms(dataset, [train_predictions, test_predictions], fn="graphics/histograms.pdf")

In [None]:
# Error measures of the surrogate on the training rows, rounded to 2 decimals.
h.calculate_errors(train_predictions, train_set).round(2)

In [None]:
# Error measures of the surrogate on the held-out test rows, rounded to 2 decimals.
h.calculate_errors(test_predictions, test_set).round(2)

### Error vs Order

In [None]:
def plot_error_vs_order(save=False, sklearn=None, max_order=6, max_n=400):
    """Plot test-set error measures against the polynomial order of the surrogate.

    For every order in ``range(0, max_order)`` a surrogate is built from the
    first ``max_n`` rows of the global ``train_set`` and scored against the
    global ``test_set`` with ``h.calculate_errors``. One figure per error
    measure (r2, mape, mae, rmse) is drawn and closed again, so nothing is
    displayed inline; figures persist only when ``save`` is true.

    Parameters
    ----------
    save : bool
        If true, write each figure to
        ``graphics/error/error-<measure>-vs-order-sklearn.pdf``.
    sklearn : object, optional
        Regression model forwarded unchanged to ``build_surrogate``.
    max_order : int
        Exclusive upper bound of the evaluated orders (0 .. ``max_order - 1``).
    max_n : int
        Number of leading training rows used to build each surrogate.
    """
    # Neither the evaluation samples nor the axis limits depend on the order,
    # so they are prepared once instead of once per loop iteration.
    test_samples = h.multiindex2df(test_set.index)
    ylims = dict(r2=[0.6,1.05], mape=[0,30], mae=[0,100], rmse=[0,100])

    results = {}
    for o in range(0, max_order):

        # Progress indicator: the orders are printed on one line.
        print(o, end=" ")

        model = build_surrogate(o, distribution, train_set[:max_n], sklearn)
        predictions = h.build_pce_prediction(model, test_samples)

        results[o] = h.calculate_errors(predictions, test_set)

    df = pd.concat(results, axis=1)

    for measure in ["r2", "mape", "mae", "rmse"]:
        # Reshape to one row per order and one column per output variable.
        data = df.T.unstack(level=0).loc[measure].unstack().T

        # Colours are looked up by the raw column keys, i.e. before renaming.
        colors = [TECH_COLORS[c] for c in data.columns]

        data.columns = data.columns.map(NAMES)

        fig, ax = plt.subplots(figsize=(3.5,3))

        data.plot(ax=ax, color=colors)

        plt.legend(bbox_to_anchor=(1.02,1), frameon=False)
        plt.xlabel("order of polynomial")
        plt.ylabel(measure.upper())
        plt.title(f"{len(train_set[:max_n])} training samples")
        plt.grid(None)
        plt.box(False)

        plt.ylim(ylims[measure])

        if save:
            plt.savefig(f"graphics/error/error-{measure}-vs-order-sklearn.pdf", bbox_inches='tight')

        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)

### Error vs Samples

In [None]:
def plot_error_vs_samples(save=False, sklearn=None, order=3):
    """Plot test-set error measures against the number of training samples.

    Surrogates of fixed polynomial ``order`` are built from the first ``i``
    rows of the global ``train_set`` for ``i = 50, 75, ...`` up to
    ``len(train_set)`` and scored against the global ``test_set`` with
    ``h.calculate_errors``. One figure per error measure (r2, mape, mae, rmse)
    is drawn and closed again, so nothing is displayed inline; figures persist
    only when ``save`` is true.

    Parameters
    ----------
    save : bool
        If true, write each figure to
        ``graphics/error/error-<measure>-vs-samples-order-<order>-sklearn.pdf``.
    sklearn : object, optional
        Regression model forwarded unchanged to ``build_surrogate``.
    order : int
        Polynomial order used for every surrogate.
    """
    # Neither the evaluation samples nor the axis limits depend on the sample
    # count, so they are prepared once instead of once per loop iteration.
    test_samples = h.multiindex2df(test_set.index)
    ylims = dict(r2=[0.6,1.05], mape=[0,30], mae=[0,100], rmse=[0,100])

    results = {}
    for i in np.arange(50, len(train_set)+1, 25):

        # Progress indicator: the sample counts are printed on one line.
        print(i, end=" ")

        model = build_surrogate(order, distribution, train_set[:i], sklearn)
        predictions = h.build_pce_prediction(model, test_samples)

        results[i] = h.calculate_errors(predictions, test_set)

    df = pd.concat(results, axis=1)

    for measure in ["r2", "mape", "mae", "rmse"]:

        # Reshape to one row per sample count and one column per output variable.
        data = df.T.unstack(level=0).loc[measure].unstack().T

        # Colours are looked up by the raw column keys, i.e. before renaming.
        colors = [TECH_COLORS[c] for c in data.columns]

        data.columns = data.columns.map(NAMES)

        fig, ax = plt.subplots(figsize=(3.5,3))

        data.plot(ax=ax, color=colors)

        plt.legend(bbox_to_anchor=(1.02,1), frameon=False)
        plt.grid(None)
        plt.box(False)
        plt.xlabel("training samples")
        plt.ylabel(measure.upper())
        plt.title(f"order {order}")

        plt.ylim(ylims[measure])

        if save:
            plt.savefig(f"graphics/error/error-{measure}-vs-samples-order-{order}-sklearn.pdf", bbox_inches='tight')

        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)

In [None]:
# Error-vs-samples figures with a Lasso regression model; the figures are saved.
sklearn = lm.Lasso(alpha=0.005, fit_intercept=False)
plot_error_vs_samples(save=True, sklearn=sklearn)

In [None]:
# Error-vs-order figures with a Lasso regression model; the figures are saved.
sklearn = lm.Lasso(alpha=0.005, fit_intercept=False)
plot_error_vs_order(save=True, sklearn=sklearn)

## Validate Multifidelity Correction

In [None]:
# Derive the high surrogate from the low surrogate and the high sample file.
# NOTE(review): the four literal arguments after `sense` (0.0, "none", None, 1)
# are positional; their meaning is defined by the signature of
# surrogate.apply_multifidelity, which is not visible here - confirm there.
# Note that 0.0 is passed literally rather than the global `eps` (None).
hf_surrogate = apply_multifidelity(
    surrogate,
    multifidelity,
    hf_datafile,
    dimension,
    sense,
    0.0,
    "none",
    None, 
    1,
    distribution
)

## Sensitivity Analysis

In [None]:
def plot_sobol_m2(sobol, title="", fn=None):
    """Draw a square table of Sobol indices as an annotated lower-triangle heatmap.

    The table (in this notebook the result of ``calculate_sobol_m2``) is
    copied, its row and column labels are mapped through ``NAMES``, and the
    values are multiplied by 100 and rounded. The diagonal and the upper
    triangle are masked. The colour scale runs from 0 to 20. The figure is
    closed before returning, so it is only kept when ``fn`` is given.

    Parameters
    ----------
    sobol : pandas.DataFrame
        Square table of indices; the caller's object is not modified.
    title : str
        Figure title.
    fn : str, optional
        Output path; when given the figure is saved there.
    """
    sobol = sobol.copy()

    sobol.index = sobol.index.map(NAMES)
    sobol.columns = sobol.columns.map(NAMES)

    sobol = sobol.mul(100).round()

    fig, ax = plt.subplots(figsize=(2.5,2.5))

    # True entries are hidden: diagonal and upper triangle.
    # The builtin ``bool`` is used because the ``np.bool`` alias was removed
    # from NumPy (1.24) and raises AttributeError there.
    mask = np.triu(np.ones(sobol.shape)).astype(bool)

    sns.heatmap(sobol, mask=mask, square=True, cmap="Purples",
            vmax=20,
            vmin=0,
            annot=True,
            cbar=False,
            ax=ax)

    plt.title(title)
    plt.box(False)

    if fn is not None:
        plt.savefig(fn, bbox_inches='tight')

    plt.close(fig)

In [None]:
def plot_sobol_bar(sobol, relative=True, fn=None):
    """Draw Sobol indices as stacked bars, one bar per column of ``sobol``.

    Rows become the stacked segments (coloured via ``TECH_COLORS`` using the
    raw row labels); columns become the bars. Labels are mapped through
    ``NAMES`` and values are shown in percent (times 100, rounded). The figure
    is closed before returning, so it is only kept when ``fn`` is given.

    Parameters
    ----------
    sobol : pandas.DataFrame
        Table of indices; the caller's object is not modified.
    relative : bool
        If true, each column is first divided by its column sum.
    fn : str, optional
        Output path; when given the figure is saved there.
    """
    shares = sobol.copy()

    fig, ax = plt.subplots(figsize=(4,2.5))

    # Colour lookup must happen before the labels are replaced by nice names.
    segment_colors = [TECH_COLORS[label] for label in shares.index]

    shares.index = shares.index.map(NAMES)
    shares.columns = shares.columns.map(NAMES)

    if relative:
        shares = shares.div(shares.sum(), axis="columns")

    percent = (shares * 100).round()

    percent.T.plot.bar(ax=ax, stacked=True, color=segment_colors)

    plt.legend(bbox_to_anchor=(1.02,1.01), ncol=1, frameon=False, title="Cost Uncertainty")
    plt.ylim([0,125])
    plt.grid(None)
    plt.box(False)
    plt.ylabel("Sobol [%]")

    plt.xticks(rotation=-30, ha='left')

    if fn is not None:
        plt.savefig(fn, bbox_inches='tight')

    plt.close()

In [None]:
def plot_sobol(data, fn=None):
    """Draw a table of Sobol indices as an annotated heatmap.

    Rows are labelled "Inputs" and columns "Outputs". Labels are mapped
    through ``NAMES`` and values are shown in percent (times 100, rounded) on
    a colour scale from 0 to 100. The figure is closed before returning, so it
    is only kept when ``fn`` is given.

    Parameters
    ----------
    data : pandas.DataFrame
        Table of indices; the caller's object is not modified.
    fn : str, optional
        Output path; when given the figure is saved there.
    """
    table = data.copy()

    fig, ax = plt.subplots(figsize=(4, 7))

    table.index = table.index.map(NAMES)
    table.columns = table.columns.map(NAMES)

    percent = (table * 100).round()

    heatmap_options = dict(
        square=True,
        cmap="Purples",
        vmax=100,
        vmin=0,
        annot=True,
        cbar=False,
    )
    sns.heatmap(percent, **heatmap_options)

    plt.ylabel("Inputs")
    plt.xlabel("Outputs")

    if fn is not None:
        plt.savefig(fn, bbox_inches="tight")

    plt.close()

In [None]:
# Surrogates keyed by the fidelity label that also appears in the output file names.
surrogates = {"high": hf_surrogate, "low": surrogate}

In [None]:
# Column selection and order for the Sobol tables. It has its own name so that
# the integer polynomial `order` from the configuration cell is not overwritten
# (re-running the surrogate cell afterwards would otherwise receive a list).
sobol_columns = ["wind", "onwind", "offwind", "solar", "transmission", "H2", "battery", "tsc"]
for fid in ["high", "low"]:
    # calculate_sobol with its default settings ...
    print("t")
    sobol_t = calculate_sobol(surrogates[fid], distribution)[sobol_columns]
    plot_sobol(sobol_t, fn=f'graphics/sobol/sobol-t-{fid}.pdf')
    plot_sobol_bar(sobol_t, relative=False, fn=f"graphics/sobol/sobol-t-{fid}-bar.pdf")

    # ... and with sobol='m'.
    print("m")
    sobol_m = calculate_sobol(surrogates[fid], distribution, sobol='m')[sobol_columns]
    plot_sobol(sobol_m, fn=f'graphics/sobol/sobol-m-{fid}.pdf')
    plot_sobol_bar(sobol_m, relative=False, fn=f"graphics/sobol/sobol-m-{fid}-bar.pdf")

    # Pairwise tables for a subset of the output variables.
    for attr in ["offwind", "onwind", "solar", "transmission"]:
        print(attr)
        sobol_m2 = calculate_sobol_m2(surrogates[fid][attr], distribution)
        plot_sobol_m2(sobol_m2, NAMES[attr], f"graphics/sobol/sobol-{attr}-m2-{fid}.pdf")

## Only Cost-Optimal Surrogate

In [None]:
def plot_2D(surrogate, distribution, variable, xname, yname, xsamples=(0.5,1.5,20), ysamples=(0.5,1.5,20), 
            fixed=1, dataset=None, contour_handles=None, vmin=160, vmax=230, levels=25, fn=None):
    """Filled contour plot of one surrogate output over two input parameters.

    The polynomial ``surrogate[variable]`` is evaluated on a grid spanned by
    ``xname`` and ``yname`` while all remaining inputs are held at ``fixed``.
    Both axes are scaled by ``ANNUITIES`` before plotting. The figure is
    closed before returning, so it is only kept when ``fn`` is given.

    Parameters
    ----------
    surrogate : mapping
        Maps output variable names to polynomials with inputs named ``q<i>``.
    distribution : object
        Only its ``mapping`` attribute (parameter name -> q index) is used.
    variable : str
        Output variable to plot; also used to look up the colorbar label.
    xname, yname : str
        Input parameters on the x and y axis.
    xsamples, ysamples : tuple
        ``(start, stop, num)`` arguments for ``np.linspace``.
    fixed : float, int or dict
        A number fixes every remaining input to that value; a dict maps
        parameter names to individual values.
    dataset : pandas.DataFrame, optional
        If given, its samples are overlaid as grey dots; after
        ``reset_index()`` it must provide ``<xname>-cost`` and ``<yname>-cost``.
    contour_handles : contour set, optional
        Existing contour set whose levels and colorbar are reused. If omitted,
        one is generated from ``vmin``, ``vmax`` and ``levels``.
    vmin, vmax : float
        Colour range for the generated contour set.
    levels : int
        Number of levels requested for the generated contour set.
    fn : str, optional
        Output path; when given the figure is saved there.

    Raises
    ------
    NotImplementedError
        If ``fixed`` is neither a number nor a dict.
    AssertionError
        If ``fixed`` together with the two axes does not cover all inputs.
    """
    xs = np.linspace(*xsamples)
    ys = np.linspace(*ysamples)

    surrogate_var = surrogate[variable]

    # TODO substitute distribution since only used for variable mapping
    to_qindex = distribution.mapping
    all_q = set(surrogate_var.names)

    qx = "q" + str(to_qindex[xname])
    qy = "q" + str(to_qindex[yname])

    if isinstance(fixed, (float, int)):
        fixed = {qo: fixed for qo in all_q - {qx, qy}}
    elif isinstance(fixed, dict):
        fixed = {"q" + str(to_qindex[k]): v for k, v in fixed.items()}
    else:
        raise NotImplementedError("Fixed input parameters not properly specified.")

    assert set(fixed.keys()).union({qx, qy}) == all_q, "Not all input parameters specified!"

    # Substitute the fixed inputs; what remains depends on qx and qy only.
    zpoly = surrogate_var(**fixed)

    # One row per y sample, one column per x sample.
    z = np.array([zpoly(**{qx: xs, qy: y}) for y in ys])

    if contour_handles is None:
        # Throw-away contour set that only serves to fix levels and colorbar
        # independently of the data, so that several figures are comparable.
        # The dummy surface ramps from vmin-5 to vmax+5 and has the
        # (len(ys), len(xs)) shape contourf expects. This is deterministic and
        # also works when the two axes use different sample counts.
        ramp = np.linspace(vmin - 5, vmax + 5, xs.shape[0])
        zdummy = np.tile(ramp, (ys.shape[0], 1))

        dummy_fig, dummy_ax = plt.subplots()
        # Honour the `levels` argument (previously a hard-coded 25).
        contour_handles = dummy_ax.contourf(xs, ys, zdummy, levels=levels, vmin=vmin, vmax=vmax)
        plt.close(dummy_fig)

    fig, ax = plt.subplots(figsize=(6,5))

    ax.contourf(xs*ANNUITIES[xname], ys*ANNUITIES[yname], z, levels=contour_handles.levels)

    # Attach the colorbar to this figure's axes explicitly; the handles may
    # belong to a different (already closed) figure.
    fig.colorbar(contour_handles, ax=ax, label=NAMES[variable])

    ax.set_xlabel(f"{NAMES[xname]} Cost [EUR/kW/a]")
    ax.set_ylabel(f"{NAMES[yname]} Cost [EUR/kW/a]")

    ax.set_frame_on(False)
    ax.grid(None)

    if dataset is not None:
        df = dataset.reset_index().astype(float)
        x = df[f"{xname}-cost"]
        y = df[f"{yname}-cost"]
        ax.scatter(x*ANNUITIES[xname], y*ANNUITIES[yname], marker='.', s=5, alpha=0.2, color='grey')

    if fn is not None:
        fig.savefig(fn, bbox_inches='tight')

    plt.close(fig)

In [None]:
# TODO use multiprocessing
var = "tsc"
for i, j in combinations(distribution.names, 2):

    print(f"{i} -- {j}")

    # Each axis runs from the lower to the upper bound of its parameter's
    # distribution, sampled at 10 points.
    xsamples = (distribution[i].lower[0], distribution[i].upper[0], 10)
    ysamples = (distribution[j].lower[0], distribution[j].upper[0], 10)

    print("low")

    plot_2D(
        surrogate, distribution, var, i, j,
        fixed=1,
        xsamples=xsamples,
        ysamples=ysamples,
        dataset=dataset,
        fn=f"graphics/2D/2D-{var}-{i}-{j}-low.pdf",
    )

    print('high')

    plot_2D(
        hf_surrogate, distribution, var, i, j,
        fixed=1,
        xsamples=xsamples,
        ysamples=ysamples,
        dataset=hf_dataset,
        fn=f"graphics/2D/2D-{var}-{i}-{j}-high.pdf",
    )

In [None]:
def plot_1D(surrogate, variable, parameter, coords, distribution, sample=10000, dataset=None, color_by_var=True, fn=None):
    """Plot percentile bands of one surrogate output along one input parameter.

    For every value in ``coords`` the input ``parameter`` is substituted into
    ``surrogate[variable]`` and the 5/25/50/75/95 percentiles of the result
    are computed with ``chaospy.Perc`` over ``distribution.J``. The median is
    drawn as a line, Q25-Q75 and Q5-Q95 as shaded bands. The x axis is scaled
    by ``ANNUITIES[parameter]``. The figure is closed before returning, so it
    is only kept when ``fn`` is given.

    Parameters
    ----------
    surrogate : mapping
        Maps output variable names to polynomials with inputs named ``q<i>``.
    variable : str
        Output variable; selects y-axis range and unit.
    parameter : str
        Input parameter varied along the x axis.
    coords : numpy.ndarray
        Values of ``parameter`` at which percentiles are evaluated.
    distribution : object
        Provides ``mapping`` (parameter name -> q index) and ``J``.
    sample : int
        Forwarded to ``chaospy.Perc`` as ``sample``.
    dataset : pandas.DataFrame, optional
        If given, its samples are overlaid as grey dots; after
        ``reset_index()`` it must provide ``<parameter>-cost`` and ``variable``.
    color_by_var : bool
        Colour by output variable (true) or by input parameter (false).
    fn : str, optional
        Output path; when given the figure is saved there.
    """
    poly = surrogate[variable]
    symbol = f"q{distribution.mapping[parameter]}"
    color = TECH_COLORS[variable] if color_by_var else TECH_COLORS[parameter]
    percentiles = [5,25,50,75,95]

    # One row per coordinate, one column per percentile.
    P = np.array([
        chaospy.Perc(poly(**{symbol: coord}), percentiles, distribution.J, sample=sample)
        for coord in coords
    ])

    fig, ax = plt.subplots(figsize=(3,3))

    costs = coords * ANNUITIES[parameter]
    plt.plot(costs, P[:,2], linewidth=1, label="Q50", color=color)
    plt.fill_between(costs, P[:,1], P[:,3], alpha=0.2, label="Q25/Q75", color=color)
    plt.fill_between(costs, P[:,0], P[:,4], alpha=0.2, label="Q5/Q95", color='grey')

    # y-axis range and unit per output variable; anything not listed uses the
    # fallback passed to .get().
    axis_settings = {
        "tsc": ([130,270], "bn EUR/a"),
        "H2": ([0,450], "GW"),
        "battery": ([0,450], "GW"),
        "transmission": ([0,700], "TWkm"),
        "gini": ([0,0.5], "-"),
    }
    y_range, unit = axis_settings.get(variable, ([0,1400], "GW"))
    plt.ylim(y_range)

    plt.ylabel(f"{NAMES[variable]} [{unit}]", fontsize=10)
    plt.xlabel(f"{NAMES[parameter]} Cost\n[EUR/kW/a]", fontsize=10)
    plt.legend(frameon=False)

    plt.box(False)
    plt.grid(None)

    if dataset is not None:
        df = dataset.reset_index().astype(float)
        plt.scatter(df[f"{parameter}-cost"] * ANNUITIES[parameter], df[variable], marker='.', s=5, alpha=0.1, color='grey')

    if fn is not None:
        plt.savefig(fn, bbox_inches='tight')

    plt.close()

In [None]:
def plot_1D_mp(variant):
    """Worker for the process pool: draw the low and high 1D plot of one variant.

    ``variant`` is a ``(variable, parameter)`` pair. The parameter is swept
    over 10 points between the lower and upper bound of its distribution, and
    the figures are written to ``graphics/1D/1D-<variable>-<parameter>-low.pdf``
    and ``...-high.pdf``. Reads the notebook globals ``distribution``,
    ``surrogate``, ``hf_surrogate``, ``dataset`` and ``hf_dataset``.
    """
    var, param = variant
    lower = distribution[param].lower[0]
    upper = distribution[param].upper[0]

    plot_1D(
        surrogate, var, param, np.linspace(lower, upper, 10), distribution,
        sample=10000, dataset=dataset,
        fn=f"graphics/1D/1D-{var}-{param}-low.pdf",
    )
    plot_1D(
        hf_surrogate, var, param, np.linspace(lower, upper, 10), distribution,
        sample=10000, dataset=hf_dataset,
        fn=f"graphics/1D/1D-{var}-{param}-high.pdf",
    )

In [None]:
# Draw the 1D plots for every (output column, uncertain parameter) pair in
# parallel, with one worker process per CPU.
# NOTE(review): plot_1D_mp is defined inside the notebook, so this presumably
# relies on the "fork" start method - confirm before running on macOS/Windows.
variants = product(dataset.columns, distribution.names)
nprocesses = mp.cpu_count()
with mp.Pool(processes=nprocesses) as pool:
    # plot_1D_mp has no return statement, so `x` is a list of None values.
    x = pool.map(plot_1D_mp, variants)

In [None]:
def plot_cost_duration_curve(dataset):
    """Plot the sorted relative cost premium of all samples over the cheapest one.

    The ``tsc`` column is expressed as percent above its minimum, sorted in
    ascending order and drawn against the share of samples (0-100 %). The
    figure is written to ``graphics/cost-duration-curve.pdf`` and left open.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``tsc`` column.
    """
    fig, ax = plt.subplots(figsize=(4,2.5))

    # Percent above the least-cost sample, cheapest first.
    premium = (dataset.tsc / dataset.tsc.min() * 100 - 100).sort_values()

    # drop=True discards the original sample index. Without it the index
    # levels become extra columns that may be plotted as additional lines.
    premium = premium.reset_index(drop=True)

    # Replace the running position by the share of samples in percent.
    premium.index = premium.index / len(premium) * 100
    premium.plot(ax=ax, legend=False)

    plt.box(False)
    plt.xlabel("% of samples")
    plt.ylabel("% more than least-cost")

    plt.savefig("graphics/cost-duration-curve.pdf", bbox_inches='tight')