# Historical validation - Levi's data

This notebook takes data for polymer production, applies it to the model, and checks the resulting production of primary chemicals, based on Peter Levi's chemical mapping.

This tests the production part of the model in isolation (separate from the calculation of current and future demand for polymers, based on the dynamic stock model).

In [None]:
import numpy as np
import logging
import re
import pandas as pd
import json

In [None]:
%load_ext autoreload
%autoreload 2

## Define levers

Load from `levers.xlsx`

In [None]:
from load_levers import read_levers
levers = read_levers("levers.xlsx")

## Model

In [None]:
import load_model
model_data = load_model.load_model()

In [None]:
from load_model_polymers import define_polymer_model_validation, polymer_objects, extra_demand_names
model, recipe_data = load_model.build_model(model_data)
define_polymer_model_validation(model)

In [None]:
flows_sym = model.to_flows(recipe_data, flow_ids=True)
func = model.lambdify(recipe_data)

## Load baseline data from levers

We use the baseline lever data (which represents default/baseline values for many parameters, not just future changes), supplemented with the specific historical polymer demand data.

In [None]:
from sankey_definitions import sdd_overall, sdd_lifecycle, sdd_eol, sdd_chemical_synthesis, sdd_primary, sdd_feedstock, palette
from floweaver import weave, Dataset
from utils import update_sankey_data

In [None]:
# floweaver workaround: creating a Dataset resets the index of the flows table,
# so the id-indexed table is put back by hand afterwards. This only works
# because dim_process etc. are not used.
dataset = Dataset(flows_sym)
flows_by_id = flows_sym.set_index("id")
dataset._flows = flows_by_id
dataset._table = flows_by_id

# Sankey diagram definitions, keyed by the name offered in the UI.
sdds = dict(
    overall=sdd_overall,
    feedstock=sdd_feedstock,
    synthesis=sdd_chemical_synthesis,
    lifecycle=sdd_lifecycle,
    primary=sdd_primary,
    eol=sdd_eol,
)

# One woven Sankey per definition, all sharing the same dataset and palette.
sankey_data = dict(
    (name, weave(definition, dataset, palette=palette))
    for name, definition in sdds.items()
)

# Baseline settings: the first level of every lever.
baseline_lever_settings = dict(
    (lever.lever_id, lever.levels[0].level_id) for lever in levers.levers
)
test_params = levers.get_params(baseline_lever_settings, time_index=0)

Load the historical demand data

In [None]:
polymer_demand = pd.read_excel("../validation/levi_data.xlsx", sheet_name="Polymers", index_col="object")
polymer_demand

In [None]:
primary_chemicals = pd.read_excel("../validation/levi_data.xlsx", sheet_name="Primary", index_col="object")
primary_chemicals

## Check: historical polymer demand data vs stock model demand for 2020

In [None]:
# Polymer production in the baseline model run, read back from its saved flows.
baseline_flows = pd.read_csv("../outputs/baseline_flows.csv")

# na=False: a row with a missing source counts as "no match" instead of
# putting NaN into the boolean mask.
from_polymerisation = baseline_flows["source"].str.startswith("PolymerisationOf", na=False)

production_by_polymer = {}
for obj in polymer_objects:
    matches = baseline_flows[(baseline_flows["target"] == obj) & from_polymerisation]
    # Exactly one polymerisation flow is expected per polymer; report which
    # polymer violates that rather than failing with a bare AssertionError.
    assert len(matches) == 1, (
        f"expected exactly one polymerisation flow into {obj!r}, found {len(matches)}"
    )
    production_by_polymer[obj] = matches.iloc[0]["value"]

# Series indexed by polymer object name.
baseline_polymer_production = pd.Series(production_by_polymer)
baseline_polymer_production

In [None]:
# Side-by-side table of Levi's polymer demand and the baseline model's polymer
# production, one column per source, rounded to one decimal place.
cmp_sources = {
    "Levi": polymer_demand["demand"],
    "Baseline model": baseline_polymer_production,
}
cmp = pd.concat(cmp_sources, axis=1)
cmp = cmp.round(1)
cmp

In [None]:
cmp.plot.barh();

In [None]:
def get_historical_polymer_demand(year):
    """Return Levi's polymer demand as a list ordered like ``polymer_objects``.

    Values are the ``demand`` column of ``polymer_demand`` multiplied by 1e6,
    because the model is defined in tonnes. Polymers that are not in the sheet
    get 0. Only 2013 is available; any other ``year`` fails the assertion.
    """
    assert year == 2013  # only year we have from Levi
    demand_tonnes = polymer_demand["demand"] * 1e6
    result = []
    for name in polymer_objects:
        result.append(demand_tonnes.get(name, 0))
    return result

def get_historical_extra_primary_demand(year):
    """Return Levi's non-polymer primary-chemical demand, ordered like ``extra_demand_names``.

    Values are the ``for_non_polymers`` column of ``primary_chemicals``
    multiplied by 1e6, because the model is defined in tonnes. Names that are
    not in the sheet get 0. Only 2013 is available; any other ``year`` fails
    the assertion.
    """
    assert year == 2013  # only year we have from Levi
    non_polymer_tonnes = primary_chemicals["for_non_polymers"] * 1e6
    return list(map(lambda name: non_polymer_tonnes.get(name, 0), extra_demand_names))

In [None]:
get_historical_polymer_demand(2013)

In [None]:
get_historical_extra_primary_demand(2013)

In [None]:
test_params["Z_polymer"] = get_historical_polymer_demand(2013)

In [None]:
# No extra (non-polymer) primary-chemical demand in this test run.
test_params["Z_extra"] = [0,0,0,0]

# Less capacity in 2013 than our base year of 2020
# NOTE(review): capacities are presumably in tonnes, like the demand inputs -- confirm.
test_params['C_ethylene_from_methyl_alcohol'] = 1.88e6
test_params['C_xylenes_from_methyl_alcohol'] = 0
test_params['C_ethyl_alcohol_from_biomass'] = 0
    

In [None]:
# Run every woven Sankey through load_model.subs_in_sankey_data with the
# lambdified model and the 2013 test parameters.
sankey_data_with_data = dict(
    (name, load_model.subs_in_sankey_data(data, func, test_params))
    for name, data in sankey_data.items()
)

Show a Sankey diagram with levers:

In [None]:
from ipywidgets import interactive_output, Box, HBox, VBox, Combobox, interact, Dropdown, Button, IntSlider, Layout

def update_sankey(sankey_id, time=0, **kwargs):
    """Recompute the Sankey ``sankey_id`` and push the result into the widget ``w``.

    Parameters
    ----------
    sankey_id : key into ``sankey_data`` selecting which diagram to show.
    time : passed as ``time_index`` to ``levers.get_params``.
    **kwargs : accepted and ignored.

    The displayed Sankey widget (``w.children[0]``) is updated in place from a
    freshly built widget; the second child of ``w`` is replaced by the new one.
    """
    params = levers.get_params(baseline_lever_settings, time_index=time)
    # 2013 is the only year Levi's data covers. The previous argument (2000)
    # tripped the assertion in get_historical_polymer_demand on every call, so
    # the diagram could never refresh.
    params["Z_polymer"] = get_historical_polymer_demand(2013)
    params["Z_extra"] = [0,0,0,0]
    # Less capacity in 2013 than our base year of 2020
    params['C_ethylene_from_methyl_alcohol'] = 1.88e6
    params['C_xylenes_from_methyl_alcohol'] = 0
    params['C_ethyl_alcohol_from_biomass'] = 0
    new_w = update_sankey_data(sankey_data[sankey_id], func(params)).to_widget(debugging=True)
    old_sankey = w.children[0]
    new_sankey = new_w.children[0]
    # Re-point link clicks at the handler belonging to the new data.
    # NOTE(review): this looks like it adds one more handler per refresh
    # without removing the previous one -- confirm that is acceptable.
    old_sankey.on_link_clicked(new_sankey._link_clicked_handlers.callbacks[0])
    # Copy the new diagram's content onto the widget that is already displayed.
    old_sankey.links = new_sankey.links
    old_sankey.nodes = new_sankey.nodes
    old_sankey.order = new_sankey.order
    old_sankey.groups = new_sankey.groups
    w.children = (old_sankey, new_w.children[1])

def adjust_scale(factor):
    """Multiply the displayed Sankey's scale by ``factor``; do nothing while the scale is unset."""
    sankey = w.children[0]
    if sankey.scale is None:
        return
    sankey.scale *= factor
    
# Buttons that grow / shrink the Sankey scale by 10%.
btn_inc = Button(description="Scale +")
btn_dec = Button(description="Scale –")
btn_inc.on_click(lambda b: adjust_scale(1.1))
btn_dec.on_click(lambda b: adjust_scale(0.9))

# Controls whose values are passed to update_sankey as keyword arguments.
control_widgets = {
    "sankey_id": Dropdown(value="primary", options=sankey_data.keys()),
    "time": IntSlider(min=0, max=6),
}

# Build the Sankey widget BEFORE wiring up interactive_output:
# interactive_output calls update_sankey once immediately, and update_sankey
# reads the global `w`.
w = sankey_data_with_data[control_widgets["sankey_id"].value].to_widget(width=1300, height=700, debugging=True)

out = interactive_output(
    update_sankey,
    control_widgets
)
ui_layout = Layout(
    display='flex',
    flex_flow='row wrap',
    align_items='stretch', 
    width='90%'
)
ui = Box(list(control_widgets.values()) + 
         [btn_inc, btn_dec], layout=ui_layout)
# `out` captures anything update_sankey prints or raises. Showing it below the
# diagram makes a failing refresh visible instead of silently leaving the old
# diagram in place.
VBox([ui, w, out])

Instead, focus on showing the detailed flows rather than the Sankey diagram:

In [None]:
def flows_for_historical_year(year, with_nonpolymer_demand=False):
    """Evaluate the model for a historical year and return its flows as a table.

    Parameters
    ----------
    year : forwarded to the ``get_historical_*`` helpers (only 2013 is accepted there).
    with_nonpolymer_demand : when True, ``Z_extra`` is taken from
        ``get_historical_extra_primary_demand``; otherwise it is all zeros.

    Returns
    -------
    A copy of ``flows_sym`` indexed by ``id``, without the ``metric`` column,
    whose ``value`` column holds the evaluated flows divided by 1e6 and rounded
    to one decimal place.
    """
    params = levers.get_params(baseline_lever_settings, time_index=0)
    params["Z_polymer"] = get_historical_polymer_demand(year)
    if with_nonpolymer_demand:
        # B, T, X, MethylAlcohol
        params["Z_extra"] = get_historical_extra_primary_demand(year)
    else:
        params["Z_extra"] = [0,0,0,0]

    # Less capacity in 2013 than our base year of 2020
    params['C_ethylene_from_methyl_alcohol'] = 1.88e6
    params['C_xylenes_from_methyl_alcohol'] = 0
    # NOTE(review): the other two places that apply these 2013 overrides in this
    # notebook also set 'C_ethyl_alcohol_from_biomass' to 0; it is not set here.
    # Confirm whether that omission is intentional.

    new_values = func(params)
    # set_index returns a new frame, so flows_sym itself is left untouched.
    flows = flows_sym.set_index("id")
    del flows["metric"]
    # Fill the whole column in one assignment, looking each flow up by its id,
    # instead of writing cell by cell inside an iterrows() loop.
    flows["value"] = [np.round(new_values[flow_id] / 1e6, 1) for flow_id in flows.index]
    return flows

In [None]:
flows_2013 = flows_for_historical_year(2013)
flows_2013_with_extra = flows_for_historical_year(2013, with_nonpolymer_demand=True)
flows_2013

In [None]:
flows_2013.query("source == 'TerephthalicAcidPhthalicAnhydrideDioctylPhthalate'") #["value"].sum()

In [None]:
primary_chemicals

In [None]:
# Flows into these toluene-conversion processes are left out of the model totals.
excluded_targets = [
    "DealkylationOfTolueneForBenzene",
    "DisproportionationOfTolueneForXylenes",
]

# Output column name -> column of `primary_chemicals` it is taken from.
levi_columns = {
    "levi_modelled": "for_modelled_polymers",
    "levi_otherpolymers": "for_otherpolymers",
    "levi_unknown_polymers": "for_unknown_polymers",
    "levi_non_polymers": "for_non_polymers",
}


def _total_from(flows, chemical):
    """Sum ``value`` over the flows leaving ``chemical``, skipping ``excluded_targets``."""
    selected = flows.query("source == @chemical and target not in @excluded_targets")
    return selected["value"].sum()


# One record per primary chemical: model totals (without and with the extra
# non-polymer demand) next to Levi's figures rounded to one decimal place.
records = []
for chemical, levi_row in primary_chemicals.iterrows():
    record = {
        "object": chemical,
        "model": _total_from(flows_2013, chemical),
        "model_extra": _total_from(flows_2013_with_extra, chemical),
    }
    for label, column in levi_columns.items():
        record[label] = levi_row[column].round(1)
    records.append(record)

cmp_primary = pd.DataFrame(records).set_index("object")
cmp_primary

In [None]:
import matplotlib.pyplot as plt
# Faint stacked bars: all four Levi categories. The call creates the axes that
# everything below draws on.
all_levi_columns = ["levi_modelled", "levi_otherpolymers", "levi_unknown_polymers",
                    "levi_non_polymers"]
ax = cmp_primary[all_levi_columns].plot.barh(stacked=True, alpha=0.2)

# Stronger stacked bars on top: only the polymer-related Levi categories.
cmp_primary[["levi_modelled", "levi_otherpolymers"]].plot.barh(stacked=True, alpha=0.7, ax=ax)

# Model results as markers, one per bar position.
bar_positions = range(len(cmp_primary))
ax.plot(cmp_primary["model"].values, bar_positions, marker='o', ls='none', c='k', label='model')
ax.plot(cmp_primary["model_extra"].values, bar_positions, marker='o', ls='none', c='C4', label='model with extra primary demand')
ax.legend();

In [None]:
import matplotlib.pyplot as plt
# Create the axes explicitly. plt.gca() at the start of a cell draws onto
# whatever figure happens to be current, so outside the inline backend this
# plot could land on top of the previous cell's figure.
fig, ax = plt.subplots()
# Stacked bars: Levi's polymer-related primary demand.
cmp_primary[["levi_modelled", "levi_otherpolymers"]].plot.barh(stacked=True, alpha=0.7, ax=ax)
# Model result (polymer demand only) as markers, one per bar position.
ax.plot(cmp_primary["model"].values, range(len(cmp_primary)), marker='o', ls='none', c='k', label='model')
ax.legend();

- Methanol slightly low due to neglecting dimethyl terephthalate into PET
- Xylenes and toluene slightly high
- We have neglected extra non-polymer demand for butene (butylenes) -- out of scope.
- The representation of "other polymers" is limited and could be improved.