# Interactive model with levers

This notebook loads the levers data and model, and presents an interactive interface to the model.

In [None]:
import numpy as np
import logging
import re
import pandas as pd
import json

In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Define levers

Load the lever definitions from `levers.xlsx`.

In [None]:
from load_levers import read_levers
# `levers` is used throughout the notebook via `levers.levers` (the individual
# levers and their levels) and `levers.get_params(settings, time_index=...)`.
levers = read_levers("levers.xlsx")

## Model

In [None]:
import load_model
# Load the model data; it is passed to `load_model.build_model` in the next cell.
model_data = load_model.load_model()

In [None]:
# Build the model object and its recipe data from the loaded model data.
model, recipe_data = load_model.build_model(model_data)
# `define_model` is given the levers; the `other_results` it returns are the
# extra expressions lambdified into `func_other` in the next cell.
other_results = load_model.define_model(model, recipe_data, levers)

In [None]:
# Flow table including flow ids; its "id" column is used as the index in later cells.
# (presumably the values are still symbolic at this point -- confirm)
flows_sym = model.to_flows(recipe_data, flow_ids=True)
# Callable taking parameter values; later cells look up its result by flow id.
func = model.lambdify(recipe_data)
# Same, but evaluating the `other_results` expressions instead of the flows.
func_other = model.lambdify(data=recipe_data, expressions=other_results)

# Use lever levels to get test parameter settings

If we want to see what the model output looks like for specific lever settings, use them here to find the parameter settings that can go into the model.

This duplicates the logic that is built into the interactive calculator app, which does this to re-run the model every time the levers are changed.

In [None]:
from sankey_definitions import sdd_overall, sdd_lifecycle, sdd_eol, sdd_chemical_synthesis, sdd_primary, sdd_feedstock, palette
from floweaver import weave, Dataset
from utils import update_sankey_data

In [None]:
# Work around dodgy floweaver behaviour -- the flows index is reset when the
# Dataset is created, so the id-indexed table is put back by hand afterwards.
# This only works because dim_process etc. are not being used.
dataset = Dataset(flows_sym)
dataset._flows = flows_sym.set_index("id")
dataset._table = dataset._flows

# Sankey diagram definitions, keyed by the name shown in the diagram selector.
sdds = dict(
    overall=sdd_overall,
    feedstock=sdd_feedstock,
    synthesis=sdd_chemical_synthesis,
    lifecycle=sdd_lifecycle,
    primary=sdd_primary,
    eol=sdd_eol,
)

# Weave every definition against the dataset once, up front.
sankey_data = {
    name: weave(definition, dataset, palette=palette)
    for name, definition in sdds.items()
}

# Baseline scenario: every lever at its first level, evaluated at time index 0.
baseline_lever_settings = {
    each_lever.lever_id: each_lever.levels[0].level_id
    for each_lever in levers.levers
}
test_params = levers.get_params(baseline_lever_settings, time_index=0)

# Substitute the baseline parameter values into each woven diagram.
sankey_data_with_data = {
    name: load_model.subs_in_sankey_data(woven, func, test_params)
    for name, woven in sankey_data.items()
}

Show a Sankey diagram with levers:

In [None]:
from ipywidgets import interactive_output, Box, HBox, VBox, Combobox, interact, Dropdown, Button, IntSlider, Layout

def update_sankey(sankey_id, time=0, **kwargs):
    """Refresh the displayed Sankey widget `w` in place for new control values.

    Parameters
    ----------
    sankey_id : key into `sankey_data` selecting which diagram to show.
    time : time index passed to `levers.get_params`.
    **kwargs : lever_id -> level_id, as supplied by the lever widgets. Levers
        that are not given fall back to `baseline_lever_settings`.
    """
    # Only levers with more than one level get a widget, so fill in the rest
    # from the baseline (as update_other_results and update_plots do).
    settings = {**baseline_lever_settings, **kwargs}
    params = levers.get_params(settings, time_index=time)
    new_w = update_sankey_data(sankey_data[sankey_id], func(params)).to_widget(debugging=True)
    old_sankey = w.children[0]
    new_sankey = new_w.children[0]
    # Reuse the existing Sankey widget (copying the new data across) rather
    # than replacing it; only the second child of `w` is swapped for the new one.
    # NOTE(review): this registers one more click handler on `old_sankey` on
    # every update and never removes the previous ones -- confirm this is harmless.
    old_sankey.on_link_clicked(new_sankey._link_clicked_handlers.callbacks[0])
    old_sankey.links = new_sankey.links
    old_sankey.nodes = new_sankey.nodes
    old_sankey.order = new_sankey.order
    old_sankey.groups = new_sankey.groups
    w.children = (old_sankey, new_w.children[1])

def adjust_scale(factor):
    """Multiply the displayed Sankey's scale by `factor`; do nothing if no scale is set."""
    sankey = w.children[0]
    if sankey.scale is None:
        return
    sankey.scale *= factor
    
# Buttons to grow / shrink the diagram scale by 10%.
btn_inc = Button(description="Scale +")
btn_dec = Button(description="Scale –")
btn_inc.on_click(lambda b: adjust_scale(1.1))
btn_dec.on_click(lambda b: adjust_scale(0.9))

# One dropdown per lever; levers with a single level offer no choice and are skipped.
lever_widgets = {
    lever.lever_id: Dropdown(
        description=lever.lever_id,
        options=[(f"{level.level_id}: {level.label}", level.level_id) for level in lever.levels],
        style={"description_width": "initial"}
    )
    for lever in levers.levers
    if len(lever.levels) > 1
}
control_widgets = {
    "sankey_id": Dropdown(value="overall", options=list(sankey_data.keys())),
    "time": IntSlider(min=0, max=6),
}

# Create the displayed widget BEFORE wiring up interactive_output:
# interactive_output calls update_sankey once immediately, and update_sankey
# reads the global `w`. Previously `w` was created afterwards, so on a fresh
# kernel that first call failed with a NameError (captured by `out`, which is
# never displayed, so the failure was silent).
w = sankey_data_with_data[control_widgets["sankey_id"].value].to_widget(width=1500, height=800, debugging=True)

# `out` is not displayed: update_sankey works by mutating `w` in place.
out = interactive_output(
    update_sankey,
    {**control_widgets, **lever_widgets}
)
ui_layout = Layout(
    display='flex',
    flex_flow='row wrap',
    align_items='stretch',
    width='90%'
)
ui = Box(list(control_widgets.values()) +
         [btn_inc, btn_dec] +
         list(lever_widgets.values()), layout=ui_layout)
VBox([ui, w])

Instead, focus on showing the detailed flows rather than the Sankey diagram:

In [None]:
# Flow table indexed by flow id, without the "metric" column.
flows = flows_sym.set_index("id").drop(columns="metric")

In [None]:
@interact(
    material=Combobox(options=list(flows["material"].unique()), value=""),
    time=(0, 6),
    **lever_widgets
)
def update_flows(material="", time=0, **kwargs):
    """Evaluate the model and write the flow values into the global `flows` table.

    Values are divided by 1e6 and rounded to one decimal place. Note that
    `flows` is updated in place, so it always reflects the most recent call.

    Parameters
    ----------
    material : if non-empty, only rows with this material are returned.
    time : time index passed to `levers.get_params`.
    **kwargs : lever_id -> level_id; levers not given fall back to
        `baseline_lever_settings`.

    Returns
    -------
    The `flows` table itself, or the rows matching `material`.
    """
    # Only levers with more than one level get a widget, so fill in the rest
    # from the baseline (as update_other_results and update_plots do).
    settings = {**baseline_lever_settings, **kwargs}
    params = levers.get_params(settings, time_index=time)
    new_values = func(params)
    for flow_id in flows.index:
        flows.loc[flow_id, "value"] = np.round(new_values[flow_id] / 1e6, 1)
    if material:
        return flows.query("material == @material")
    return flows

And the same for the other outputs (emissions etc.):

In [None]:
@interact(
    key=Combobox(options=list(other_results.keys()), value=""),
    time=(0, 6),
    **lever_widgets
)
def update_other_results(key="", time=0, **kwargs):
    """Evaluate the other results for the chosen lever levels and time index.

    Lever settings given in `kwargs` override `baseline_lever_settings`.
    Returns a Series of the results; when `key` is non-empty, only the
    entries whose name contains `key` are returned.
    """
    params = levers.get_params({**baseline_lever_settings, **kwargs}, time_index=time)
    # Should disable this for testing, but in some cases it's not a problem because the
    # invalid division-by-zero is in a branch of the piecewise expression which will not be reached.
    with np.errstate(invalid='ignore'):
        values = func_other(params)
    series = pd.Series(values)
    if not key:
        return series
    matches = series.index.str.contains(key)
    return series[matches]

## Plots of results

In [None]:
@interact(**lever_widgets)
def update_plots(**kwargs):
    """Plot stacked emissions by category over time for the chosen lever levels.

    The other results are evaluated for time indices 0-6, which are labelled
    2020 to 2050 in five-year steps. Values are divided by 1e9 for plotting.
    Lever settings in `kwargs` override `baseline_lever_settings`.
    Returns the matplotlib Axes.
    """
    settings = {**baseline_lever_settings, **kwargs}
    years = list(range(2020, 2051, 5))
    per_step = [
        func_other(levers.get_params(settings, time_index=step))
        for step in range(7)
    ]
    results = pd.DataFrame(per_step, index=years)

    # Plot categories, in stacking order.
    by_category = {
        "EndOfLife": results["EmissionsByStage_end_of_life"],
        "FertiliserUse": results["GHG_use_fertiliser"],
        "Downstream": results["EmissionsByStage_downstream"] + results["EmissionsByStage_organic_synthesis"],
        "PrimaryProduction": results["EmissionsByStage_primary_production"],
        "FertiliserProduction": results["GHG_production_fertiliser"],
        "Hydrogen": results["EmissionsByStage_hydrogen"],
        "Biomass": results["EmissionsByStage_biomass"],
        "Feedstock": results["EmissionsByStage_feedstocks"],
    }
    df = pd.DataFrame(by_category) / 1e9
    total = df.sum(axis=1)

    print(settings)

    area_colours = [
        "#4269D0", "#EFB118", "#FF725C", "#6CC5B0", "#3CA951",
        "#FF8AB7", "#A463F2", "#97BBF5", "#9C6B4E", "#9498A0",
    ]
    ax = df.plot.area(stacked=True, color=area_colours)
    # Overlay the sum of all categories as a black line.
    ax.plot(df.index, total, lw=2, c="k", label="Total")
    ax.legend(bbox_to_anchor=(1.0, 1.0))
    ax.set_xlabel("Year")
    ax.set_ylabel("Total GHG emissions [MtCO2e]")
    ax.axhline(0, lw=0.5, c='k')
    return ax

## Variation in production routes

Show how some capacities vary

In [None]:
# Shallow copy of the baseline lever settings.
# NOTE(review): not referenced by any of the cells visible here -- confirm it is still needed.
kwargs = {
    **baseline_lever_settings
}
def get_flows_for_levels(lever_settings, time_index=6):
    """Return a copy of `flows` with values evaluated for the given lever settings.

    Unlike `update_flows`, the global `flows` table is left untouched.

    Parameters
    ----------
    lever_settings : lever_id -> level_id mapping passed to `levers.get_params`.
    time_index : time index to evaluate at. Defaults to 6, the last index
        used elsewhere in this notebook (labelled 2050 in the plots).

    Returns
    -------
    DataFrame like `flows`, with "value" divided by 1e6 and rounded to one
    decimal place.
    """
    params = levers.get_params(lever_settings, time_index=time_index)
    new_values = func(params)
    my_flows = flows.copy()
    for flow_id in my_flows.index:
        my_flows.loc[flow_id, "value"] = np.round(new_values[flow_id] / 1e6, 1)
    return my_flows

In [None]:
def compare_flows(lever_settings):
    """Total the flows along three production routes for the given lever settings.

    Returns a Series indexed by route name, holding the summed "value" of the
    flows matching each route's source/target query.
    """
    scenario_flows = get_flows_for_levels(lever_settings)
    route_queries = {
        "Ethanol to ethylene": 'source == "EthylAlcohol" and target == "DehydrationOfEthylAlcohol"',
        "Naphtha to ethylene": 'source == "Naphtha" and target == "SteamCrackingOfNaphtha"',
        "Naphtha to BTX": 'source == "Naphtha" and target.str.startswith("CatalyticReformingOfNaphtha")',
    }
    totals = {
        route: scenario_flows.query(expression)["value"].sum()
        for route, expression in route_queries.items()
    }
    return pd.Series(totals)

In [None]:
# Compare the production routes across every level of the bioethanol capacity
# lever, holding all other levers at baseline.
lever = next(l for l in levers.levers if l.lever_id == "bioethanol_capacity")
results = []
# Iterate the lever's own levels, so the scenarios always line up with the row
# labels below (previously the level ids were the hard-coded string '1234').
for level in lever.levels:
    settings = {
        **baseline_lever_settings,
        lever.lever_id: level.level_id
    }
    results.append(compare_flows(settings))
results = pd.DataFrame(results, index=[level.label for level in lever.levels])
results

In [None]:
import seaborn as sns
sns.set_theme(style="darkgrid")
ax = results.plot.bar(stacked=True, ylabel="Mt per year")
ax.set_ylim(0, 1000)
ax.tick_params(axis='x', labelrotation=0)
ax.set_xticklabels([
    "Baseline\n(no change)",
    "Reference\nprojection",
    "2x greater\nthan reference",
    "4x greater\nthan reference",
])
ax.set_xlabel("Level of bioethanol capacity growth")


def _bracket_note(text, y, width):
    """Annotate the chart at height `y` (x = 3.5) with a bracket of the given width."""
    ax.annotate(text, xy=(3.5, y), xytext=(3.7, y),
                fontsize=12, ha='left', va='center',
                arrowprops=dict(arrowstyle=f'-[, widthB={width}, lengthB=.5', lw=1.0, color='k'))


_bracket_note('Ethanol substitutes naphtha\nfor ethylene production', 170, 3.6)
_bracket_note('Additional naphtha needed\nto replace BTX co-products\nfrom steam cracking', 610, 5.8);

In [None]:
# `f` only exists as a local variable inside compare_flows, so this cell raised
# a NameError on a fresh kernel. Compute the flows explicitly instead.
# NOTE(review): assumes the scenario of interest is the highest bioethanol
# capacity level ("4") with all other levers at baseline -- confirm.
get_flows_for_levels({**baseline_lever_settings, "bioethanol_capacity": "4"}).query(
    'source == "Naphtha" and target == "SteamCrackingOfNaphtha"'
)

In [None]:
# `f` only exists as a local variable inside compare_flows, so this cell raised
# a NameError on a fresh kernel. Compute the flows explicitly instead.
# NOTE(review): assumes the scenario of interest is the highest bioethanol
# capacity level ("4") with all other levers at baseline -- confirm.
get_flows_for_levels({**baseline_lever_settings, "bioethanol_capacity": "4"}).query(
    'source == "Naphtha" and target.str.startswith("CatalyticReformingOfNaphtha")'
)

## Baseline emissions for comparison

In [None]:
# Other results at baseline lever settings and time index 0.
# Note: this rebinds `results`, which held the bioethanol comparison table above.
results = func_other(levers.get_params(baseline_lever_settings, time_index=0))

In [None]:
# Baseline total: GHG_total divided by 1e12, reported as Gt.
print(f"Total emissions: {results['GHG_total']/1e12:.1f} Gt")

In [None]:
# For comparison to Cullen et al (2024): 1.9 Gt
# Natural gas, electricity, direct, fertiliser production and feedstock
# emissions, with the end-of-life stage subtracted.
excluding_biomass_fertiliser_use_eol = (
    results["EmissionsBySource_NG"]
    + results["EmissionsBySource_Elec"]
    + results["EmissionsBySource_Direct"]
    + results["GHG_production_fertiliser"]
    + results["EmissionsBySource_Feedstock"]
    - results["EmissionsByStage_end_of_life"]
)
print(f"To compare to Cullen et al (2024): {excluding_biomass_fertiliser_use_eol/1e12:.1f} Gt")

In [None]:
# For comparison to Isella and Manca (2022): 1.4 Gt (excluding 1.3 Gt from "refining")
# Natural gas, electricity, direct and fertiliser production emissions, with
# the end-of-life stage subtracted (no feedstock term).
gate_to_gate = (
    results["EmissionsBySource_NG"]
    + results["EmissionsBySource_Elec"]
    + results["EmissionsBySource_Direct"]
    + results["GHG_production_fertiliser"]
    - results["EmissionsByStage_end_of_life"]
)
print(f"To compare to Isella and Manca (2022): {gate_to_gate/1e12:.1f} Gt")

In [None]:
# For comparison to IEA: 0.94 Gt
# Natural gas, direct and fertiliser production emissions, with the
# end-of-life stage subtracted (no electricity or feedstock terms).
IEA_emissions = (
    results["EmissionsBySource_NG"]
    + results["EmissionsBySource_Direct"]
    + results["GHG_production_fertiliser"]
    - results["EmissionsByStage_end_of_life"]
)
print(f"To compare to IEA: {IEA_emissions/1e12:.1f} Gt")

## Other information

Show the end of life polymer quantities:

In [None]:
# Evaluate the flows at baseline (update_flows defaults: no material filter, time index 0).
flows = update_flows(**baseline_lever_settings)

# Polymer names; the matching end-of-life targets carry an "AtEOL" suffix.
polymer_objects = [
    "LDPEPolyethylene",
    "HDPEPolyethylene",
    "PPPolypropylene",
    "PSPolystyrene",
    "PVCPolyvinylChloride",
    "PETPolyethyleneTerephthalatePolyesters",
    "Polyurethane",
    "SyntheticRubbers",
    "OtherPolymers",
    "LLDPE",
    "FibrePPA",
]
eol_polymers = {name + "AtEOL" for name in polymer_objects}

# Total value flowing into the end-of-life targets, per material.
eol_polymer_quantities = (
    flows.loc[flows["target"].isin(eol_polymers)]
    .groupby("material")[["value"]]
    .sum()
)
eol_polymer_quantities

In [None]:
# Horizontal bar chart of the end-of-life quantity per material.
eol_polymer_quantities.plot.barh();

In [None]:
# Export the end-of-life quantities to CSV.
# NOTE(review): this writes to a user-specific Desktop folder, which will not
# exist on every machine -- consider a path relative to the project instead.
eol_polymer_quantities.to_csv("~/Desktop/eol_polymers.csv")

How much BTX demand is there at baseline?

In [None]:
btx = ["Benzene", "Toluene", "Xylenes"]

# Flows leaving a BTX source, excluding those going to "OtherConsumptionOf..."
# or "Disproportionation..." targets.
btx_quantities = flows[
    flows["source"].isin(btx)
    & ~flows["target"].str.startswith("OtherConsumptionOf")
    & ~flows["target"].str.startswith("Disproportionation")
]
btx_quantities

In [None]:
# Total BTX quantity per material. The values in `flows` were already divided
# by 1e6 when update_flows filled them in, so no further scaling is applied
# here (dividing by 1e6 again scaled the totals twice).
btx_quantities.groupby("material")[["value"]].sum()

Expected production values for these (from ICIS) are:

In [None]:
# Reference BTX figures, one row per aromatic.
# - "cracking", "conversion", "refining": contributions by route; a negative
#   "conversion" entry (Toluene) means the material is consumed by conversion.
#   (presumably the 2013 breakdown behind the "levi_*" columns below -- confirm source)
# - "icis_2013", "icis_2020": ICIS production values for those years.
# NOTE(review): units are not stated here; presumably Mt -- confirm.
btx_2013 = pd.DataFrame(
    [
        [12.3, 7.1, 23.5, 44.78, 49.1],
        [7.7, -11.6, 15.4, 26.51, 28.3],
        [0.3+0.6+0.8, 0.9+0.8+1.7, 36.2+2.1-1.9, 77.57, 103.4],
    ],
    columns=["cracking", "conversion", "refining", "icis_2013", "icis_2020",],
    index=["Benzene", "Toluene", "Xylenes"],
)
btx_2013

In [None]:
# Net total across the three routes (conversion may be negative).
btx_2013["levi_total"] = btx_2013[["cracking", "conversion", "refining"]].sum(axis=1)
# Gross total: as above, but counting conversion only where it is positive.
btx_2013["levi_gross"] = btx_2013[["cracking", "refining"]].sum(axis=1) + np.maximum(0, btx_2013["conversion"])
# Model quantities per material. The values in `flows` were already divided by
# 1e6 when update_flows filled them in, so they are used as-is here (dividing
# by 1e6 again scaled them twice and made the ICIS comparison meaningless).
btx_2013["model"] = btx_quantities.groupby("material")["value"].sum()

In [None]:
# Display the reference table together with its derived columns.
btx_2013

In [None]:
# Difference between the ICIS 2020 figures and the model quantities, per material.
btx_2013["icis_2020"] - btx_2013["model"]

How much of the benzene is sourced from conversion?

In [None]:
# Share of the Benzene total (cracking + conversion + refining) that comes from conversion.
btx_2013.loc["Benzene", "conversion"] / btx_2013.loc["Benzene", "levi_total"]

In [None]:
# Share of the Xylenes total (cracking + conversion + refining) that comes from conversion.
btx_2013.loc["Xylenes", "conversion"] / btx_2013.loc["Xylenes", "levi_total"]

8% of xylenes came from conversion, and 17% of benzene.

What's the yield of the conversion? (on average, across the two).

In [None]:
# Benzene plus Xylenes gained from conversion, per unit of Toluene consumed.
# The Toluene conversion entry is negative, hence the sign flip.
(btx_2013.loc["Benzene", "conversion"] + btx_2013.loc["Xylenes", "conversion"]) / -btx_2013.loc["Toluene", "conversion"]

Assume these fractions still apply in 2020 (since we don't have other data) -- so the amount of toluene used to convert to xylenes and benzene respectively is:

In [None]:
# Toluene used to make xylenes in 2020: the conversion share of xylenes
# (0.082) divided by the conversion yield (0.905). Both constants are the
# rounded results of the cells above, hard-coded here.
btx_2013.loc["Xylenes", "icis_2020"] * 0.082 / 0.905

In [None]:
# Toluene used to make benzene in 2020: the conversion share of benzene
# (0.166) divided by the conversion yield (0.905). Both constants are the
# rounded results of the cells above, hard-coded here.
btx_2013.loc["Benzene", "icis_2020"] * 0.166 / 0.905

So corrected production of toluene is:

In [None]:
# ICIS 2020 toluene minus the toluene consumed by conversion to xylenes
# (share 0.082) and benzene (share 0.166), at a conversion yield of 0.905.
btx_2013.loc["Toluene", "icis_2020"] - (btx_2013.loc["Xylenes", "icis_2020"] * 0.082 + btx_2013.loc["Benzene", "icis_2020"] * 0.166) / 0.905

In [None]:
# Corrected 2020 figures: Benzene and Xylenes are taken unchanged from ICIS;
# Toluene has the amount consumed by conversion subtracted (conversion shares
# 0.082 for xylenes and 0.166 for benzene, conversion yield 0.905).
btx_2013["icis_2020_corrected"] = btx_2013["icis_2020"]
btx_2013.loc["Toluene", "icis_2020_corrected"] -= (
    btx_2013.loc["Xylenes", "icis_2020"] * 0.082
    + btx_2013.loc["Benzene", "icis_2020"] * 0.166
) / 0.905
btx_2013

In [None]:
# Amount by which the corrected ICIS 2020 figures exceed the model quantities, per material.
extra_demand = btx_2013["icis_2020_corrected"] - btx_2013["model"]
extra_demand

Going to future years, we will assume that the same fraction of the ICIS production