# Check implementation

In [None]:
from collections import defaultdict
from pathlib import Path
import sys

import bw2data as bd
import bw_processing as bwp
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from fs.zipfs import ZipFS
from scipy.stats import lognorm

sys.path.append("/Users/akim/PycharmProjects/akula")
from akula.implicit_markets import DATA_DIR

# Select the bw2data project that the activity lookups below run against.
bd.projects.set_current('GSA for archetypes')

# Open the zipped implicit-markets datapackage stored under akula's DATA_DIR.
fp_implicit_markets = DATA_DIR / "implicit-markets.zip"
implicit_markets_fs = ZipFS(fp_implicit_markets)
dp = bwp.load_datapackage(implicit_markets_fs)

In [None]:
# Keep the first element returned by get_resource for the 'implicit markets'
# index and data resources. `indices` is later accessed by its 'row' and
# 'col' fields, and `data` is indexed by position in `indices`.
indices = dp.get_resource('implicit markets.indices')[0]
data = dp.get_resource('implicit markets.data')[0]
# distributions = dp.get_resource('implicit markets.distributions')[0]

In [None]:
# For every distinct column id in the datapackage indices, look up the
# activity with that id and keep those of its exchanges whose input id occurs
# among the column's rows, together with the matching entry of `data`.
# `found` maps activity -> exchanges; `data_act` maps activity -> data entries
# in the same order.
found = defaultdict(list)
data_act = defaultdict(list)
for col in set(indices['col']):
    rows = indices[indices['col'] == col]['row']
    act = bd.get_activity(int(col))
    for exc in act.exchanges():
        if exc.input.id not in rows:
            continue
        found[act].append(exc)
        # Position of the (row, col) pair in `indices`; the first match is
        # used to pick the corresponding entry of `data`.
        row_col = np.array((exc.input.id, col), dtype=indices.dtype)
        where = np.where(indices == row_col)[0][0]
        data_act[act].append(data[where])

In [None]:
# pdf = matplotlib.backends.backend_pdf.PdfPages("implicit_market_figures.pdf")
# Directory that receives one HTML figure per activity in `found`.
write_figs = Path("implicit_markets")
write_figs.mkdir(exist_ok=True, parents=True)

dist_ = {}
num_bins = 100
count = 0  # running prefix of the output file names

for ng, current in found.items():

    rows = len(current)
    showlegend = True  # legend entries are emitted for the first subplot only
#     x = np.array([exc['amount'] for exc in current])
#     alpha = x.copy()
#     alpha_exc_dict = {alpha[i]: current[i] for i in range(len(alpha))}
#     scaling_factors = [get_dirichlet_scaling_factor(alpha_exc_dict), 250, 500]

#     scaling_factors_str = [f"SF={sf:5.3f}" for sf in scaling_factors]
    fig = make_subplots(
        rows=rows,
        cols=1,
    )

    for i, exc in enumerate(current):
        # Empirical density of the stored values for this exchange, on
        # `num_bins` equal-width bins spanning their observed range.
        Y = data_act[ng][i]
        bins_ = np.linspace(min(Y), max(Y), num_bins+1, endpoint=True)
        Y_samples, _ = np.histogram(Y, bins=bins_, density=True)
        # Given distribution
        assert exc['uncertainty type']==2
        loc = exc['loc']
        scale = exc['scale']
        midbins = (bins_[1:]+bins_[:-1])/2
        # Lognormal pdf at the bin centres, with shape `scale` and scale
        # exp(loc) taken from the exchange.
        Y_distr = lognorm.pdf(midbins, s=scale, scale=np.exp(loc))
        # Euclidean distance between the two curves, relative to the pdf peak.
        # The differences are squared element-wise *before* summing: squaring
        # the sum instead reduces to abs(sum of differences), where positive
        # and negative deviations cancel each other out.
        distance = np.sqrt(np.sum((Y_distr-Y_samples)**2))/max(Y_distr)

        fig.add_trace(
            go.Scatter(
                x = midbins,
                y = Y_samples,
                line_color = 'blue',
                name='Dirichlet samples',
                showlegend=showlegend,
            ),
            row=i+1,
            col=1,
        )
        fig.add_trace(
            go.Scatter(
                x = midbins,
                y = Y_distr,
                line_color = 'red',
                name='Defined distribution',
                showlegend=showlegend,
            ),
            row=i+1,
            col=1,
        )
        showlegend=False
        # The distance is reported as the y-axis title of the subplot.
        fig.update_yaxes(
            title_text=f"ED={distance:5.3f}",
            row=i+1,
            col=1,
        )
    fig.update_layout(
        width=300,
        height=250*rows,
        legend=dict(
            yanchor="top",
            y=-0.2,
            xanchor="left",
            x=0.01,
            orientation='h',
        )
    )
    # File name: counter, first 20 characters of the activity name, and first
    # 3 characters of its location.
    fig.write_html(write_figs / "{}_{}_{}.html".format(count, ng['name'][:20], ng['location'][:3]))
    count += 1

# Plot implicit markets for paper 3

In [None]:
# Upper line of the title placed above each subplot column of the paper
# figure (LaTeX text), one entry per column.
names1 = [
    r'$\text{heat and power,}$',
    r'$\text{electricity production,}$',
    r'$\text{electricity production,}$',
    r'$\text{heat and power,}$',
    r'$\text{electricity production,}$',
]
# Lower line of the title for the same columns, in the same order.
names2 = [
    r'$\text{hard coal, RU}$',
    r'$\text{lignite, BA}$',
    r'$\text{hard coal, ME}$',
    r'$\text{lignite, RU}$',
    r'$\text{lignite, ME}$',
]

In [None]:
# Colour palette and histogram settings for the paper figure.
color_gray_hex = "#b2bcc0"
color_darkgray_hex = "#485063"
color_black_hex = "#212931"
color_pink_rgb = "rgb(148, 52, 110)"
color_blue_rgb = "rgb(29,105,150)"
color_orange_rgb = "rgb(217,95,2)"
color_red_hex = "#ff2c54"
opacity = 0.6
num_bins = 60

# A single row of `cols` subplots, each with its own y-axis.
cols = 5
subplot_grid = dict(
    rows=1,
    cols=cols,
    horizontal_spacing=0.05,
    shared_yaxes=False,
    # subplot_titles=names2,
)
fig = make_subplots(**subplot_grid)
showlegend = True
# NOTE(review): `current` and `ng` are not assigned in this cell; they hold
# whatever the last iteration of the earlier `for ng, current in
# found.items()` loop left behind. Confirm that this is the intended market.
for i, exc in enumerate(current):
    # Empirical density of the stored values for this exchange, on `num_bins`
    # equal-width bins spanning their observed range.
    Y = data_act[ng][i]
    bins_ = np.linspace(min(Y), max(Y), num_bins+1, endpoint=True)
    Y_samples, _ = np.histogram(Y, bins=bins_, density=True)
    # Given distribution
    assert exc['uncertainty type']==2
    loc = exc['loc']
    scale = exc['scale']
    midbins = (bins_[1:]+bins_[:-1])/2
    Y_distr = lognorm.pdf(midbins, s=scale, scale=np.exp(loc))
    # Euclidean distance between the two curves, relative to the pdf peak.
    # The differences are squared element-wise before summing; squaring the
    # sum instead only yields abs(sum of differences). The value is not
    # displayed in this figure.
    distance = np.sqrt(np.sum((Y_distr-Y_samples)**2))/max(Y_distr)

    # Histogram of the samples, drawn as a filled step line.
    # NOTE(review): both `line_color` and `line["color"]` are supplied with
    # different colours; confirm which one plotly applies.
    fig.add_trace(
        go.Scatter(
            x = midbins,
            y = Y_samples,
            line_color = color_darkgray_hex,
            name=r"$\text{Dirichlet samples}$",
            showlegend=showlegend,
            opacity=opacity,
            line=dict(color=color_blue_rgb, width=1, shape="hvh"),
            fill="tozeroy",
        ),
        row=1,
        col=i+1,
    )
    # Lognormal pdf evaluated at the bin centres.
    fig.add_trace(
        go.Scatter(
            x = midbins,
            y = Y_distr,
            line_color = color_red_hex,
            name=r"$\text{Defined lognormal distributions}$",
            showlegend=showlegend,
        ),
        row=1,
        col=i+1,
    )
    # Legend entries are emitted for the first subplot only.
    showlegend=False
    
# Grid, zero-line and border settings shared by the x- and y-axes.
axis_style = dict(
    showgrid=True,
    gridwidth=1,
    gridcolor=color_gray_hex,
    zeroline=True,
    zerolinewidth=1,
    zerolinecolor=color_black_hex,
    showline=True,
    linewidth=1,
    linecolor=color_gray_hex,
)

fig.update_xaxes(title_text=r"$\text{Production volume share}$", **axis_style)

# Only the first column carries a y-axis title.
fig.update_yaxes(title_text=r"$\text{Frequency}$", col=1)
fig.update_yaxes(**axis_style)

# Paper-coordinate x positions of the subplot columns; each title line is
# left-anchored 0.08 to the left of the listed position.
xpos = [0.08, 0.29, 0.5, 0.71, 0.92]
for i in range(cols):
    # Two stacked title lines per column: names1 above (y=1.2), names2 below
    # (y=1.05).
    for label, y_paper in ((names1[i], 1.2), (names2[i], 1.05)):
        fig.add_annotation(
            dict(
                font=dict(size=14),
                showarrow=False,
                text=label,
                x=xpos[i]-0.08,
                xanchor='left',
                xref='paper',
                y=y_paper,
                yanchor='bottom',
                yref='paper',
            )
        )
    
# Horizontal, bordered legend centred below the subplots.
legend_style = dict(
    yanchor="middle",
    y=-0.7,
    xanchor="center",
    x=0.5,
    orientation='h',
    font=dict(size=13),
    bordercolor=color_darkgray_hex,
    borderwidth=1,
)
fig.update_layout(
    width=220*cols,
    height=250,
    legend=legend_style,
    margin=dict(t=40, b=10, l=10, r=0),
    paper_bgcolor="rgba(255,255,255,1)",
    plot_bgcolor="rgba(255,255,255,1)",
)
# NOTE(review): the file name reads "impicit" (presumably "implicit"); it is
# kept unchanged here in case other material refers to this exact name.
fig.write_image(Path("impicit_market_samples.eps"))

# fig.write_html(write_figs / "{}_{}_{}.html".format(count, ng['name'][:20], ng['location'][:3]))
# count += 1

# Plot regression figures for Dirichlet scales for paper 3

In [59]:
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from gsa_framework.utils import read_pickle
import sys
sys.path.append("/Users/akim/PycharmProjects/akula")
from akula.markets import get_dirichlet_scales

In [60]:
# Commented-out code that appears to have produced the pickles read below:
# ei_name = "ecoinvent 3.8 cutoff"
# found = find_uncertain_implicit_markets(ei_name)
# markets = find_markets(ei_name)
# write_pickle(found, "implicit_markets.pickle")
# write_pickle(markets, "normal_markets.pickle")
# NOTE(review): the commented-out writer above saves "normal_markets.pickle",
# while "generic_markets.pickle" is read below - confirm which file is meant.
# Both objects are passed to get_market_features, which iterates them as
# mappings of activity -> list of exchanges.
ims = read_pickle("implicit_markets.pickle")
gms = read_pickle("generic_markets.pickle")

In [71]:
# Column positions inside the base feature array returned by
# get_market_features: "large mean" and number of exchanges.
LMEAN_ID, NEXC_ID = 0, 1


def get_market_features(markets):
    """Compute regression features for each market.

    For every market two base features are derived from its exchanges:

    * the "large mean": the mean of those exchange amounts that are greater
      than or equal to the mean of all amounts of the market;
    * the number of exchanges.

    Parameters
    ----------
    markets : mapping
        Maps each market to a sequence of exchanges; every exchange must
        support ``exc['amount']``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_, X)`` where ``X`` has shape ``(n_markets, 2)`` with columns
        ``[large mean, number of exchanges]`` and ``X_`` has shape
        ``(n_markets, 1)`` holding ``1 / large_mean**3``. For an empty
        mapping the shapes are ``(0, 1)`` and ``(0, 2)``.
    """
    features = []
    for exchanges in markets.values():
        amounts = np.array([exc['amount'] for exc in exchanges])
        mean = np.mean(amounts)
        lmean = np.mean(amounts[amounts >= mean])
        features.append([lmean, len(exchanges)])
    # The explicit reshape keeps the array two-dimensional even when
    # `markets` is empty, so the column indexing below does not fail.
    X = np.array(features, dtype=float).reshape(-1, 2)
    X_ = 1 / X[:, LMEAN_ID]**3
    return X_.reshape((-1, 1)), X

# Fit Dirichlet scale ~ 1 / large_mean**3 on the implicit markets.
Xtrain, Xtrain_base = get_market_features(ims)
ytrain = get_dirichlet_scales(ims)
reg = LinearRegression().fit(Xtrain, ytrain)

# Apply the fitted coefficient to the generic markets and limit the resulting
# scales to the interval [50, 10000].
# NOTE(review): LinearRegression() fits an intercept by default, but only
# `coef_` is used here - confirm whether `intercept_` is meant to be dropped.
Xtest, Xtest_base = get_market_features(gms)
ytest = np.clip(Xtest * reg.coef_, 50, 10000)

In [72]:
# Scatter of the Dirichlet scales against the "large mean" of the training
# markets. Marker size and colour both encode log(number of exchanges) * 10.
train_marker_scale = np.log(Xtrain_base[:,1])*10
train_markers = dict(
    size=train_marker_scale,
    showscale=True,
    color=train_marker_scale,
)

fig = go.Figure()
fig.add_trace(
    go.Scatter(x=Xtrain_base[:,0], y=ytrain, mode="markers", marker=train_markers),
)
fig.update_layout(width=800, height=450)

fig.update_xaxes(title_text="large mean")
fig.update_yaxes(title_text="Dirichlet scales")

In [73]:
# Transformed feature against the Dirichlet scales, plus the line
# coef * x drawn over the observed range of the feature (100 points).
transformed = Xtrain[:,0]
xline = np.linspace(min(transformed), max(transformed), 100)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=transformed,
        y=ytrain,
        mode='markers',
        marker_color="blue",
        name='Transformed data',
    )
)
fig.add_trace(
    go.Scatter(
        x=xline,
        y=reg.coef_[0]*xline,
        marker_color="red",
        name='Linear regression',
    )
)
fig.update_layout(width=700, height=400)

fig.update_xaxes(title_text="1/x**3")
fig.update_yaxes(title_text="Dirichlet scales")

In [74]:
# Largest predicted scale. `ytest` is two-dimensional, so the builtin max
# compares its one-element rows and returns a one-element array (see the
# recorded output below).
max(ytest)

array([10000.])

In [75]:
# Scatter of the predicted Dirichlet scales against the "large mean" of the
# test markets. Marker size and colour both encode
# log(number of exchanges) * 10.
xtest = Xtest_base[:,0]
test_marker_scale = np.log(Xtest_base[:,1])*10
test_markers = dict(
    size=test_marker_scale,
    showscale=True,
    color=test_marker_scale,
)

fig = go.Figure()
fig.add_trace(
    go.Scatter(x=xtest, y=ytest[:,0], mode='markers', marker=test_markers)
)
fig.update_layout(width=800, height=450)

fig.update_xaxes(title_text="large mean")
fig.update_yaxes(title_text="Dirichlet scales")