In [1]:
import bw2data as bd
from collections import defaultdict
from tqdm import tqdm
from thefuzz import fuzz
from gsa_framework.utils import write_pickle, read_pickle
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import lognorm
import copy

In [2]:
# Activate the Brightway project whose databases are used in this notebook.
bd.projects.set_current('GSA for archetypes')

In [3]:
# Show the databases registered in the current project.
bd.databases

Databases dictionary with 3 object(s):
	biosphere3
	ecoinvent 3.8 cutoff
	swiss consumption 1.0

The inspiration for these virtual markets was the input of 'soybean' to 'market for soybean, feed', which has the reference product 'soybean, feed'. Exact string matching would miss such cases, so we need to be a bit [more flexible](https://github.com/seatgeek/thefuzz) when matching product names for these virtual markets.

In [4]:
def similar(a, b):
    """Return True when two product names are considered a fuzzy match.

    The names match when ``fuzz.partial_ratio`` exceeds 90 or, failing that,
    when ``fuzz.ratio`` exceeds 40.
    """
    if fuzz.partial_ratio(a, b) > 90:
        return True
    return fuzz.ratio(a, b) > 40

In [5]:
def find_uncertain_virtual_markets(database):
    """Find activities that behave like markets without being labelled as such.

    An activity qualifies when at least two of its uncertain technosphere
    inputs share one reference product, that product name is similar (see
    ``similar``) to the activity's own reference product, and the amounts of
    those inputs sum to roughly one (0.98 to 1.02).

    Args:
        database: Name of the Brightway database to scan.

    Returns:
        dict mapping each qualifying activity to the list of its matching
        exchanges. If several input products qualify for one activity, the
        last one iterated wins.
    """
    db = bd.Database(database)

    found = {}

    for act in tqdm(db):
        rp = act.get("reference product")
        if not rp:
            continue

        # Group the uncertain inputs by the reference product they supply.
        inpts = defaultdict(list)
        for exc in act.technosphere():
            if exc.input == exc.output:
                continue
            # Exchanges without an 'uncertainty type' key are treated as
            # having no uncertainty (0) instead of raising KeyError.
            if exc.get('uncertainty type', 0) < 2:
                continue
            input_rp = exc.input.get('reference product')
            if not input_rp:
                # Nothing to compare against the activity's reference product.
                continue
            inpts[input_rp].append(exc)

        for key, lst in inpts.items():
            if (
                len(lst) > 1
                and similar(rp, key)
                and 0.98 <= sum(exc['amount'] for exc in lst) <= 1.02
            ):
                found[act] = lst

    return found

In [6]:
# Load the cached result of find_uncertain_virtual_markets. Uncomment the two
# lines below to recompute it for ecoinvent and rewrite the pickle.
# found = find_uncertain_virtual_markets("ecoinvent 3.8 cutoff")
# write_pickle(found, "implicit_markets.pickle")
found = read_pickle("implicit_markets.pickle")

In [7]:
# Use the seventh key of `found` as the running example market.
ng = list(found)[6]
# Display the activity, its matching exchanges, and how many there are.
ng, found[ng], len(found[ng])

('natural gas, high pressure, import from NL' (cubic meter, CH, None),
 [Exchange: 0.2840284 cubic meter 'petroleum and gas production, off-shore' (cubic meter, NL, None) to 'natural gas, high pressure, import from NL' (cubic meter, CH, None)>,
  Exchange: 0.7160716 cubic meter 'petroleum and gas production, on-shore' (cubic meter, NL, None) to 'natural gas, high pressure, import from NL' (cubic meter, CH, None)>],
 2)

# Sampling with presamples

In [19]:
ei_name = "ecoinvent 3.8 cutoff"

# Static balance of the example market: total of the matched inputs compared
# with the single unit of output.
in_total = sum(exc['amount'] for exc in found[ng])
out_total = 1
# float("inf") replaces the undefined bare name `inf`.
static_ratio = in_total / out_total if out_total != 0 else float("inf")
static_balance = in_total - out_total

# One activity parameter per matched exchange. Exchanges with an uncertainty
# type above 1 are multiplied by the shared `scaling` parameter; the others
# reference their parameter directly.
activity_params = []
scaled_param_names = []
fixed_param_names = []
for i, exc in enumerate(found[ng]):
    param_name = f"market_param_{i}"
    activity_params.append(
        {
            'name': param_name,
            'amount': exc.get('amount', 0),
            'uncertainty type': exc.get('uncertainty type', 0),
            'loc': exc.get('loc', exc.get('amount', 0)),
            'scale': exc.get('scale'),
            'negative': exc.get('negative', False),
            'database': ei_name,
            'code': ng.get('code'),
        }
    )
    if exc.get('uncertainty type', 0) > 1:
        exc['formula'] = "{} * scaling".format(param_name)
        scaled_param_names.append(param_name)
    else:
        exc['formula'] = param_name
        fixed_param_names.append(param_name)
    exc.save()
    if exc.get('variable name', False):
        # Keep the previous variable name under a temporary key and blank the
        # original one.
        exc['variable name temp'] = exc['variable name']
        exc['variable name'] = []
        exc.save()

if not scaled_param_names:
    raise ValueError("No uncertain exchanges to rescale for this market")

activity_params.append(
    {
        'name': 'static_ratio',
        'database': ei_name,
        'code': ng['code'],
        'amount': static_ratio,
        'uncertainty type': 0,
        'loc': static_ratio,
    }
)
out_term = "1"
# Build the formula terms from the parameters created above instead of
# hard-coding two names, so markets with any number of exchanges work.
const_in_term = "({})".format(" + ".join(fixed_param_names)) if fixed_param_names else "0"
var_in_term = "({})".format(" + ".join(scaled_param_names))
activity_params.append(
    {
        'name': 'scaling',
        'formula': "({}*{}-{})/({})".format(static_ratio, out_term, const_in_term, var_in_term),
        'database': ei_name,
        'code': ng['code'],
    },
)
activity_params.append(
    {
        'name': 'ratio',
        'formula': "(scaling * {} + {})/{}".format(var_in_term, const_in_term, out_term),
        'database': ei_name,
        'code': ng['code'],
    },
)

group = 'my_market_2022_04'
iterations = 10
bd.parameters.new_activity_parameters(activity_params, group, True)
# bd.parameters.add_exchanges_to_group(group, ng)
# NOTE(review): the recorded output of this cell shows recalculate() raising
# MissingName for names that are not defined here — presumably formulas left
# over from a run with a different `ng`; confirm before relying on it.
bd.parameters.recalculate()
# pbm = PBM(group)
# pbm.load_parameter_data()
# pbm.calculate_stochastic(iterations, update_amounts=True)
# pbm.calculate_matrix_presamples()

MissingName: The following variables aren't defined:
hard_RU|hard_ROW|lig_BA|lig_ROW|lig_RU

In [None]:
# NOTE(review): `ei` is only assigned in a later cell (ei = bd.Database(...)),
# so this cell raises NameError on a fresh kernel run from top to bottom.
ei

In [None]:
# IPython source introspection (`??`); development leftover, not part of the analysis.
bd.parameters.remove_from_group??

In [None]:
from bw2data.parameters import ActivityParameter
# Display the imported class.
ActivityParameter

In [None]:
# NOTE(review): this iterates the `group` attribute of the class itself;
# presumably meant to list existing parameter groups — confirm it does.
list(ActivityParameter.group)

In [None]:
# Inspect the raw data of the fifth exchange of `ng`.
list(ng.exchanges())[4].as_dict()

We can use the [Dirichlet distribution](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.dirichlet.html) to model parameters with a fixed sum, but this distribution is sensitive to the concentration values.

In [None]:
from scipy.stats import dirichlet
import numpy as np
import seaborn as sb

In [None]:
# Exchanges of the example market.
current = found[ng]

# Use the static exchange amounts as the Dirichlet concentration parameters.
x = np.array([exc['amount'] for exc in current])
alpha = x.copy()

# Mean of the Dirichlet distribution with these concentrations.
dirichlet.mean(alpha)

In [None]:
def get_var(a, b):
    """Return a*b / ((a+b)**2 * (a+b+1)), the variance of a Beta(a, b) variable."""
    return a * b / (a + b) ** 2 / (a + b + 1)

In [None]:
# NOTE(review): the multipliers below are unexplained magic numbers
# (presumably hand-fitted concentration scalings — confirm and name them).
# NOTE(review): columns 4 and 2 are hard-coded, which needs at least five
# exchanges in found[ng]; the `ng` displayed earlier has only two.
ralpha = alpha*2287.723635997957*2 #*642.983550493241*2
rvs = dirichlet.rvs(ralpha, size=1000)
rvs4 = rvs[:,4]

# Second draw with a different scaling; overwrites `ralpha` and `rvs`.
ralpha = alpha*642.983550493241*2
rvs = dirichlet.rvs(ralpha, size=1000)
rvs2 = rvs[:,2]
# sb.displot(rvs[:, 0])

In [None]:
# Element-wise sum of the two sampled columns (taken from separate draws).
rvs4 + rvs2

In [None]:
# Ratio of the Dirichlet marginal variance for exchange `iexc` (via get_var)
# to the variance of a lognormal with parameters mu and s.
# NOTE(review): index 4 is hard-coded; it needs at least five exchanges in found[ng].
iexc = 4
s = found[ng][iexc]['scale']
mu = found[ng][iexc]['loc']
get_var(ralpha[iexc], sum(ralpha)-ralpha[iexc]) / ((np.exp(s**2)-1)*np.exp(2*mu+s**2))

In [None]:
# One subplot per exchange: histogram of the Dirichlet samples (blue) against
# the exchange's own lognormal density (red) where one is defined.
fig = make_subplots(rows=len(current))
# Single bin count for every subplot. It used to be reassigned inside the
# loop, so the first subplot was binned differently from the rest.
num_bins = 60
for i, exc in enumerate(current):
    Y = rvs[:, i]
    bins_ = np.linspace(min(Y), max(Y), num_bins, endpoint=True)
    Y_samples, _ = np.histogram(Y, bins=bins_, density=True)
    # Bin centres are needed for both traces, so compute them unconditionally
    # rather than only in the lognormal branch.
    midbins = (bins_[1:] + bins_[:-1]) / 2
    fig.add_trace(
        go.Scatter(
            x = midbins,
            y = Y_samples,
            line_color = 'blue',
        ),
        row=i+1,
        col=1,
    )
    if exc.get('uncertainty type') == 2:
        # Given distribution
        Y_distr = lognorm.pdf(midbins, s=exc['scale'], scale=np.exp(exc['loc']))
        fig.add_trace(
            go.Scatter(
                x = midbins,
                y = Y_distr,
                line_color = 'red',
            ),
            row=i+1,
            col=1,
        )
    else:
        # No lognormal reference for this exchange: report it and draw only
        # the samples instead of reusing a curve from an earlier iteration.
        print(i, exc)
fig.update_layout(width=500,height=len(current)*300)
fig.show()

In [None]:
# Inspect the raw data of the second matched exchange.
found[ng][1].as_dict()

In [None]:
# exp of the same constant used as `mu` in the next cell (presumably a
# lognormal `loc` copied from an exchange — confirm).
np.exp(-2.121321856799328)

In [None]:
# Hard-coded lognormal parameters (presumably copied from an exchange — confirm).
s = 0.07280109889280519
mu = -2.121321856799328  # not used in this cell
# `a` is the skewness of a lognormal with shape parameter s.
a = (np.exp(s**2) + 2)*np.sqrt(np.exp(s**2)-1)
# NOTE(review): the 4.5 factor and the divisor are unexplained — document their origin.
np.log(4.5*(a**2)) / 0.11987306870694

In [None]:
# Re-sample with another concentration scaling; overwrites `rvs`.
# NOTE(review): the multiplier is an unexplained magic number.
rvs = dirichlet.rvs(alpha * 12.784821071155195, size=1000)

In [None]:
# NOTE(review): `lca` is first assigned in a later cell; this raises NameError on a fresh kernel.
lca

In [None]:
# Pick a random ecoinvent activity as the functional unit.
ei = bd.Database('ecoinvent 3.8 cutoff')
fu = ei.random()

In [None]:
import bw2analyzer as bwa
import bw2calc as bc
# Static LCA of one unit of `fu` with the IPCC 2013 GWP 100a method.
ipcc = ('IPCC 2013', 'climate change', 'GWP 100a')
lca = bc.LCA({fu: 1}, ipcc)
lca.lci()
lca.lcia()
lca.score

In [None]:
# IPython help lookup (`?`); development leftover.
# NOTE(review): `ca` is only created in the next cell.
ca.annotated_top_processes?

In [None]:
# pandas is not imported anywhere else in the notebook, so import it here to
# avoid a NameError on `pd`.
import pandas as pd

# Tabulate the top contributing processes of `lca`.
ca = bwa.ContributionAnalysis()
test = ca.annotated_top_processes(lca)
# Each element is indexed as (impact, amount, activity).
data = [
    {
        "impact": element[0], 
        "amount": element[1], 
        "activity_name": element[2].get('name'), 
        "activity_unit": element[2].get('unit'), 
        "activity_location": element[2].get('location'),
    }
    for element in test
]
pd.DataFrame(data)

In [None]:
# Inspect the activity object of the first row and its type.
test[0][2], type(test[0][2])

In [None]:
# Distribution of the sampled values for the second exchange.
sb.displot(rvs[:, 1])

In [None]:
# Re-sample with concentrations scaled by 500; overwrites `rvs`.
rvs = dirichlet.rvs(alpha * 500, size=1000)

In [None]:
# Distribution of the sampled values for the first exchange.
sb.displot(rvs[:, 0])

In [None]:
# Distribution of the sampled values for the second exchange.
sb.displot(rvs[:, 1])

We can use these new values in Monte Carlo assessment (in place of the independent sampling which results in broken mass balances). The exact approach here will probably be different; for example, one could use trade statistics to create regional markets with much higher precision.

The underlying concepts in the following are documented in [bw_processing](https://github.com/brightway-lca/bw_processing) and [matrix_utils](https://github.com/brightway-lca/matrix_utils). In this notebook we will use in-memory datapackages for our fixes.

In [None]:
import bw_processing as bwp

In [None]:
# (row, col) matrix indices: the input activity id and the output activity id of each exchange.
indices_array = np.array([(exc.input.id, exc.output.id) for exc in found[ng]], dtype=bwp.INDICES_DTYPE)

# Redefine alpha to make sure order is consistent
# Transpose to get rows or exchange indices, columns of possible values
# NOTE(review): no random seed is set, so the samples differ between runs.
data_array = dirichlet.rvs(np.array([exc['amount'] for exc in found[ng]]) * 500, size=1000).T

# technosphere inputs must be flipped
flip_array = np.ones(len(found[ng]), dtype=bool)

In [None]:
# Create the datapackage that will carry the replacement values.
dp = bwp.create_datapackage()

In [None]:
# Register the sampled exchange values as an array resource for the technosphere matrix.
dp.add_persistent_array(
    matrix="technosphere_matrix",
    data_array=data_array,
    name="ng-fix-dz-es",
    indices_array=indices_array,
    flip_array=flip_array,
)

Compare Monte Carlo results with and without the fix

In [None]:
# LCIA method used for the comparison (same tuple as defined in an earlier cell).
ipcc = ('IPCC 2013', 'climate change', 'GWP 100a')

In [None]:
# Keep only the second returned item: the data objects passed to bc.LCA below.
_, data_objs, _ = bd.prepare_lca_inputs({ng: 1}, method=ipcc)

Default is to use three datapackages: biosphere database, ecoinvent database, and LCIA method

In [None]:
# Display the prepared data objects.
data_objs

In [None]:
import bw2calc as bc

In [None]:
# Baseline: one unit of `ng`, default data objects, uncertainty distributions enabled.
lca = bc.LCA({ng.id: 1}, data_objs=data_objs, use_distributions=True)
lca.lci()
lca.lcia()

In [None]:
# Collect 250 scores; zip with range(250) caps the iteration over `lca`
# (each step is expected to resample and update lca.score).
unmodified = np.array([lca.score for _ in zip(lca, range(250))])

In [None]:
# Same calculation with the Dirichlet datapackage appended and array data enabled.
fixed = bc.LCA({ng.id: 1}, data_objs=data_objs + [dp], use_arrays=True, use_distributions=True)
fixed.lci()
fixed.lcia()

In [None]:
# Collect 250 scores from the LCA that includes the fix.
modified = np.array([fixed.score for _ in zip(fixed, range(250))])

The uncertainty in this example is not large, so the difference is not obvious.

In [None]:
# Mean and standard deviation of the scores with the fix, then without it.
# The last value now uses `unmodified`; it previously repeated np.std(modified).
np.mean(modified), np.std(modified), np.mean(unmodified), np.std(unmodified)

In [None]:
# Re-run the LCIA with a demand of one unit of each exchange's input and print the score.
for exc in found[ng]:
    lca.redo_lcia({exc.input.id: 1})
    print(lca.score)

In [None]:
# Print the `scale` uncertainty parameter of each matched exchange.
for exc in found[ng]:
    print(exc['scale'])

In [None]:
# Sum of the technosphere matrix entries at the market's exchange positions (baseline LCA).
sum(
    lca.technosphere_matrix[lca.dicts.product[row], lca.dicts.activity[col]]
    for row, col in indices_array
)

In [None]:
# Sum of the technosphere matrix entries at the market's exchange positions (LCA with the fix).
sum(
    fixed.technosphere_matrix[fixed.dicts.product[row], fixed.dicts.activity[col]]
    for row, col in indices_array
)

In [None]:
# Score distribution without the fix.
sb.displot(unmodified, kde=True)

In [None]:
# Score distribution with the fix.
sb.displot(modified, kde=True)

# 