Skip to content

Commit

Permalink
Add biosphere and technosphere linking
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Sep 20, 2023
1 parent abfd19d commit 9e16c17
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 11 deletions.
21 changes: 15 additions & 6 deletions bw_hestia_bridge/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,25 @@
from bw2io.importers.base_lci import LCIImporter
from bw2io.strategies import add_database_name, normalize_units

from . import get_config, set_config
from . import set_config
from .hestia_api import get_hestia_node
from .strategies import add_code_from_hestia_attributes, convert
from .strategies import (
add_code_from_hestia_attributes,
convert,
drop_zeros,
link_ecoinvent_biosphere,
link_ecoinvent_technosphere,
)


class HestiaImporter(LCIImporter):
def __init__(
self,
cycle_id: str,
ecoinvent_label: str,
data_state: Literal["original", "recalculated"] = "recalculated",
staging: Optional[bool] = False,
biosphere_label: Optional[str] = "biosphere3",
) -> None:
"""
Import a Hestia cycle as a Brightway database.
Expand All @@ -30,7 +38,6 @@ def __init__(
Whether to fetch the cycle from the staging Hestia API.
"""
# move to staging if necessary
old_staging = get_config("use_staging")
set_config("use_staging", staging)

# initialize variables
Expand All @@ -45,9 +52,11 @@ def __init__(
self.strategies = [
convert,
normalize_units,
drop_zeros,
add_code_from_hestia_attributes,
partial(add_database_name, name=self.db_name),
partial(link_ecoinvent_biosphere, biosphere_label=biosphere_label),
partial(
link_ecoinvent_technosphere, ecoinvent_database_label=ecoinvent_label
),
]

# revert config to initial value
set_config("use_staging", old_staging)
7 changes: 6 additions & 1 deletion bw_hestia_bridge/strategies/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
from .conversion import Converter, convert # NOQA: F401
from .linking import add_code_from_hestia_attributes # NOQA: F401
from .generic import drop_zeros # NOQA: F401
from .linking import ( # NOQA: F401
add_code_from_hestia_attributes,
link_ecoinvent_biosphere,
link_ecoinvent_technosphere,
)
16 changes: 13 additions & 3 deletions bw_hestia_bridge/strategies/conversion.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from collections import defaultdict
from functools import lru_cache
from typing import Optional, Tuple
Expand Down Expand Up @@ -52,17 +53,26 @@ def get_basic_metadata(self, obj: dict) -> dict:
"defaultMethodClassification",
"defaultMethodClassificationDescription",
}

site = self.get_site(obj["site"]["@id"])

return {
"@id": obj["@id"],
"comment": obj.get("description"),
"location": self.get_location(obj["site"]["@id"]),
"location": site["name"],
"hestia_site_id": obj["site"]["@id"],
"hestia_site": site,
"type": "process",
"extra_metadata": {key: obj[key] for key in EXTRAS if key in obj},
}

@lru_cache
def get_location(self, node_id: str) -> str:
return get_hestia_node(node_type="site", node_id=node_id)["name"]
def get_site(self, node_id: str) -> dict:
    """Fetch the Hestia ``site`` node identified by ``node_id``.

    Returns the node as delivered by ``get_hestia_node``. If that result has
    no ``"name"`` entry, a warning is emitted and a minimal stand-in mapping
    ``{"name": "GLO"}`` is returned instead, so that callers can always read
    ``site["name"]``.
    """
    site = get_hestia_node(node_type="site", node_id=node_id)
    if "name" not in site:
        warnings.warn(f"Can't find location {node_id}; using `GLO` instead")
        # The result is indexed by key (``site["name"]``) and stored as
        # metadata by the caller, so the fallback must be a mapping; a bare
        # "GLO" string would raise ``TypeError`` on ``site["name"]``.
        return {"name": "GLO"}
    return site

def _add_suffixed(
self, source: dict, target: dict, label: str, fields: set, currency: bool
Expand Down
9 changes: 9 additions & 0 deletions bw_hestia_bridge/strategies/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def drop_zeros(data: list) -> list:
    """Strip exchanges whose ``amount`` equals zero from every dataset.

    Datasets without an ``"exchanges"`` key are left untouched, and exchanges
    that carry no ``"amount"`` key are kept. Each affected dataset receives a
    new ``"exchanges"`` list; ``data`` itself is modified in place and
    returned.
    """

    def _is_kept(exchange) -> bool:
        # Only an explicit zero amount disqualifies an exchange.
        if "amount" not in exchange:
            return True
        return exchange["amount"] != 0

    for dataset in data:
        if "exchanges" not in dataset:
            continue
        dataset["exchanges"] = list(filter(_is_kept, dataset["exchanges"]))
    return data
119 changes: 118 additions & 1 deletion bw_hestia_bridge/strategies/linking.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,126 @@
import csv
from collections import defaultdict
from pathlib import Path
from typing import Optional

import bw2data as bd
from bw2io import activity_hash
from constructive_geometries import Geomatcher

DATA_DIR = Path(__file__).parent.parent.resolve() / "data"


def add_code_from_hestia_attributes(data):
def add_code_from_hestia_attributes(data: list) -> list:
    """Give every object in ``data`` a ``code`` computed from its attributes.

    The code is the value ``activity_hash`` returns for the object when
    restricted to the fields listed below. Objects are updated in place and
    the same list is returned.
    """
    hashed_fields = ("name", "unit", "reference product", "transformation_id")

    for entry in data:
        code = activity_hash(entry, fields=hashed_fields)
        entry["code"] = code

    return data


def link_ecoinvent_technosphere(data: list, ecoinvent_database_label: str) -> list:
    """Link unlinked technosphere exchanges to ecoinvent providers.

    Reads ``ecoinvent_mappings_technosphere.csv`` from ``DATA_DIR``. The header
    row is skipped and every remaining row must have exactly two columns: a
    Hestia term ``@id`` and an ecoinvent activity name. All datasets in the
    ``ecoinvent_database_label`` database whose ``name`` appears in that
    mapping become candidate providers for the corresponding Hestia terms.

    For each process dataset in ``data``, every exchange of type
    ``technosphere`` that has no ``input`` yet is handed to
    ``pick_from_overlapping`` together with its candidates and the regions
    containing the dataset's location. Exchanges for which no provider is
    found stay unlinked.

    ``data`` is modified in place and returned.
    """
    csv_fp = DATA_DIR / "ecoinvent_mappings_technosphere.csv"

    # The csv module asks for files to be opened with ``newline=""``
    with open(csv_fp, newline="") as csvfile:
        reader = csv.reader(csvfile)
        # Skip header row
        next(reader)
        hestia_mapping = dict(reader)

    geo = Geomatcher()

    # Starts as Hestia @id to ecoinvent name
    # Switch to ecoinvent name to [@ids]
    hestia_reverse = defaultdict(list)
    for key, value in hestia_mapping.items():
        hestia_reverse[value].append(key)

    # Hestia term @id -> ecoinvent datasets whose name was mapped to that term
    hestia_possibles = defaultdict(list)
    for ds in bd.Database(ecoinvent_database_label):
        for hestia_term_id in hestia_reverse.get(ds["name"], []):
            hestia_possibles[hestia_term_id].append(ds)

    for ds in data:
        if ds.get("type", "process") != "process":
            continue

        # Tolerate datasets without exchanges and exchanges without a
        # ``type``, as ``link_ecoinvent_biosphere`` does for the latter.
        unlinked = [
            exc
            for exc in ds.get("exchanges") or []
            if exc.get("type") == "technosphere" and "input" not in exc
        ]
        if not unlinked:
            # Nothing to link, so skip the geomatching for this dataset
            continue

        try:
            # Reversed because ``pick_from_overlapping`` wants smallest first.
            # NOTE(review): assumes ``within`` returns biggest first - confirm
            overlapping = geo.within(ds["location"])[::-1]
        except KeyError:
            # Missing or unknown location: only the generic fallbacks apply
            overlapping = []

        for exc in unlinked:
            pick_from_overlapping(
                exc, hestia_possibles.get(exc.get("term_id"), []), overlapping
            )

    return data


def pick_from_overlapping(
    exc: dict,
    possibles: list,
    overlapping: list,
) -> None:
    """Try to find a provider of the supplied demand.

    `exc` is the exchange dictionary to link; it is modified in place.

    `possibles` is the list of candidate provider processes for this exchange.
    Each candidate must support `candidate["location"]` and expose a `.key`
    attribute. If several candidates share a location, the last one in the
    list is used.

    `overlapping` is an ordered list, from smallest to largest, of the regions
    which contain the location of the dataset `exc` belongs to.

    The first region in `overlapping` for which a candidate exists wins. If
    none matches, fallback locations are tried in this order: `RoE` (rest of
    Europe; only when `overlapping` contains `RER` or an entry for which
    `"Europe" in entry` holds), `RoW` (rest of world), and `GLO` (global).

    If a match is found, adds `input` to the `exc`; otherwise `exc` is left
    unchanged. Returns nothing.
    """
    by_location = {ds["location"]: ds for ds in possibles}

    # Prefer the smallest enclosing region that has a candidate
    for location in overlapping:
        if location in by_location:
            exc["input"] = by_location[location].key
            return

    in_europe = any("Europe" in geo for geo in overlapping) or (
        "RER" in overlapping
    )
    fallbacks = (["RoE"] if in_europe else []) + ["RoW", "GLO"]

    for location in fallbacks:
        if location in by_location:
            exc["input"] = by_location[location].key
            return


def link_ecoinvent_biosphere(
    data: list, biosphere_label: Optional[str] = "biosphere3"
) -> list:
    """Link unlinked biosphere exchanges to biosphere flows.

    Reads ``ecoinvent_mappings_biosphere.csv`` from ``DATA_DIR``. The header
    row is skipped and every remaining row must have exactly two columns: a
    Hestia term ``@id`` and a flow ``code``. Rows with an empty code are
    ignored. Each code is resolved with ``bd.get_node``, restricted to the
    ``biosphere_label`` database; passing ``None`` searches all databases.
    A code that cannot be resolved makes ``bd.get_node`` raise.

    For each process dataset in ``data``, every exchange of type ``biosphere``
    without an ``input`` whose ``term_id`` is in the mapping gets the key of
    the matching flow as ``input``. Other exchanges are left unchanged.

    ``data`` is modified in place and returned.
    """
    csv_fp = DATA_DIR / "ecoinvent_mappings_biosphere.csv"

    # Honour ``biosphere_label``: without a database filter a code that also
    # exists in another database would be ambiguous or resolve to the wrong
    # node.
    node_filter = {} if biosphere_label is None else {"database": biosphere_label}

    # The csv module asks for files to be opened with ``newline=""``
    with open(csv_fp, newline="") as csvfile:
        reader = csv.reader(csvfile)
        # Skip header row
        next(reader)
        hestia_mapping = {
            key: bd.get_node(code=value, **node_filter).key
            for key, value in reader
            if value
        }

    for ds in data:
        if ds.get("type", "process") != "process":
            continue
        # Datasets without exchanges are simply skipped
        for exc in ds.get("exchanges") or []:
            if exc.get("type") != "biosphere" or "input" in exc:
                continue
            # Best effort: terms without a mapping stay unlinked
            flow_key = hestia_mapping.get(exc.get("term_id"))
            if flow_key is not None:
                exc["input"] = flow_key

    return data
156 changes: 156 additions & 0 deletions dev/Test basic functionality.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b3f75293-1179-459c-bc77-c19f08c66285",
"metadata": {},
"outputs": [],
"source": [
"import bw2data as bd\n",
"import bw_hestia_bridge as bhb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "35414d38-ce77-422c-a99e-2ec065d3a850",
"metadata": {},
"outputs": [],
"source": [
"bd.projects.set_current(\"ecoinvent-3.9-cutoff\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "64666ee5-3808-4464-811c-6d281ff9a93b",
"metadata": {},
"outputs": [],
"source": [
"imp = bhb.HestiaImporter(\"__shpuarrure\", \"ecoinvent-3.9-cutoff\", staging=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "eb8af71c-946d-49d1-aaad-7cf614baba4e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applying strategy: convert\n",
"Applying strategy: normalize_units\n",
"Applying strategy: drop_zeros\n",
"Applying strategy: add_code_from_hestia_attributes\n",
"Applying strategy: add_database_name\n",
"Applying strategy: link_ecoinvent_biosphere\n",
"Applying strategy: link_ecoinvent_technosphere\n",
"Geomatcher: Used 'MY' for 'Cropland - Malaysia'\n",
"Geomatcher: Used 'MY' for 'Cropland - Malaysia'\n",
"Applied 7 strategies in 4.34 seconds\n"
]
}
],
"source": [
"imp.apply_strategies()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "21080250-d0ba-4e52-8e51-6c6391d89656",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7 datasets\n",
"51 exchanges\n",
"15 unlinked exchanges\n",
" Type production: 6 unique unlinked exchanges\n",
" Type technosphere: 9 unique unlinked exchanges\n"
]
},
{
"data": {
"text/plain": [
"(7, 51, 15)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"imp.statistics()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5cd62a8d-405d-4b21-a554-39f6639f6ada",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'name': 'Palm empty fruit bunches (kg mass)', 'term_type': 'organicFertiliser', 'term_id': 'palmEmptyFruitBunchesKgMass', 'unit': 'kilogram', 'amount': 4250, 'group': 'Palm empty fruit bunches (kg mass)-0', 'type': 'technosphere', 'methodClassificationDescription': 'Best practice, assumed to what happens', 'methodClassification': 'estimated with assumptions'}\n",
"{'name': 'Palm oil mill effluent (kg mass)', 'term_type': 'organicFertiliser', 'term_id': 'palmOilMillEffluentKgMass', 'unit': 'kilogram', 'amount': 2540, 'group': 'Palm oil mill effluent (kg mass)-0', 'type': 'technosphere', 'methodClassificationDescription': 'Best practice, assumed to what happens', 'methodClassification': 'estimated with assumptions'}\n",
"{'name': 'Nitrogen in precipitation (kg N)', 'term_type': 'organicFertiliser', 'term_id': 'nitrogenInPrecipitationKgN', 'unit': 'kg N', 'amount': 17.5, 'group': 'Nitrogen in precipitation (kg N)-0', 'type': 'technosphere'}\n",
"{'name': 'Inorganic Magnesium fertiliser, unspecified (kg Mg)', 'term_type': 'inorganicFertiliser', 'term_id': 'inorganicMagnesiumFertiliserUnspecifiedKgMg', 'unit': 'kg Mg', 'amount': 21, 'group': 'Inorganic Magnesium fertiliser, unspecified (kg Mg)-0', 'type': 'technosphere'}\n",
"{'name': 'Glyphosate', 'term_type': 'pesticideAI', 'term_id': 'CAS-1071-83-6', 'unit': 'kg active ingredient', 'amount': 2.3, 'group': 'Glyphosate-0', 'type': 'technosphere'}\n",
"{'name': 'Cypermethrin', 'term_type': 'pesticideAI', 'term_id': 'CAS-52315-07-8', 'unit': 'kg active ingredient', 'amount': 0.3, 'group': 'Cypermethrin-0', 'type': 'technosphere'}\n",
"{'name': 'Fungicide unspecified (AI)', 'term_type': 'pesticideAI', 'term_id': 'fungicideUnspecifiedAi', 'unit': 'kg active ingredient', 'amount': 0.00018, 'group': 'Fungicide unspecified (AI)-0', 'type': 'technosphere'}\n",
"{'name': 'Warfarin', 'term_type': 'pesticideAI', 'term_id': 'CAS-81-81-2', 'unit': 'kg active ingredient', 'amount': 0.00022, 'group': 'Warfarin-0', 'type': 'technosphere'}\n",
"{'name': 'Saplings', 'term_type': 'other', 'term_id': 'saplings', 'unit': 'number', 'amount': 165, 'group': 'Saplings-0', 'type': 'technosphere', 'model': {'@type': 'Term', '@id': 'pooreNemecek2018', 'name': 'Poore & Nemecek (2018)', 'termType': 'model'}}\n",
"{'type': 'production', 'name': 'Oil palm, fruit', 'unit': 'kilogram', 'amount': 21000, 'transformation_id': None}\n",
"{'type': 'production', 'name': 'Oil palm, frond', 'unit': 'kilogram', 'amount': 10700, 'transformation_id': None}\n",
"{'type': 'production', 'name': 'Nitrogen uptake (above ground crop residue)', 'unit': 'kg N', 'amount': 153.54, 'transformation_id': None}\n",
"{'type': 'production', 'name': 'Phosphorus uptake (above ground crop residue)', 'unit': 'kg P', 'amount': 11.5, 'transformation_id': None}\n",
"{'type': 'production', 'name': 'Above ground crop residue, left on field', 'unit': 'kg dry matter', 'amount': 10700, 'transformation_id': None}\n",
"{'type': 'production', 'name': 'Above ground crop residue, removed', 'unit': 'kg dry matter', 'amount': 5018.3, 'transformation_id': None}\n"
]
}
],
"source": [
"for exc in imp.unlinked:\n",
" print(exc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "917791d8-f361-4442-89d9-526b7880cdb9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 9e16c17

Please sign in to comment.