Add biosphere and technosphere linking

brightway-lca · Sep 20, 2023 · 9e16c17 · 9e16c17
1 parent abfd19d
commit 9e16c17
Show file tree

Hide file tree

Showing 6 changed files with 317 additions and 11 deletions.
diff --git a/bw_hestia_bridge/importer.py b/bw_hestia_bridge/importer.py
@@ -4,17 +4,25 @@
 from bw2io.importers.base_lci import LCIImporter
 from bw2io.strategies import add_database_name, normalize_units
 
-from . import get_config, set_config
+from . import set_config
 from .hestia_api import get_hestia_node
-from .strategies import add_code_from_hestia_attributes, convert
+from .strategies import (
+    add_code_from_hestia_attributes,
+    convert,
+    drop_zeros,
+    link_ecoinvent_biosphere,
+    link_ecoinvent_technosphere,
+)
 
 
 class HestiaImporter(LCIImporter):
     def __init__(
         self,
         cycle_id: str,
+        ecoinvent_label: str,
         data_state: Literal["original", "recalculated"] = "recalculated",
         staging: Optional[bool] = False,
+        biosphere_label: Optional[str] = "biosphere3",
     ) -> None:
         """
         Import a Hestia cycle as a Brightway database.
@@ -30,7 +38,6 @@ def __init__(
             Whether to fetch the cycle from the staging Hestia API.
         """
         # move to staging if necessary
-        old_staging = get_config("use_staging")
         set_config("use_staging", staging)
 
         # initialize variables
@@ -45,9 +52,11 @@ def __init__(
         self.strategies = [
             convert,
             normalize_units,
+            drop_zeros,
             add_code_from_hestia_attributes,
             partial(add_database_name, name=self.db_name),
+            partial(link_ecoinvent_biosphere, biosphere_label=biosphere_label),
+            partial(
+                link_ecoinvent_technosphere, ecoinvent_database_label=ecoinvent_label
+            ),
         ]
-
-        # revert config to initial value
-        set_config("use_staging", old_staging)
diff --git a/bw_hestia_bridge/strategies/__init__.py b/bw_hestia_bridge/strategies/__init__.py
@@ -1,2 +1,7 @@
 from .conversion import Converter, convert  # NOQA: F401
-from .linking import add_code_from_hestia_attributes  # NOQA: F401
+from .generic import drop_zeros  # NOQA: F401
+from .linking import (  # NOQA: F401
+    add_code_from_hestia_attributes,
+    link_ecoinvent_biosphere,
+    link_ecoinvent_technosphere,
+)
diff --git a/bw_hestia_bridge/strategies/conversion.py b/bw_hestia_bridge/strategies/conversion.py
@@ -1,3 +1,4 @@
+import warnings
 from collections import defaultdict
 from functools import lru_cache
 from typing import Optional, Tuple
@@ -52,17 +53,26 @@ def get_basic_metadata(self, obj: dict) -> dict:
             "defaultMethodClassification",
             "defaultMethodClassificationDescription",
         }
+
+        site = self.get_site(obj["site"]["@id"])
+
         return {
             "@id": obj["@id"],
             "comment": obj.get("description"),
-            "location": self.get_location(obj["site"]["@id"]),
+            "location": site["name"],
+            "hestia_site_id": obj["site"]["@id"],
+            "hestia_site": site,
             "type": "process",
             "extra_metadata": {key: obj[key] for key in EXTRAS if key in obj},
         }
 
     @lru_cache
-    def get_location(self, node_id: str) -> str:
-        return get_hestia_node(node_type="site", node_id=node_id)["name"]
+    def get_site(self, node_id: str) -> str:
+        location = get_hestia_node(node_type="site", node_id=node_id)
+        if "name" not in location:
+            warnings.warn(f"Can't find location {node_id}; using `GLO` instead")
+            return "GLO"
+        return location
 
     def _add_suffixed(
         self, source: dict, target: dict, label: str, fields: set, currency: bool

diff --git a/bw_hestia_bridge/strategies/generic.py b/bw_hestia_bridge/strategies/generic.py
@@ -0,0 +1,9 @@
+def drop_zeros(data: list) -> list:
+    for ds in data:
+        if "exchanges" in ds:
+            ds["exchanges"] = [
+                exc
+                for exc in ds["exchanges"]
+                if ("amount" not in exc or exc["amount"] != 0)
+            ]
+    return data
diff --git a/bw_hestia_bridge/strategies/linking.py b/bw_hestia_bridge/strategies/linking.py
@@ -1,9 +1,126 @@
+import csv
+from collections import defaultdict
+from pathlib import Path
+from typing import Optional
+
+import bw2data as bd
 from bw2io import activity_hash
+from constructive_geometries import Geomatcher
+
+DATA_DIR = Path(__file__).parent.parent.resolve() / "data"
 
 
-def add_code_from_hestia_attributes(data):
+def add_code_from_hestia_attributes(data: list) -> list:
+    """Set ``code`` on every object in ``data`` from a hash of selected fields.
+
+    The code is whatever ``activity_hash`` returns for the object when
+    restricted to ``FIELDS``. Objects are modified in place and ``data`` is
+    returned.
+    """
     FIELDS = ("name", "unit", "reference product", "transformation_id")
 
     for obj in data:
         obj["code"] = activity_hash(obj, fields=FIELDS)
     return data
+
+
+def link_ecoinvent_technosphere(data: list, ecoinvent_database_label: str) -> list:
+    csv_fp = DATA_DIR / "ecoinvent_mappings_technosphere.csv"
+
+    with open(csv_fp) as csvfile:
+        reader = csv.reader(csvfile)
+        # Skip header row
+        next(reader)
+        hestia_mapping = dict(reader)
+
+    geo = Geomatcher()
+
+    # Starts as Hestia @id to ecoinvent name
+    # Switch to ecoinvent name to [@ids]
+    hestia_reverse = defaultdict(list)
+
+    for key, value in hestia_mapping.items():
+        hestia_reverse[value].append(key)
+
+    hestia_possibles = defaultdict(list)
+
+    for ds in bd.Database(ecoinvent_database_label):
+        for hestia_term_id in hestia_reverse.get(ds["name"], []):
+            hestia_possibles[hestia_term_id].append(ds)
+
+    for ds in data:
+        try:
+            overlapping = geo.within(ds["location"])[::-1]
+        except KeyError:
+            overlapping = []
+
+        if ds.get("type", "process") != "process":
+            continue
+        for exc in ds.get("exchanges"):
+            if exc["type"] == "technosphere" and "input" not in exc:
+                pick_from_overlapping(
+                    exc, hestia_possibles.get(exc["term_id"], []), overlapping
+                )
+
+    return data
+
+
+def pick_from_overlapping(
+    exc: dict,
+    possibles: defaultdict,
+    overlapping: list,
+) -> None:
+    """Try to find a provider of the supplied demand.
+
+    `exc` is a dictionary; we use `term_id` (e.g. `manureSaltsKgK2O`) to search
+    in `possibles`.
+
+    `possibles` is a dictionary with keys of Hestia term `@id` keys, and values
+    of bw2data processes which were mapped against the Hestia term ids in
+    "ecoinvent_mappings_technosphere.csv".
+
+    `overlapping` is an ordered list, from smallest to largest, or each region in
+    ecoinvent which completely overlaps the `exc` dataset's location.
+
+    We try to find the process in the correct `possibles` value list which most
+    closely matches the `exc` dataset location. We also use fallback locations if
+    necessary. The fallback locations are `RoE` (rest of Europe), `RoW` (rest of
+    world), and `GLO` (global).
+
+    If a match is found, adds `input` to the `exc`.
+
+    """
+    mapping = {ds["location"]: ds for ds in possibles}
+
+    for location in overlapping:
+        if location in mapping:
+            exc["input"] = mapping[location].key
+            return
+    if (
+        any("Europe" in geo for geo in overlapping) or ("RER" in overlapping)
+    ) and "RoE" in mapping:
+        exc["input"] = mapping["RoE"].key
+    elif "RoW" in mapping:
+        exc["input"] = mapping["RoW"].key
+    elif "GLO" in mapping:
+        exc["input"] = mapping["GLO"].key
+
+
+def link_ecoinvent_biosphere(
+    data: list, biosphere_label: Optional[str] = "biosphere3"
+) -> list:
+    csv_fp = DATA_DIR / "ecoinvent_mappings_biosphere.csv"
+
+    with open(csv_fp) as csvfile:
+        reader = csv.reader(csvfile)
+        # Skip header row
+        next(reader)
+        hestia_mapping = {
+            key: bd.get_node(code=value).key for key, value in reader if value
+        }
+
+    for ds in data:
+        if ds.get("type", "process") != "process":
+            continue
+        for exc in ds.get("exchanges"):
+            if exc.get("type") == "biosphere" and "input" not in exc:
+                try:
+                    exc["input"] = hestia_mapping[exc["term_id"]]
+                except KeyError:
+                    pass
+
+    return data
diff --git a/dev/Test basic functionality.ipynb b/dev/Test basic functionality.ipynb
@@ -0,0 +1,156 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b3f75293-1179-459c-bc77-c19f08c66285",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bw2data as bd\n",
+    "import bw_hestia_bridge as bhb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "35414d38-ce77-422c-a99e-2ec065d3a850",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bd.projects.set_current(\"ecoinvent-3.9-cutoff\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "64666ee5-3808-4464-811c-6d281ff9a93b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "imp = bhb.HestiaImporter(\"__shpuarrure\", \"ecoinvent-3.9-cutoff\", staging=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "eb8af71c-946d-49d1-aaad-7cf614baba4e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Applying strategy: convert\n",
+      "Applying strategy: normalize_units\n",
+      "Applying strategy: drop_zeros\n",
+      "Applying strategy: add_code_from_hestia_attributes\n",
+      "Applying strategy: add_database_name\n",
+      "Applying strategy: link_ecoinvent_biosphere\n",
+      "Applying strategy: link_ecoinvent_technosphere\n",
+      "Geomatcher: Used 'MY' for 'Cropland - Malaysia'\n",
+      "Geomatcher: Used 'MY' for 'Cropland - Malaysia'\n",
+      "Applied 7 strategies in 4.34 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "imp.apply_strategies()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "21080250-d0ba-4e52-8e51-6c6391d89656",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "7 datasets\n",
+      "51 exchanges\n",
+      "15 unlinked exchanges\n",
+      "  Type production: 6 unique unlinked exchanges\n",
+      "  Type technosphere: 9 unique unlinked exchanges\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(7, 51, 15)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "imp.statistics()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5cd62a8d-405d-4b21-a554-39f6639f6ada",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'name': 'Palm empty fruit bunches (kg mass)', 'term_type': 'organicFertiliser', 'term_id': 'palmEmptyFruitBunchesKgMass', 'unit': 'kilogram', 'amount': 4250, 'group': 'Palm empty fruit bunches (kg mass)-0', 'type': 'technosphere', 'methodClassificationDescription': 'Best practice, assumed to what happens', 'methodClassification': 'estimated with assumptions'}\n",
+      "{'name': 'Palm oil mill effluent (kg mass)', 'term_type': 'organicFertiliser', 'term_id': 'palmOilMillEffluentKgMass', 'unit': 'kilogram', 'amount': 2540, 'group': 'Palm oil mill effluent (kg mass)-0', 'type': 'technosphere', 'methodClassificationDescription': 'Best practice, assumed to what happens', 'methodClassification': 'estimated with assumptions'}\n",
+      "{'name': 'Nitrogen in precipitation (kg N)', 'term_type': 'organicFertiliser', 'term_id': 'nitrogenInPrecipitationKgN', 'unit': 'kg N', 'amount': 17.5, 'group': 'Nitrogen in precipitation (kg N)-0', 'type': 'technosphere'}\n",
+      "{'name': 'Inorganic Magnesium fertiliser, unspecified (kg Mg)', 'term_type': 'inorganicFertiliser', 'term_id': 'inorganicMagnesiumFertiliserUnspecifiedKgMg', 'unit': 'kg Mg', 'amount': 21, 'group': 'Inorganic Magnesium fertiliser, unspecified (kg Mg)-0', 'type': 'technosphere'}\n",
+      "{'name': 'Glyphosate', 'term_type': 'pesticideAI', 'term_id': 'CAS-1071-83-6', 'unit': 'kg active ingredient', 'amount': 2.3, 'group': 'Glyphosate-0', 'type': 'technosphere'}\n",
+      "{'name': 'Cypermethrin', 'term_type': 'pesticideAI', 'term_id': 'CAS-52315-07-8', 'unit': 'kg active ingredient', 'amount': 0.3, 'group': 'Cypermethrin-0', 'type': 'technosphere'}\n",
+      "{'name': 'Fungicide unspecified (AI)', 'term_type': 'pesticideAI', 'term_id': 'fungicideUnspecifiedAi', 'unit': 'kg active ingredient', 'amount': 0.00018, 'group': 'Fungicide unspecified (AI)-0', 'type': 'technosphere'}\n",
+      "{'name': 'Warfarin', 'term_type': 'pesticideAI', 'term_id': 'CAS-81-81-2', 'unit': 'kg active ingredient', 'amount': 0.00022, 'group': 'Warfarin-0', 'type': 'technosphere'}\n",
+      "{'name': 'Saplings', 'term_type': 'other', 'term_id': 'saplings', 'unit': 'number', 'amount': 165, 'group': 'Saplings-0', 'type': 'technosphere', 'model': {'@type': 'Term', '@id': 'pooreNemecek2018', 'name': 'Poore & Nemecek (2018)', 'termType': 'model'}}\n",
+      "{'type': 'production', 'name': 'Oil palm, fruit', 'unit': 'kilogram', 'amount': 21000, 'transformation_id': None}\n",
+      "{'type': 'production', 'name': 'Oil palm, frond', 'unit': 'kilogram', 'amount': 10700, 'transformation_id': None}\n",
+      "{'type': 'production', 'name': 'Nitrogen uptake (above ground crop residue)', 'unit': 'kg N', 'amount': 153.54, 'transformation_id': None}\n",
+      "{'type': 'production', 'name': 'Phosphorus uptake (above ground crop residue)', 'unit': 'kg P', 'amount': 11.5, 'transformation_id': None}\n",
+      "{'type': 'production', 'name': 'Above ground crop residue, left on field', 'unit': 'kg dry matter', 'amount': 10700, 'transformation_id': None}\n",
+      "{'type': 'production', 'name': 'Above ground crop residue, removed', 'unit': 'kg dry matter', 'amount': 5018.3, 'transformation_id': None}\n"
+     ]
+    }
+   ],
+   "source": [
+    "for exc in imp.unlinked:\n",
+    "    print(exc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "917791d8-f361-4442-89d9-526b7880cdb9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}