In [1]:
import bw2data as bd
import bw2io as bi
import bw2calc as bc
import premise as pr
import bw_processing as bwp
import pandas as pd
import os

In [2]:
# Read the premise decryption key from the environment instead of hardcoding it.
# A missing variable raises KeyError here; the assert additionally rejects an empty value.
PREMISE_KEY = os.environ["PREMISE_KEY"]
assert PREMISE_KEY

In [3]:
# Select the Brightway project that the rest of the notebook works in.
bd.projects.set_current("premise bw25")

In [4]:
# Directory holding the ecoinvent 3.7.1 cutoff datasets. Set the
# ECOINVENT_DATASETS_DIR environment variable to use a different location; the
# original author's path is kept as the fallback so existing setups keep working.
fp = os.environ.get(
    "ECOINVENT_DATASETS_DIR",
    "/Users/cmutel/Documents/lca/Ecoinvent/3.7.1/cutoff/datasets",
)

# Only import when the database is not already present in the current project,
# so re-running the notebook does not repeat the import.
if "ecoinvent 3.7.1 cutoff" not in bd.databases:
    # NOTE(review): bw2setup() presumably installs the default biosphere and
    # LCIA data the importer links against -- confirm against the bw2io docs.
    bi.bw2setup()
    ei = bi.SingleOutputEcospold2Importer(fp, "ecoinvent 3.7.1 cutoff")
    ei.apply_strategies()
    # Refuse to write a database that still has unlinked exchanges.
    assert ei.all_linked
    ei.write_database()

In [5]:
# Build the premise database for one scenario (REMIND, SSP2-Base, 2028) on top
# of the ecoinvent 3.7.1 cutoff database imported above.
ndb = pr.NewDatabase(
    scenarios=[
        {"model": "remind", "pathway": "SSP2-Base", "year": 2028},
    ],
    source_db="ecoinvent 3.7.1 cutoff",
    source_version="3.7.1",
    key=PREMISE_KEY,
)


////////////////////// EXTRACTING SOURCE DATABASE ///////////////////////
Getting activity data


100%|█████████████████████████████████| 19128/19128 [00:00<00:00, 112221.40it/s]


Adding exchange data to activities


100%|████████████████████████████████| 621719/621719 [00:56<00:00, 10927.17it/s]


Filling out exchange data


100%|███████████████████████████████████| 19128/19128 [00:05<00:00, 3705.86it/s]


Set missing location of datasets to global scope.
Set missing location of production exchanges to scope of dataset.
Correct missing location of technosphere exchanges.
Correct missing flow categories for biosphere exchanges
Remove empty exchanges.

/////////////////// IMPORTING DEFAULT INVENTORIES ////////////////////
Importing necessary inventories...

Done!



In [6]:
from time import time
import io
from contextlib import redirect_stdout

In [7]:
start = time()

# Avoid printing lots of stuff. The buffer is created *outside* the ``with``
# statement on purpose: using ``with io.StringIO() as capture_log`` would close
# the buffer when the block exits, and reading it afterwards would raise
# ValueError. This way the captured text stays available afterwards via
# ``capture_log.getvalue()``.
capture_log = io.StringIO()
with redirect_stdout(capture_log):
    ndb.update_cars()
    ndb.update_trucks()
    ndb.update_electricity()
    ndb.update_cement()
    ndb.update_steel()
    ndb.update_solar_PV()

print("Took {} seconds".format(time() - start))

Europe  not found in regex
Europe  not found in regex
Europe  not found in regex


Took 248.21454405784607 seconds


In [8]:
# Number of datasets held in ndb.db after the update_* calls.
len(ndb.db)

20108

In [9]:
# Distinct 'database' labels carried by the datasets in ndb.db.
{o['database'] for o in ndb.db}

{'CHP CCS',
 'Carma CCS',
 'Hydrogen from biogas SMR and ATR',
 'Hydrogen from coal Gasification',
 'Hydrogen from woody biomass gasification',
 'Methanol-based fuels from biogas hydrogen',
 'Methanol-based fuels from biomass hydrogen',
 'Methanol-based fuels from electrolysis',
 'Methanol-based fuels from natural gas hydrogen',
 'Methanol-based fuels with hydrogen from coal',
 'MobiTool - other vehicles',
 'biofuels',
 'biogas',
 'cement CCS-CCU',
 'direct air capture',
 'ecoinvent 3.7.1 cutoff',
 'geothermal',
 'hydrogen-electrolysis',
 'hydrogen-smr-natgas',
 'synfuel from FT from biomass',
 'synfuel from FT from biomass with CCS',
 'synfuel from FT from biomethane',
 'synfuel from FT from hydrogen from petroleum cracking',
 'synfuel from FT from natural gas',
 'synfuel from FT from natural gas with CCS',
 'synfuel from coal',
 'synfuel from electrolysis',
 'syngas',
 'syngas from coal'}

# Create a new export function

This already exists in `wurst`: [write_brightway25_database](https://github.com/polca/wurst/blob/master/wurst/brightway25/__init__.py#L30). However, `premise` doesn't follow the `wurst` convention of setting the `modified` flag on modified activities, so we will instead write a complete datapackage **without creating a `bw2data` database**. The metadata will only be stored in a CSV in the datapackage.

In [10]:
from wurst.linking import check_internal_linking, link_internal
from bw2data.backends import ActivityDataset as AD
from fs.zipfs import ZipFS
import numpy as np

In [11]:
class DatapackageWriter:
    """Write a list of dataset dictionaries to a ``bw_processing`` datapackage.

    No ``bw2data`` database is created. Matrix indices, amounts and flip flags
    are written as persistent vectors (one technosphere and one biosphere
    resource per source database), and the dataset metadata is attached as a
    CSV resource.

    Typical use::

        writer = DatapackageWriter(database=datasets, filesystem="out.zip", name="out")
        writer.assign_ids()
        dp = writer.write_datapackage()
    """

    def __init__(self, database, filesystem, name=None, dont_write=["biosphere3"], offset=100_000, **kwargs):
        """Validate and link ``database`` and prepare the output filesystem.

        Args:
            database: Iterable of dataset dicts, each with an ``'exchanges'``
                list. It is linked in place by ``link_internal``.
            filesystem: Either a path (``str``), which is opened as a writable
                ``ZipFS``, or an already constructed filesystem object.
            name: Name passed to ``bwp.create_datapackage``.
            dont_write: Database names stored on ``self.dont_write``.
                NOTE(review): this value is stored but not consulted anywhere
                in this class.
            offset: Gap added to the largest existing id before numbering
                datasets that have no id yet (see ``assign_ids``).
            **kwargs: Accepted and ignored.

        Raises:
            AssertionError: If any dataset lacks a production exchange.
        """
        self.db = database
        self.offset = offset
        # Copy, so instances never share (or mutate) the default argument list.
        self.dont_write = list(dont_write)
        self.name = name

        # Validate and link *before* touching the filesystem, so a failing
        # check does not leave a freshly created, still-open zip archive behind.
        lacking_production = sum(
            1
            for ds in self.db
            if not any(exc['type'] == 'production' for exc in ds['exchanges'])
        )
        assert lacking_production == 0, (
            "{} dataset(s) have no production exchange".format(lacking_production)
        )

        link_internal(database)
        check_internal_linking(database)

        if isinstance(filesystem, str):
            self.filesystem = ZipFS(filesystem, write=True)
        else:
            self.filesystem = filesystem

    def assign_ids(self):
        """Give every dataset an integer id and every exchange row/col indices.

        Ids already stored in the ``bw2data`` SQLite table (``ActivityDataset``)
        are reused. Datasets not found there are numbered starting at
        ``max(existing id) + offset``, offset by their position in ``self.db``.
        Sets ``ds['id']``, ``exc['row']`` and ``exc['col']`` in place.
        """
        database_names = {exc['input'][0] for ds in self.db for exc in ds['exchanges']}
        id_mapping = {
            (row.database, row.code): row.id
            for row in AD.select().where(AD.database << database_names)
        }

        # ``default=0`` keeps this working when none of the referenced
        # databases exist in bw2data (max() of an empty sequence would raise).
        new_id_start = max(id_mapping.values(), default=0) + self.offset

        # Add ids for activities not in the bw2data database
        for i, ds in enumerate(self.db):
            id_mapping.setdefault((ds['database'], ds['code']), new_id_start + i)

        for ds in self.db:
            col = id_mapping[(ds['database'], ds['code'])]
            ds['id'] = col
            for exc in ds['exchanges']:
                # Column is the consuming dataset, row is the linked input.
                exc['col'] = col
                exc['row'] = id_mapping[exc['input']]

    def get_indices(self, itr):
        """Return the ``(row, col)`` pairs of ``itr`` as a ``bwp.INDICES_DTYPE`` array."""
        return np.array([(o['row'], o['col']) for o in itr], dtype=bwp.INDICES_DTYPE)

    def get_data(self, itr):
        """Return the ``'amount'`` of every exchange in ``itr`` as an array."""
        return np.array([o['amount'] for o in itr])

    def get_flip(self, itr):
        """Return a boolean array, True for exchanges whose sign should be flipped.

        An exchange is flagged when its type is ``'technosphere'`` or
        ``'generic consumption'``. The dtype is forced to ``bool`` so that an
        empty input yields an empty boolean array rather than numpy's default
        float64.
        """
        return np.array(
            [o['type'] in ('technosphere', 'generic consumption') for o in itr],
            dtype=bool,
        )

    def get_exchange_iterator(self, data, database_name, act_types=None, exc_types=None):
        """Lazily yield the exchanges of ``data`` that match the given filters.

        Args:
            data: Iterable of dataset dicts.
            database_name: Only datasets whose ``'database'`` equals this are used.
            act_types: Allowed dataset ``'type'`` values; falsy means no filter.
            exc_types: Allowed exchange ``'type'`` values; falsy means no filter.
        """
        def keep_dataset(ds):
            return ds['database'] == database_name and (
                not act_types or ds.get('type') in act_types
            )

        def keep_exchange(exc):
            return not exc_types or exc.get('type') in exc_types

        return (
            exc
            for ds in data
            if keep_dataset(ds)
            for exc in ds['exchanges']
            if keep_exchange(exc)
        )

    def technosphere(self, data, database_name):
        """Yield the technosphere-matrix exchanges of ``database_name``.

        Datasets must be of type ``'process'`` or have no type at all.
        """
        return self.get_exchange_iterator(
            data=data,
            database_name=database_name,
            act_types=('process', None),
            exc_types=('production', 'substitution', 'generic production', 'technosphere', 'generic consumption')
        )

    def biosphere(self, data, database_name):
        """Yield the biosphere-matrix exchanges of ``database_name``.

        Datasets must be of type ``'process'`` or have no type at all.
        """
        return self.get_exchange_iterator(
            data=data,
            database_name=database_name,
            act_types=('process', None),
            exc_types=('biosphere',)
        )

    def write_datapackage(self, **kwargs):
        """Create, fill and finalize the datapackage; return it.

        For each source database (in sorted name order) this adds a
        ``"<name> technosphere"`` and a ``"<name> biosphere"`` persistent
        vector plus a ``"<name> metadata"`` CSV resource. Call ``assign_ids``
        first: the exchanges need ``'row'``/``'col'`` and the datasets ``'id'``.

        Args:
            **kwargs: Forwarded to ``bwp.create_datapackage``.
        """
        dp = bwp.create_datapackage(fs=self.filesystem, name=self.name, **kwargs)
        FIELDS = ["id", "database", "code", "name", "unit", "location", "reference product"]

        # Group once instead of rescanning the whole database for every
        # resource; the order of datasets within each group is preserved.
        by_database = {}
        for ds in self.db:
            by_database.setdefault(ds['database'], []).append(ds)

        for database_name in sorted(by_database):
            datasets = by_database[database_name]
            # Materialize each selection once; it feeds three arrays.
            technosphere = list(self.technosphere(datasets, database_name))
            biosphere = list(self.biosphere(datasets, database_name))

            dp.add_persistent_vector(
                matrix="technosphere_matrix",
                name=database_name + " technosphere",
                indices_array=self.get_indices(technosphere),
                data_array=self.get_data(technosphere),
                flip_array=self.get_flip(technosphere),
            )
            dp.add_persistent_vector(
                matrix="biosphere_matrix",
                name=database_name + " biosphere",
                indices_array=self.get_indices(biosphere),
                data_array=self.get_data(biosphere),
                flip_array=self.get_flip(biosphere),
            )
            df = pd.DataFrame(
                [{field: ds.get(field) for field in FIELDS} for ds in datasets]
            )
            dp.add_csv_metadata(
                dataframe=df,
                valid_for=[
                    (database_name + " technosphere", "cols"),
                    (database_name + " biosphere", "cols"),
                ],
                name=database_name + " metadata"
            )
        dp.finalize_serialization()
        return dp

In [12]:
# Export the premise database to a zipped datapackage and time the two phases.
start = time()

# Passing a string as `filesystem` makes DatapackageWriter open a writable ZipFS at that path.
dpw = DatapackageWriter(
    database=ndb.db,
    filesystem="remind SSP2-Base 2028.zip",
    name="remind SSP2-Base 2028",
)
# Sets ds['id'] on every dataset and exc['row'] / exc['col'] on every exchange.
dpw.assign_ids()

intermediate = time()

# `dp` is reused below as a data object for the LCA calculation.
dp = dpw.write_datapackage()

now = time()

# First number: total time (construction + id assignment + writing).
# Second number: time spent in write_datapackage() only.
print("Took {} / {} seconds".format(now - start, now - intermediate))

Took 34.742751121520996 / 22.46084713935852 seconds


In [13]:
# Datapackage of the ('IPCC 2013', 'climate change', 'GWP 100a') method; passed to bc.LCA together with `dp`.
ipcc = bd.Method(('IPCC 2013', 'climate change', 'GWP 100a')).datapackage()

In [14]:
# Integer id that assign_ids() stored on an arbitrarily chosen dataset (index 1000).
ndb.db[1000]['id']

21646

In [15]:
# Functional unit: one unit of the dataset at index 1000. The id is looked up
# rather than typed in by hand, because the integer depends on the state of the
# bw2data database and on the `offset` used by DatapackageWriter.assign_ids().
lca = bc.LCA({ndb.db[1000]['id']: 1}, data_objs=[ipcc, dp])

In [16]:
# Run the inventory calculation first, then the impact assessment.
lca.lci()
lca.lcia()

In [17]:
# Display the score of the calculation run above.
lca.score

5.588797156831066

In [18]:
# Inspect the shape and sparsity of the technosphere matrix built from the data objects.
lca.technosphere_matrix

<20108x20108 sparse matrix of type '<class 'numpy.float64'>'
	with 242644 stored elements in Compressed Sparse Row format>

In [19]:
# Inspect the shape and sparsity of the biosphere matrix.
lca.biosphere_matrix

<2106x20108 sparse matrix of type '<class 'numpy.float64'>'
	with 390065 stored elements in Compressed Sparse Row format>

In [20]:
# Inspect the shape and sparsity of the characterization matrix.
lca.characterization_matrix

<2106x2106 sparse matrix of type '<class 'numpy.float64'>'
	with 83 stored elements in Compressed Sparse Row format>

In [21]:
# Show which datapackages (and which resource groups in each) feed the technosphere matrix.
lca.technosphere_mm.packages

{<bw_processing.datapackage.FilteredDatapackage at 0x22d6a0d30>: [],
 <bw_processing.datapackage.FilteredDatapackage at 0x22d6a04f0>: [<matrix_utils.resource_group.ResourceGroup at 0x22d6a0640>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a0760>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a0190>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a0550>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a0910>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a05e0>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a0c70>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a05b0>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a0610>,
  <matrix_utils.resource_group.ResourceGroup at 0x22d6a06d0>,
  <matrix_utils.resource_group.ResourceGroup at 0x22c3815b0>,
  <matrix_utils.resource_group.ResourceGroup at 0x22dacd0a0>,
  <matrix_utils.resource_group.ResourceGroup at 0x22dacd100>,
  <matrix_utils.resource_group.ResourceGroup at 0x22dacd070>,