# Creating ecoinvent 3 migrations

Start by setting up a new project:

In [1]:
# NOTE(review): the star import is presumably what provides the unqualified
# names used in later cells (projects, bw2setup, SingleOutputEcospold2Importer,
# Database); no other import in this notebook supplies them.
from brightway2 import *
# Make "ecoinvent updates" the current project for all following cells.
projects.set_current("ecoinvent updates")



Biosphere database already present!!! No setup is needed


In [None]:
# Run brightway2's setup routine for the current project.
# NOTE(review): presumably installs the default biosphere data; confirm in the brightway2 docs.
bw2setup()

In [None]:
# Build an importer for the ecoinvent 3.1 cutoff datasets, apply the
# importer's strategies, and finish with its statistics report.
ecoinvent_31_dir = "/Users/cmutel/Documents/LCA Documents/Ecoinvent/3.1/cutoff/datasets"
ecoinvent_31_name = "3.1 cutoff"
ei = SingleOutputEcospold2Importer(ecoinvent_31_dir, ecoinvent_31_name)
ei.apply_strategies()
ei.statistics()

In [None]:
# List the name of every exchange the importer could not link.
for unlinked_exchange in ei.unlinked:
    unlinked_name = unlinked_exchange['name']
    print(unlinked_name)

These temporary flows can be safely dropped.

In [None]:
# Remove the exchanges that are still unlinked from the importer's data.
# NOTE(review): the positional True presumably acknowledges that the drop is
# irreversible; confirm against the bw2io documentation.
ei.drop_unlinked(True)

In [None]:
# Write the importer's datasets to the database named when `ei` was created
# ("3.1 cutoff" if the cells are run in order).
ei.write_database()

In [None]:
# Build an importer for the ecoinvent 3.2 cutoff datasets, apply the
# importer's strategies, and finish with its statistics report.
ecoinvent_32_dir = "/Users/cmutel/Documents/LCA Documents/Ecoinvent/3.2/cutoff/datasets"
ecoinvent_32_name = "3.2 cutoff"
ei = SingleOutputEcospold2Importer(ecoinvent_32_dir, ecoinvent_32_name)
ei.apply_strategies()
ei.statistics()

In [None]:
# Write the importer's datasets to the database named when `ei` was created
# ("3.2 cutoff" if the cells are run in order).
ei.write_database()

In [None]:
# Build an importer for the ecoinvent 3.3 cutoff datasets, apply the
# importer's strategies, and finish with its statistics report.
ecoinvent_33_dir = "/Users/cmutel/Documents/LCA Documents/Ecoinvent/3.3/cutoff/datasets"
ecoinvent_33_name = "3.3 cutoff"
ei = SingleOutputEcospold2Importer(ecoinvent_33_dir, ecoinvent_33_name)
ei.apply_strategies()
ei.statistics()

In [None]:
# Write the importer's datasets to the database named when `ei` was created
# ("3.3 cutoff" if the cells are run in order).
ei.write_database()

## 3.1 to 3.2

Which processes don't map exactly?

In [2]:
def _activity_keys(database_name):
    """Return the set of (name, reference product, location) tuples of all datasets in the named database."""
    return {
        (ds['name'], ds['reference product'], ds['location'])
        for ds in Database(database_name)
    }


data_31 = _activity_keys("3.1 cutoff")
data_32 = _activity_keys("3.2 cutoff")

517 out of 11301 activities are not the same in 3.2.

In [3]:
# (number of 3.1 activities, number of those with no identical key in 3.2)
(len(data_31), len(data_31 - data_32))

(11301, 517)

Let's get a selection of the missing activities.

In [4]:
# Show at most 100 of the 3.1 activity keys that are absent from 3.2.
unmatched_31 = data_31 - data_32
list(unmatched_31)[:100]

[('electricity production, photovoltaic, 3kWp slanted-roof installation, CdTe, laminated, integrated',
  'electricity, low voltage',
  'MX'),
 ('electricity production, natural gas, at conventional power plant',
  'electricity, high voltage',
  'AU'),
 ('concrete production, for de-icing salt contact',
  'concrete, for de-icing salt contact',
  'CH'),
 ('wheat grain, feed production, Swiss integrated production',
  'wheat grain, feed, Swiss integrated production',
  'RoW'),
 ('sawnwood production, softwood, raw, air dried',
  'sawnwood, softwood, raw, air dried',
  'RoW'),
 ('electricity production, geothermal', 'electricity, high voltage', 'RU'),
 ('electricity production, photovoltaic, 3kWp slanted-roof installation, CdTe, laminated, integrated',
  'electricity, low voltage',
  'KR'),
 ('electricity production, natural gas, at conventional power plant',
  'electricity, high voltage',
  'MY'),
 ('planing, board, softwood, air dried',
  'shaving, softwood, measured as dry mass',
  'CH'

## Official ecoinvent update list

Let's look at the official ecoinvent update Excel sheet. This is included in `bw2io`.

In [5]:
import os
import pandas as pd

In [6]:
from bw2io.data import dirpath
# Location of the 3.1 -> 3.2 change report inside the bw2io data directory;
# the final expression shows whether the file exists.
change_report_parts = ("lci", "ecoinvent 3.1-3.2.xlsx")
fp = os.path.join(dirpath, *change_report_parts)
os.path.isfile(fp)

True

### `Deleted or replaced` activities

In [7]:
# Load the "overview" sheet of the change report into a DataFrame.
overview = pd.read_excel(fp, "overview")

In [10]:
# Distinct values that occur in the 'status' column.
overview['status'].unique()

array(['in both versions', 'new', 'deleted or replaced'], dtype=object)

In [11]:
# Keep only the rows whose status is 'deleted or replaced'.
is_deleted_or_replaced = overview['status'].eq('deleted or replaced')
overview = overview.loc[is_deleted_or_replaced]

In [12]:
# Display the overview table (restricted to 'deleted or replaced' rows once the filter cell has run).
overview

Unnamed: 0,ISIC number,ISIC class,activityName,geography,reference product,status,time period previous,time period current,new time period,Technology Level previous,Technology Level current,Technology Level updated,quantitative change,Tags current,activityName renamed,activityName previous,reference product renamed,reference product previous
208,2420,Manufacture of basic precious and other non-fe...,"aluminium production, primary, ingot",UN-EUROPE,"aluminium, primary, ingot",deleted or replaced,2012-01-01 to 2012-12-31,,,Current,,,,,,,,
217,2420,Manufacture of basic precious and other non-fe...,"aluminium production, primary, liquid, Söderberg",UN-EUROPE,"aluminium, primary, liquid",deleted or replaced,2012-01-01 to 2012-12-31,,,Current,,,,,,,,
228,2420,Manufacture of basic precious and other non-fe...,"aluminium production, primary, liquid, prebake",UN-EUROPE,"aluminium, primary, liquid",deleted or replaced,2012-01-01 to 2012-12-31,,,Current,,,,,,,,
741,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 21...",CH,burnt shale,deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,
743,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 21...",Europe without Switzerland,"cement, alternative constituents 21-35%",deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,
745,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 21...",GLO,hard coal ash,deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,
747,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 6-20%",CA-QC,"cement, alternative constituents 6-20%",deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,
750,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 6-20%",CH,burnt shale,deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,
751,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 6-20%",Europe without Switzerland,"cement, alternative constituents 6-20%",deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,
753,2394,"Manufacture of cement, lime and plaster","cement production, alternative constituents 6-20%",GLO,"cement, alternative constituents 6-20%",deleted or replaced,2005-01-01 to 2009-12-31,,,Current,,,,,,,,


How many of our 517 missing activities were `deleted or replaced`?

In [21]:
# Keys (activity name, reference product, geography) of the activities listed
# in the filtered overview table.
changed_31 = set(zip(overview['activityName'], overview['reference product'], overview['geography']))
# 3.1 activities without an identical key in 3.2. The count is computed here
# rather than hard-coded so it cannot drift if either database changes.
missing_31 = data_31.difference(data_32)
# Number of missing activities that appear in the overview table.
len(missing_31) - len(missing_31.difference(changed_31))

275

### Changed reference products

Find cases where the reference product or byproduct name was changed. Byproducts need to be included because this list is for the undefined datasets - the byproducts can become reference products in new activities, depending on the system model.

In [25]:
# Load the "quantitative changes" sheet of the change report into a DataFrame.
changes = pd.read_excel(fp, "quantitative changes")

There are four types of changes listed here:

In [49]:
# Distinct values that occur in the 'change' column.
changes['change'].unique()

array(['updated', 'renaming only', 'added', 'deleted'], dtype=object)

We don't care about `added` activities, nor do we care about `updated` activities. Updated activities have changes in the amount, production volume, or price of an exchange, but we are trying to figure out how to update links from 3.1 to 3.2 - not update 3.1 itself. So we don't need to care about these changes.

Find changes where:

* The change was to a reference or byproduct
* The change was a renaming change

In [51]:
# Boolean row mask: the exchange is a reference product or byproduct AND the
# change is a pure rename. The conditions are combined with the logical `&`
# operator rather than arithmetic `*`, which only works by coincidence of
# bool arithmetic.
mask = (
    changes['group'].isin(['ReferenceProduct', 'Byproduct']) &
    (changes['change'] == 'renaming only')
)
# Number of selected rows.
mask.sum()



183

In [48]:
# Display the rows of `changes` selected by the boolean mask.
changes[mask]

Unnamed: 0,ISIC number,ISIC class,activityName,geography,reference product,field,change,group,exchange name,compartment,...,value current,value current / value previous,unit,activityName previous,activityName renamed,exchange name previous,exchange name renamed,activityLink activityName previous,activityLink activityName renamed,activityLink geography previous
480,1610,Sawmilling and planing of wood,"sawnwood production, softwood, dried (u=10%), ...",RER,"sawnwood, softwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, softwood, dried (u=10%), planed",,...,151,,EUR2005,"sawnwood production, softwood, kiln dried, planed",yes,"sawnwood, softwood, kiln dried, planed",yes,,no,
481,1610,Sawmilling and planing of wood,"sawnwood production, softwood, dried (u=10%), ...",RER,"sawnwood, softwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, softwood, dried (u=10%), planed",,...,1,,m3,"sawnwood production, softwood, kiln dried, planed",yes,"sawnwood, softwood, kiln dried, planed",yes,,no,
483,1610,Sawmilling and planing of wood,"sawnwood production, softwood, dried (u=10%), ...",RER,"sawnwood, softwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, softwood, dried (u=10%), planed",,...,23103500,,m3,"sawnwood production, softwood, kiln dried, planed",yes,"sawnwood, softwood, kiln dried, planed",yes,,no,
633,1610,Sawmilling and planing of wood,"lath, softwood, raw, kiln drying to u=10%",CH,"sawnwood, lath, softwood, raw, dried (u=10%)",,renaming only,ReferenceProduct,"sawnwood, lath, softwood, raw, dried (u=10%)",,...,1,,m3,"lath, softwood, raw, kiln drying",yes,"sawnwood, lath, softwood, raw, kiln dried",yes,,no,
1914,1610,Sawmilling and planing of wood,"planing, board, softwood, u=10%",CH,"sawnwood, board, softwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, board, softwood, dried (u=10%), planed",,...,1,,m3,"planing, board, softwood, kiln dried",yes,"sawnwood, board, softwood, kiln dried, planed",yes,,no,
3111,1610,Sawmilling and planing of wood,"sawnwood production, hardwood, dried (u=10%), ...",GLO,"sawnwood, hardwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, hardwood, dried (u=10%), planed",,...,1,,m3,"sawnwood production, hardwood, kiln dried, planed",yes,"sawnwood, hardwood, kiln dried, planed",yes,,no,
3113,1610,Sawmilling and planing of wood,"sawnwood production, hardwood, dried (u=10%), ...",GLO,"sawnwood, hardwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, hardwood, dried (u=10%), planed",,...,2790000,,m3,"sawnwood production, hardwood, kiln dried, planed",yes,"sawnwood, hardwood, kiln dried, planed",yes,,no,
3115,1610,Sawmilling and planing of wood,"sawnwood production, hardwood, dried (u=10%), ...",GLO,"sawnwood, hardwood, dried (u=10%), planed",,renaming only,ReferenceProduct,"sawnwood, hardwood, dried (u=10%), planed",,...,265,,EUR2005,"sawnwood production, hardwood, kiln dried, planed",yes,"sawnwood, hardwood, kiln dried, planed",yes,,no,
3154,1610,Sawmilling and planing of wood,"market for shavings, hardwood, measured as dry...",GLO,"shavings, hardwood, measured as dry mass",,renaming only,ReferenceProduct,"shavings, hardwood, measured as dry mass",,...,1,,kg,"market for shaving, hardwood, measured as dry ...",yes,"shaving, hardwood, measured as dry mass",yes,,no,
3155,1610,Sawmilling and planing of wood,"market for shavings, hardwood, measured as dry...",GLO,"shavings, hardwood, measured as dry mass",,renaming only,ReferenceProduct,"shavings, hardwood, measured as dry mass",,...,0.112329,,EUR2005,"market for shaving, hardwood, measured as dry ...",yes,"shaving, hardwood, measured as dry mass",yes,,no,


We need to check and make sure 1) all the old activity data is included in the 3.1 missing list, and 2) that the new activity data is included in the difference between 3.2 and 3.1.

In [58]:
import math

All old rows are in the set of missing 3.1 activities.

In [94]:
# One tuple per distinct masked row, in the order
# (activity name, previous activity name, previous exchange name,
#  geography, exchange name).
renamed_rows = changes[mask]
renamed_activities_old = set(zip(
    renamed_rows['activityName'],
    renamed_rows['activityName previous'],
    renamed_rows['exchange name previous'],
    renamed_rows['geography'],
    renamed_rows['exchange name'],
))

# 3.1 activity keys that have no identical key in 3.2.
find_in = data_31.difference(data_32)
# Maps an old (name, reference product, location) key to its new key.
found_renamed_31 = {}

for row in renamed_activities_old:
    new_name, previous_name, old_rp, location, new_rp = row

    # A non-string NaN in the "previous" column means no previous name was
    # recorded, so the current activity name is used as the old name as well.
    previous_name_missing = (
        not isinstance(previous_name, str) and math.isnan(previous_name)
    )
    old_name = new_name if previous_name_missing else previous_name

    exact_key = (old_name, old_rp, location)
    row_key = (old_name, old_rp, "RoW")

    if exact_key in find_in:
        found_renamed_31[exact_key] = (new_name, new_rp, location)
    elif location == 'GLO' and row_key in find_in:
        # Rows listed as GLO are also looked up under the "RoW" location.
        found_renamed_31[row_key] = (new_name, new_rp, "RoW")
    else:
        # No matching 3.1 activity: show the row for manual inspection.
        print(row)

In [95]:
# Number of 3.1 activity keys for which a renamed counterpart was recorded.
len(found_renamed_31)

112

Check to make sure we can find the new activities in 3.2:

In [64]:
# New (activity name, exchange name, geography) keys from the masked rows
# that have no identical key in 3.2.
renamed_rows = changes[mask]
new_keys = set(zip(
    renamed_rows['activityName'],
    renamed_rows['exchange name'],
    renamed_rows['geography'],
))
check_row = new_keys.difference(data_32)

# Every key not found directly must be a GLO entry ...
unmatched_locations = {location for name, product, location in check_row}
assert unmatched_locations == {'GLO'}
# ... and each of those must exist in 3.2 under the "RoW" location.
row_variants = {(name, product, 'RoW') for name, product, location in check_row}
assert not row_variants.difference(data_32)

### Current status

There are 11301 activities in ecoinvent 3.1 cutoff to upgrade to 3.2. Of these:

* 10784 are no problem - we can find the same activity name, reference product, and location
* 517 need more attention
* Of these 517, 112 were simple renames

Let's look at some of the remaining problematic activities:

In [96]:
# Show at most 100 of the 3.1 activity keys that are absent from 3.2 and
# were not matched as renames.
missing_31 = data_31.difference(data_32)
still_unresolved = missing_31.difference(set(found_renamed_31))
list(still_unresolved)[:100]

[('electricity production, photovoltaic, 3kWp slanted-roof installation, CdTe, laminated, integrated',
  'electricity, low voltage',
  'MX'),
 ('electricity production, natural gas, at conventional power plant',
  'electricity, high voltage',
  'AU'),
 ('concrete production, for de-icing salt contact',
  'concrete, for de-icing salt contact',
  'CH'),
 ('wheat grain, feed production, Swiss integrated production',
  'wheat grain, feed, Swiss integrated production',
  'RoW'),
 ('electricity production, geothermal', 'electricity, high voltage', 'RU'),
 ('electricity production, photovoltaic, 3kWp slanted-roof installation, CdTe, laminated, integrated',
  'electricity, low voltage',
  'KR'),
 ('electricity production, natural gas, at conventional power plant',
  'electricity, high voltage',
  'MY'),
 ('planing, board, softwood, air dried',
  'shaving, softwood, measured as dry mass',
  'CH'),
 ('electricity production, photovoltaic, 3kWp slanted-roof installation, ribbon-Si, panel, mounted

## Write changes

In [92]:
import csv

In [97]:
# Write the 3.1 -> 3.2 migration table.
# newline='' is required by the csv module so that it controls line endings
# itself (otherwise blank rows appear on Windows). The encoding is explicit
# because activity names in this data can contain non-ASCII characters
# (e.g. "Söderberg") and the platform default encoding is not always UTF-8.
with open('ecoinvent 31-32.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    writer.writerow(["Old activity name", "Reference product", "Location",
                     "New activity name", "Reference product", "Location"])

    # Renamed activities: old key followed by new key (six columns).
    data = [list(k) + list(v) for k, v in found_renamed_31.items()]

    # Activities still unmatched: old key only (three columns).
    data.extend(
        list(elem)
        for elem in data_31.difference(data_32).difference(set(found_renamed_31))
    )

    data.sort()

    writer.writerows(data)

In [98]:
# Write every 3.2 activity key, sorted, one per row.
# newline='' is required by the csv module so that it controls line endings
# itself (otherwise blank rows appear on Windows). The encoding is explicit
# so that non-ASCII names are written the same way on every platform.
with open('ecoinvent 32.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Activity name", "Reference product", "Location"])
    writer.writerows(sorted(data_32))