# Build characterization matrices <br>
- prepared regionalized CF
- non-regionalized CF from ecoinvent LCIA

In [1]:
import pickle
import collections
import numpy as np
import scipy.sparse as sp
import pandas as pd
import brightway2 as bw
import hashlib

In [2]:
!uname -a

Linux ifu-esd-srv-4 4.15.0-213-generic #224-Ubuntu SMP Mon Jun 19 13:30:12 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux


In [3]:
!ls ../data/final_matrix/

biosphere_index.p		 CF_pslglo_glo_eco.p
biosphere_matrix_csc_crop_rev.p  CF_pslglo_glo.p
biosphere_matrix_csc.p		 CF_psl_glo.p
CF_aware_glo_eco.p		 CF_pslglo.p
CF_aware_glo_ei.p		 CF_psl.p
CF_aware_glo.p			 CF_recipe.p
CF_aware.p			 full_index.p
CF_aware_reg_144co_mat.p	 production_vector.p
CF_ipcc.p			 technosphere_matrix_local_mar_csc_abs.p
CF_psl_glo_ei.p			 technosphere_matrix.p


In [4]:
with open('../data/final_matrix/full_index.p', 'rb') as i:
    full_index = pickle.load(i)
with open('../data/final_matrix/biosphere_index.p', 'rb') as i:
    bio_index = pickle.load(i)

In [5]:
# regionalized CF.
CF_aware = pd.read_csv('parse_CF_data/prepared_characterization_factors/prepared_aware_CF.csv', index_col=0,keep_default_na=False)
CF_lu_op = pd.read_csv('parse_CF_data/prepared_characterization_factors/prepared_land_use_occupation_CF.csv', index_col=0,keep_default_na=False)
CF_lu_tf = pd.read_csv('parse_CF_data/prepared_characterization_factors/prepared_land_use_transformation_CF.csv', index_col=0,keep_default_na=False)

In [6]:
%%time
bw.projects.set_current('regeco')
db = bw.Database('ecoinvent 3.7.1_cutoff_no_marketgroups')

CPU times: user 38 ms, sys: 18 ms, total: 56.1 ms
Wall time: 54.6 ms


## IPCC

In [7]:
[m for m in bw.methods if m[0].lower().startswith('ipcc 2013')]

[('IPCC 2013 no LT', 'climate change', 'GTP 100a'),
 ('IPCC 2013 no LT', 'climate change', 'GTP 20a'),
 ('IPCC 2013 no LT', 'climate change', 'GWP 100a'),
 ('IPCC 2013 no LT', 'climate change', 'GWP 20a'),
 ('IPCC 2013', 'climate change', 'GTP 100a'),
 ('IPCC 2013', 'climate change', 'GTP 20a'),
 ('IPCC 2013', 'climate change', 'GWP 100a'),
 ('IPCC 2013', 'climate change', 'GWP 20a')]

In [8]:
ipcc = ('IPCC 2013', 'climate change', 'GWP 100a')

In [9]:
%%time
lca = bw.LCA({db.random():1}, method = ipcc)
lca.lci()
lca.lcia()

CPU times: user 10.9 s, sys: 852 ms, total: 11.8 s
Wall time: 1.58 s


Current study: bio_index in biosphere matrix and inventory matrix is sorted by bio codes<br> 
Brightway: biosphere, inventory, and characterization matrix are ordered as in lca.biosphere_dict. <br> therefore when we build CF matrix, we need to reorder characterization matrix.

In [10]:
lca.biosphere_dict

{('biosphere3', '38a622c6-f086-4763-a952-7c6b3b1c42ba'): 0,
 ('biosphere3', '541a823c-0aad-4dc4-9123-d4af4647d942'): 1,
 ('biosphere3', '8cbaa905-41b0-4327-8403-bf1c8eb25429'): 2,
 ('biosphere3', 'f681eb3c-854a-4f78-bcfe-76dfbcf9df3c'): 3,
 ('biosphere3', 'a0fec60d-3f74-48bf-a2d2-58c30fc13e53'): 4,
 ('biosphere3', 'd5649872-502b-4d23-b882-26d74a9fc4b4'): 5,
 ('biosphere3', 'e2d860e3-1038-4386-a5f1-25ad75d18bbd'): 6,
 ('biosphere3', '79a87f98-0b00-4e7e-86f8-70667307f696'): 7,
 ('biosphere3', '90d374f1-b739-45b2-a734-d6bdd8c8dc9c'): 8,
 ('biosphere3', 'cbf58f64-9286-406f-8dcb-ef1d81eec980'): 9,
 ('biosphere3', 'c6310f81-3859-4c5c-990a-4662ba179693'): 10,
 ('biosphere3', '9fbec2f4-6b42-4e63-9573-14e04befe20f'): 11,
 ('biosphere3', '3166d308-cc57-4d31-b42e-63c7eada821f'): 12,
 ('biosphere3', '99c07b66-c039-4896-866d-06abdaa9d46a'): 13,
 ('biosphere3', 'cdcdbfc9-3d5f-4e78-8033-8dbecbb2cfec'): 14,
 ('biosphere3', '5ad58fcc-e9ba-4155-a3c9-e4ffb3065a6f'): 15,
 ('biosphere3', 'eb23cf3a-b866-458

In [11]:
len(bio_index) == len(lca.biosphere_dict)#('biosphere3', '38a622c6-f086-4763-a952-7c6b3b1c42ba'): 0,

True

In [12]:
biosphere_dict_list = sorted([(k[1], v) for k,v in lca.biosphere_dict.items()], key=lambda x:x[0])

In [13]:
[e[0] for e in biosphere_dict_list] == bio_index

True

In [14]:
reordered_index = np.array([e[1] for e in biosphere_dict_list])
reordered_index.shape

(2102,)

In [15]:
ipcc_cf_mat = sp.diags(lca.characterization_matrix.diagonal()[reordered_index]).tocsr()
ipcc_cf_mat.shape

(2102, 2102)

## ReCiPe

In [16]:
[m for m in bw.methods if m[0].lower().startswith('recipe endpoint')] #all with '(obsolete)'

[('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'freshwater ecotoxicity w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'freshwater eutrophication w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'urban land occupation w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'climate change, ecosystems w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'terrestrial ecotoxicity w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'agricultural land occupation w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'marine ecotoxicity w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'natural land transformation w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'ecosystem quality w/o LT',
  'terrestrial acidification w/o LT'),
 ('ReCiPe Endpoint (E,A) w/o LT',
  'human health w/o LT',
  'ionising radiation w/

In [17]:
recipe = ('ReCiPe Endpoint (H,A)', 'total', 'total')

In [18]:
lca.switch_method(recipe)
lca.redo_lcia()
lca.characterization_matrix

<2102x2102 sparse matrix of type '<class 'numpy.float64'>'
	with 934 stored elements in Compressed Sparse Row format>

In [19]:
new_biosphere_dict_list = sorted([(k[1], v) for k,v in lca.biosphere_dict.items()], key=lambda x:x[0])
biosphere_dict_list == new_biosphere_dict_list

True

In [20]:
recipe_cf_mat = sp.diags(lca.characterization_matrix.diagonal()[reordered_index]).tocsr()
recipe_cf_mat

<2102x2102 sparse matrix of type '<class 'numpy.float64'>'
	with 931 stored elements in Compressed Sparse Row format>

## AWARE

In [21]:
CF_aware.drop("World average",axis=0,inplace=True)

In [22]:
CF_aware.shape

(266, 11)

In [23]:
CF_aware.head()

Unnamed: 0,06d4812b-6937-4d64-8517-b69aabce3648,1acb026e-9de6-48fe-9e0d-be4d24125bbc,2404b41a-2eed-4e9d-8ab6-783946fdf5d6,51254820-3456-4373-b7b4-056cf7b16e01,67c40aae-d403-464d-9649-c12695e43ad8,831f249e-53f2-49cf-a93c-7cee105f048e,8bdedc25-af46-4c46-9b3a-670d0c177d8f,8c1494a5-4987-4715-aa2d-1908c495f4eb,8c75e7ab-8ab8-41e4-b394-c166ff5b050d,db4566b1-bd88-427d-92da-2d25879063b9,fc1c42ce-a759-49fa-b987-f1ec5e503db1
AE,-18.560787,18.560787,-18.560787,-18.560787,18.560787,18.560787,-18.560787,18.560787,18.560787,-18.560787,18.560787
AL,-23.118637,23.118637,-23.118637,-23.118637,23.118637,23.118637,-23.118637,23.118637,23.118637,-23.118637,23.118637
AM,-85.447551,85.447551,-85.447551,-85.447551,85.447551,85.447551,-85.447551,85.447551,85.447551,-85.447551,85.447551
AO,-7.986448,7.986448,-7.986448,-7.986448,7.986448,7.986448,-7.986448,7.986448,7.986448,-7.986448,7.986448
AR,-47.101654,47.101654,-47.101654,-47.101654,47.101654,47.101654,-47.101654,47.101654,47.101654,-47.101654,47.101654


In [24]:
aware_region_cf_dict = {}
not_available = set() #bio_flow in aware but not used by our bio_index
for region, cfs in CF_aware.iterrows():
    cf_array = np.zeros(len(bio_index))
    for i, val in cfs.iteritems():
        if i in bio_index:
            cf_array[bio_index.index(i)] = val
        else:
            not_available.add(i)
    aware_region_cf_dict[region] = cf_array
not_available    

{'06d4812b-6937-4d64-8517-b69aabce3648',
 '8bdedc25-af46-4c46-9b3a-670d0c177d8f'}

In [25]:
bio = bw.Database('biosphere3').load()
for na in not_available:
    print(bio.get(('biosphere3', na))['name'], '|', bio.get(('biosphere3', na))['categories'])

Water | ('water', 'ground-, long-term')
Fresh water (obsolete) | ('water', 'surface water')


In [26]:
#Stack arrays in sequence vertically (row wise). T. (len(bio_index), len(full_index) regions)
%time aware_cf_mat = sp.csc_matrix(np.vstack(aware_region_cf_dict[full_i[0]] for full_i in full_index)).T
aware_cf_mat

  """Entry point for launching an IPython kernel.


CPU times: user 24 s, sys: 1.06 s, total: 25.1 s
Wall time: 25.1 s


<2102x335099 sparse matrix of type '<class 'numpy.float64'>'
	with 3015891 stored elements in Compressed Sparse Row format>

In [27]:
np.array_equal(np.flatnonzero(np.array(aware_cf_mat.sum(axis=1))), np.flatnonzero(cf_array))

True

construct a aware_regCF_mat for 144 countries, in the shape of 144*144.

In [31]:
co_l = sorted(list({i[0][:2] if i[0] != 'Canada without Quebec' else 'CA'  for i in full_index}))
CF_aware_co_dict = abs(CF_aware[CF_aware.columns[0]])
CF_aware_reg_144co = [CF_aware_co_dict[c] for c in co_l]
CF_aware_reg_144co_mat = np.diag(CF_aware_reg_144co)
CF_aware_reg_144co_mat

array([[18.56078654,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        , 23.1186366 ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        , 85.44755127, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., 36.35202059,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         5.57640313,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  4.96560905]])

In [33]:
CF_aware_co_dict['CY']

74.29739327371473

# PSLglobal

In [28]:
print(CF_lu_op.shape)
print(CF_lu_tf.shape)
CF_lu_op.head()

(267, 60)
(267, 120)


Unnamed: 0,061259d7-7bcc-4298-af3a-63d084191988,062a6faf-b1a5-4a6a-aa02-47ae3ec566a8,0f923637-e322-44d8-9ff2-9c765d75e253,12c7671c-e4aa-46c6-93c5-b6f9ac1c453b,18636f13-f552-4136-a353-3b5a8e5f87d1,1896b498-8d13-4f58-8c17-21fe57740158,190d9910-5d04-4c97-abea-3b39682b7ed6,19f84b2e-e6ff-4351-ba3a-8b650fc20d14,1b0a8570-eab4-46c2-9b67-c9b918e75676,1eaa9ea4-40b8-414a-b198-5626400372e1,...,db1e4137-27a3-4b89-99af-42a18271c144,e063ee9c-9850-42b5-b01e-4cc9b5ad7152,e489cce4-a80f-417d-9ae6-9fc14cc7dd49,e5824519-f945-4b2c-a81b-677290021b8a,e780232c-898f-4606-8d21-f8589801ebe6,e9007a6f-7244-44d4-a561-91ae1b6c6cfc,f318deb8-ac36-47c0-bb00-e3022b583c7e,f8ba8266-7f00-47b8-9ef0-bc81a8728d09,fdb1b2d0-f537-401e-b845-1d93da512174,fe9c3a98-a6d2-452d-a9a4-a13e64f1b95b
AE,0.0,3.022179e-16,0.0,2.262251e-16,1.652649e-16,1.652649e-16,3.022179e-16,1.995576e-16,2.262251e-16,3.022179e-16,...,0.0,2.262251e-16,2.262251e-16,0.0,1.995576e-16,1.652649e-16,1.652649e-16,0.0,2.262251e-16,3.022179e-16
AL,0.0,4.596609e-15,0.0,5.976012e-15,3.409073e-15,3.409073e-15,4.596609e-15,1.599513e-15,5.976012e-15,4.596609e-15,...,0.0,5.976012e-15,5.976012e-15,0.0,1.599513e-15,3.409073e-15,3.409073e-15,0.0,5.976012e-15,4.596609e-15
AM,0.0,3.295205e-15,0.0,3.163179e-15,1.904295e-15,1.904295e-15,3.295205e-15,1.292189e-15,3.163179e-15,3.295205e-15,...,0.0,3.163179e-15,3.163179e-15,0.0,1.292189e-15,1.904295e-15,1.904295e-15,0.0,3.163179e-15,3.295205e-15
AO,0.0,1.359071e-15,0.0,8.39135e-16,6.767554e-16,6.767554e-16,1.359071e-15,6.804042e-16,8.39135e-16,1.359071e-15,...,0.0,8.39135e-16,8.39135e-16,0.0,6.804042e-16,6.767554e-16,6.767554e-16,0.0,8.39135e-16,1.359071e-15
AR,0.0,1.538893e-15,0.0,1.075026e-15,6.987089e-16,6.987089e-16,1.538893e-15,7.311084e-16,1.075026e-15,1.538893e-15,...,0.0,1.075026e-15,1.075026e-15,0.0,7.311084e-16,6.987089e-16,6.987089e-16,0.0,1.075026e-15,1.538893e-15


In [29]:
CF_lu = pd.concat([CF_lu_op,CF_lu_tf],axis=1)
CF_lu.to_csv(f'parse_CF_data/prepared_characterization_factors/prepared_land_use_CF.csv')
CF_lu.drop("World average",axis=0,inplace=True)
CF_lu.shape

(266, 180)

In [30]:
pslglo_region_cf_dict = {}
not_available = set()
for region, cfs in CF_lu.iterrows():
    cf_array = np.zeros(len(bio_index))
    for i, val in cfs.iteritems():
        if i in bio_index:
            cf_array[bio_index.index(i)] = val
        else:
            not_available.add(i)
    pslglo_region_cf_dict[region] = cf_array
len(not_available)

50

In [31]:
for na in not_available:
    print(bio.get(('biosphere3', na))['name'])

Transformation, to bare area (non-use)
Transformation, from seabed, drilling and mining
Occupation, arable, conventional tillage (obsolete)
Transformation, to annual crop, flooded crop
Occupation, seabed, natural (non-use)
Transformation, from wetland, coastal (non-use)
Transformation, from annual crop, irrigated, extensive
Transformation, from urban, discontinuously built
Occupation, snow and ice (non-use)
Transformation, from traffic area, rail network
Transformation, to urban, green area
Occupation, field margin/hedgerow
Occupation, arable, conservation tillage (obsolete)
Transformation, to snow and ice (non-use)
Transformation, from dump site
Transformation, from annual crop, flooded crop
Occupation, annual crop, irrigated, extensive
Occupation, lake, natural (non-use)
Occupation, wetland, inland (non-use)
Occupation, permanent crop, non-irrigated, extensive
Transformation, to field margin/hedgerow
Occupation, forest, secondary (non-use)
Transformation, to wetland, coastal (non-use

In [32]:
%time pslglo_cf_mat = sp.csc_matrix(np.vstack(pslglo_region_cf_dict[full_i[0]] for full_i in full_index)).T

  """Entry point for launching an IPython kernel.


CPU times: user 7.51 s, sys: 4.03 s, total: 11.5 s
Wall time: 11.5 s


In [33]:
pslglo_cf_mat

<2102x335099 sparse matrix of type '<class 'numpy.float64'>'
	with 35184159 stored elements in Compressed Sparse Row format>

In [34]:
np.array_equal(np.flatnonzero(np.array(pslglo_cf_mat.sum(axis=1))), np.flatnonzero(cf_array))

True

# Save

In [35]:
with open('../data/final_matrix/CF_ipcc.p', 'wb') as o:
    pickle.dump(ipcc_cf_mat, o)

In [36]:
with open('../data/final_matrix/CF_recipe.p', 'wb') as o:
    pickle.dump(recipe_cf_mat, o)

In [37]:
with open('../data/final_matrix/CF_aware.p', 'wb') as o:
    pickle.dump(aware_cf_mat, o)

In [38]:
pslglo_cf_mat.data.nbytes*1e-6

281.473272

In [39]:
with open('../data/final_matrix/CF_pslglo.p', 'wb') as o:
    pickle.dump(pslglo_cf_mat, o)

In [30]:
with open('../data/final_matrix/CF_aware_reg_144co_mat.p', 'wb') as o:
    pickle.dump(CF_aware_reg_144co_mat, o)

In [28]:
!ls -lh ../data/final_matrix

total 3.1G
-rw-r--r-- 1 sipeng2 sipeng2  94K Oct 24 12:31 biosphere_index.p
-rwxr--r-- 1 sipeng2 sipeng2  64M Dec 21  2021 biosphere_matrix_csc_crop_rev.p
-rw-rw-r-- 1 sipeng2 sipeng2  64M Oct 24 12:31 biosphere_matrix_csc.p
-rw-r--r-- 1 sipeng2 sipeng2 8.7K Oct 24 12:33 CF_aware_glo_eco.p
-rwxr--r-- 1 sipeng2 sipeng2 8.7K Dec 21  2021 CF_aware_glo_ei.p
-rw-r--r-- 1 sipeng2 sipeng2 8.7K Oct 24 12:32 CF_aware_glo.p
-rw-r--r-- 1 sipeng2 sipeng2  35M Oct 24 12:32 CF_aware.p
-rw-rw-r-- 1 sipeng2 sipeng2 163K Nov  9 18:44 CF_aware_reg_144co_mat.p
-rw-r--r-- 1 sipeng2 sipeng2 9.6K Oct 24 12:32 CF_ipcc.p
-rwxr--r-- 1 sipeng2 sipeng2 9.9K Dec 21  2021 CF_psl_glo_ei.p
-rw-r--r-- 1 sipeng2 sipeng2 9.9K Oct 24 12:33 CF_pslglo_glo_eco.p
-rw-r--r-- 1 sipeng2 sipeng2 9.9K Oct 24 12:33 CF_pslglo_glo.p
-rwxr--r-- 1 sipeng2 sipeng2 9.9K Dec 21  2021 CF_psl_glo.p
-rw-r--r-- 1 sipeng2 sipeng2 403M Oct 24 12:32 CF_pslglo.p
-rwxr--r-- 1 sipeng2 sipeng2 413M Dec 22  2021 CF_psl.p
-rw-r--r-- 