# Build CF matrix of AWARE and IPCC

In [1]:
import os
import json
import pandas as pd
import brightway2 as bw
import pickle
import numpy as np
import scipy.sparse as sp

## 1. Parse CFs of AWARE
Starting from the raw AWARE CF file, derive a CF for each final location and each water-related elementary flow in the new ecoinvent.

#### 1.1 Load raw file

In [18]:
# Absolute path of the raw AWARE CF workbook (resolved against the current working directory).
path_to_file = os.path.abspath('../../Data/cfs/AWARE_country_regions_world_april2016_modified_region_name.xlsx')
# Open the workbook once (timed with %time); a later cell reads the 'AWARE' sheet from this handle.
%time excel_file = pd.ExcelFile(path_to_file,engine='openpyxl')

CPU times: user 28.5 ms, sys: 288 µs, total: 28.8 ms
Wall time: 27.2 ms


In [19]:
# Show which sheets the workbook contains.
excel_file.sheet_names

['AWARE', 'AWARE monthly', 'Spatio-Temporal Variability']

In [20]:
# Read columns A-D of the 'AWARE' sheet, skipping the first two rows of the sheet.
aware = pd.read_excel(excel_file, sheet_name='AWARE', skiprows=2, usecols='A:D')
# Drop rows where all values are NA (blank rows), then propagate non-null values
# forward column-wise. DataFrame.ffill replaces the deprecated fillna(method='ffill').
aware = aware.dropna(axis=0, how='all').ffill(axis=1)

aware_unknown = aware[['COUNTRY', 'UNKNOWN']]
# Keep only rows whose 'UNKNOWN' entry is a real number (removes rows holding text).
# `pd.np` was removed in pandas 2.0, so use numpy directly; Series.map avoids the
# deprecated DataFrame.applymap.
unknown_is_numeric = aware_unknown['UNKNOWN'].map(np.isreal)
# Filtering and set_index both return new frames, so no in-place edit of a slice is needed.
aware_unknown = aware_unknown[unknown_is_numeric].set_index('COUNTRY')
aware_cf = aware_unknown['UNKNOWN']
len(aware_cf)

  """


227

#### 1.2 Get proper CF for each final location in new ecoinvent
final_loc in new ecoinvent -> country_code -> country_name -> row in CF table

In [28]:
# Load the pickled collection of final locations.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../../Data/tech_vector/final_loc_set.p', 'rb') as pickle_file:
    final_loc_set = pickle.load(pickle_file)

# Read the ecoinvent geography list. keep_default_na=False keeps strings such as "NA"
# as text instead of converting them to missing values.
eco_geo = pd.read_excel(
    r'../../Data/ecoinvent_database/eiv3.7_geographies_names_coordinates_shortcuts_overlaps.xlsx',
    sheet_name='list of geographies',
    keep_default_na=False,
    engine='openpyxl',
)
# Lookup table: geography shortname -> geography name.
name_by_shortname = eco_geo.set_index('shortname')['name']
shortcut_eco_loc = name_by_shortname.to_dict()

In [30]:
# Work on a copy so that adding the proxy rows below does not mutate `aware_unknown`;
# this keeps the cell idempotent when it is re-run.
df = aware_unknown.copy()

# Add CFs that are missing from the AWARE table by copying the CF of a proxy region:
# Kosovo -> RER, South Sudan -> RAF, Curaçao -> RLA, Hong Kong and Taiwan -> RAS.
proxy_region_by_country = {
    'Kosovo': 'RER',
    'South Sudan': 'RAF',
    'Curaçao': 'RLA',
    'Hong Kong': 'RAS',
    'Taiwan, Province of China': 'RAS',
}
for missing_country, proxy_region in proxy_region_by_country.items():
    df.loc[missing_country] = df.loc[proxy_region]

# final_loc -> country code -> country name. The country code is the first two
# characters of the location, except for 'Canada without Quebec' which maps to 'CA'.
loc_to_country = {
    loc: shortcut_eco_loc['CA' if loc == 'Canada without Quebec' else loc[:2]]
    for loc in final_loc_set
}

# Report every country without a CF at once instead of stopping at the first one.
countries_without_cf = sorted(set(loc_to_country.values()) - set(df.index))
if countries_without_cf:
    raise KeyError(f'No AWARE CF available for: {countries_without_cf}')

# country name -> CF: select all rows in one indexing operation rather than growing
# the frame one row at a time, then relabel the rows with their final location.
final_locs = list(loc_to_country)
aware_final_loc_df = df.loc[[loc_to_country[loc] for loc in final_locs]].copy()
aware_final_loc_df.index = final_locs
aware_final_loc_df.loc['World average'] = df.loc['World, UNKNOWN']

aware_final_loc_df = aware_final_loc_df.sort_index()  # order rows by final location
aware_final_loc_df.to_csv('../../Data/cfs/aware_CF_with_final_location.csv')

#### 1.3 Change signs to get correct CFs for water-related elementary flows in new ecoinvent
elementary(biosphere) flow code -> elementary flow name -> column in CF table

In [31]:
# Select the brightway project and load the 'biosphere3' database into memory.
# Later cells read 'name', 'categories' and 'code' from its entries.
bw.projects.set_current('regeco')
bio = bw.Database('biosphere3').load()

In [32]:
def load_json(path):
    """Read a JSON file and return its parsed content.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file.

    Returns
    -------
    object
        The deserialised JSON document.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [33]:
# Sign factors for the water-related elementary flows. The JSON keys have the form
# "<part>|<part>"; split them on '|' into tuples.
raw_flow_factors = load_json('../../Data/cfs/wateruse_elementary_flows_factors.json')
aware_flow_factors = {}
for joined_key, factor in raw_flow_factors.items():
    aware_flow_factors[tuple(joined_key.split('|'))] = factor
aware_flow_factors

{('Fresh water (obsolete)', "('water', 'surface water')"): -1.0,
 ('Water, cooling, unspecified natural origin',
  "('natural resource', 'in water')"): 1.0,
 ('Water, lake', "('natural resource', 'in water')"): 1.0,
 ('Water, river', "('natural resource', 'in water')"): 1.0,
 ('Water, turbine use, unspecified natural origin',
  "('natural resource', 'in water')"): 1.0,
 ('Water, unspecified natural origin',
  "('natural resource', 'in water')"): 1.0,
 ('Water, well, in ground', "('natural resource', 'in water')"): 1.0,
 ('Water', "('water', 'ground-')"): -1.0,
 ('Water', "('water', 'ground-, long-term')"): -1.0,
 ('Water', "('water', 'surface water')"): -1.0,
 ('Water', "('water',)"): -1.0}

In [34]:
# Map each biosphere flow code to its (name, categories-as-string) key, keeping only
# the flows that have an entry in aware_flow_factors.
code_to_name = {}
for flow in bio.values():
    flow_key = (flow['name'], str(flow['categories']))
    if flow_key in aware_flow_factors:
        code_to_name[flow['code']] = flow_key

In [35]:
# One row per final location, one column per biosphere flow code (columns in sorted
# code order). Each column is the location's 'UNKNOWN' CF times the flow's sign factor.
unknown_cf = aware_final_loc_df['UNKNOWN']
cols = sorted(code_to_name)
aware_CF_final_loc_biocode_df = pd.DataFrame(index=aware_final_loc_df.index.tolist())
for code in cols:
    sign_factor = aware_flow_factors[code_to_name[code]]
    aware_CF_final_loc_biocode_df[code] = unknown_cf * sign_factor
aware_CF_final_loc_biocode_df.to_csv('../../Data/cfs/prepared_aware_CF.csv')

## 2. Build CF matrix of AWARE with global CFs for new ecoinvent

In [36]:
# Load the pickled biosphere index; later cells iterate over it and call .index() on it.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../../Data/final_matrix/biosphere_index.p', 'rb') as i:
    bio_index = pickle.load(i)

In [37]:
# Read the prepared CF table back from disk and turn its 'World average' row into a
# {column label: CF} dictionary.
aware_cfs = pd.read_csv(
    '../../Data/cfs/prepared_aware_CF.csv',
    index_col=0,
    keep_default_na=False,
)
world_average_cfs = aware_cfs.loc['World average']
aware_cf_dict = world_average_cfs.to_dict()
len(aware_cf_dict)

11

In [38]:
# One CF per entry of bio_index, in bio_index order; entries without a CF get 0.
cf_per_flow = [aware_cf_dict.get(flow_code, 0) for flow_code in bio_index]
aware_cf_array = np.array(cf_per_flow)
# Place the CFs on the diagonal of a square sparse matrix and convert it to CSR.
aware_cf_diag = sp.diags(aware_cf_array)
aware_cf_mat = aware_cf_diag.tocsr()
aware_cf_mat

<2102x2102 sparse matrix of type '<class 'numpy.float64'>'
	with 9 stored elements in Compressed Sparse Row format>

In [39]:
# Persist the global AWARE CF matrix as a pickle file.
with open('../../Data/final_matrix/CF_aware_glo.p', 'wb') as o:
    pickle.dump(aware_cf_mat, o)

## 3. Build CF matrix of AWARE with regional CFs for new ecoinvent

In [40]:
# Load the pickled full index; a later cell reads element [0] of each entry as a region name.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../../Data/final_matrix/full_index.p', 'rb') as i:
    full_index = pickle.load(i)

In [41]:
# Regional CF table: the prepared CF table without its "World average" row.
prepared_cfs = pd.read_csv(
    '../../Data/cfs/prepared_aware_CF.csv',
    index_col=0,
    keep_default_na=False,
)
CF_aware = prepared_cfs.drop("World average", axis=0)

In [42]:
# Position of every flow code in bio_index, built once so the loop below does not
# rescan bio_index for each (region, flow) pair. setdefault keeps the first position
# of a code, which matches what bio_index.index() returns.
bio_position = {}
for position, flow_code in enumerate(bio_index):
    bio_position.setdefault(flow_code, position)

aware_region_cf_dict = {}  # region -> CF array aligned with bio_index
not_available = set()  # bio flows in the AWARE table that are not in our bio_index
for region, cfs in CF_aware.iterrows():
    cf_array = np.zeros(len(bio_index))
    # Series.iteritems() was removed in pandas 2.0; items() is the equivalent.
    for flow_code, val in cfs.items():
        position = bio_position.get(flow_code)
        if position is None:
            not_available.add(flow_code)
        else:
            cf_array[position] = val
    aware_region_cf_dict[region] = cf_array

# Print name and categories of the AWARE flows that could not be placed.
for na in not_available:
    flow = bio.get(('biosphere3', na))
    print(flow['name'], '|', flow['categories'])

Fresh water (obsolete) | ('water', 'surface water')
Water | ('water', 'ground-, long-term')


In [43]:
#Stack arrays in sequence vertically (row wise). T. (len(bio_index), len(full_index) regions)
%time aware_cf_mat = sp.csc_matrix(np.vstack(aware_region_cf_dict[full_i[0]] for full_i in full_index)).T
aware_cf_mat

  """Entry point for launching an IPython kernel.


CPU times: user 5.34 s, sys: 1.02 s, total: 6.36 s
Wall time: 6.36 s


<2102x335099 sparse matrix of type '<class 'numpy.float64'>'
	with 3015891 stored elements in Compressed Sparse Row format>

In [None]:
# Persist the regional AWARE CF matrix as a pickle file.
with open('../../Data/final_matrix/CF_aware.p', 'wb') as o:
    pickle.dump(aware_cf_mat, o)

## 4. Build CF matrix of AWARE with global CFs for ecoinvent

In [50]:
# Run an inventory calculation for a randomly chosen activity and a randomly chosen method.
# NOTE(review): presumably the choice is irrelevant and the run only serves to populate
# lca.biosphere_dict, which the following cells read — confirm.
db = bw.Database('ecoinvent 3.7.1_cutoff_no_marketgroups')
lca = bw.LCA({db.random(): 1}, bw.methods.random())
lca.lci()

In [51]:
# Re-read the prepared CF table and extract its 'World average' row as a
# {column label: CF} dictionary.
aware_cfs = pd.read_csv(
    '../../Data/cfs/prepared_aware_CF.csv',
    index_col=0,
    keep_default_na=False,
)
world_average_row = aware_cfs.loc['World average']
aware_cf_dict = world_average_row.to_dict()

In [52]:
# CF vector aligned with brightway's biosphere rows. The values of lca.biosphere_dict
# are used as row positions elsewhere in this notebook, so write each CF at its
# position instead of relying on the dict's iteration order matching the row order.
# key[1] is the flow code used to look up the CF; flows without a CF keep 0.
aware_cf_arr_eco = np.zeros(len(lca.biosphere_dict))
for key, pos in lca.biosphere_dict.items():
    aware_cf_arr_eco[pos] = aware_cf_dict.get(key[1], 0)
# Place the CFs on the diagonal of a square sparse matrix stored as CSR.
aware_cf_mat_eco = sp.diags(aware_cf_arr_eco).tocsr()
aware_cf_mat_eco

<2102x2102 sparse matrix of type '<class 'numpy.float64'>'
	with 9 stored elements in Compressed Sparse Row format>

In [None]:
# Persist the global AWARE CF matrix in brightway's biosphere order as a pickle file.
with open('../../Data/final_matrix/CF_aware_glo_eco.p', 'wb') as o:
    pickle.dump(aware_cf_mat_eco, o)

## 5. Build CF matrix of IPCC with CFs in ecoinvent for new ecoinvent
In the biosphere matrix of the new ecoinvent, biosphere flows (bio_index) are sorted by bio code. In brightway, biosphere flows are ordered as in lca.biosphere_dict. To build the CF matrix, we therefore need to reorder the characterization matrix.

In [53]:
# List the registered methods whose first name component starts with 'ipcc 2013' (case-insensitive).
[m for m in bw.methods if m[0].lower().startswith('ipcc 2013')]

[('IPCC 2013 no LT', 'climate change', 'GTP 100a'),
 ('IPCC 2013 no LT', 'climate change', 'GTP 20a'),
 ('IPCC 2013 no LT', 'climate change', 'GWP 100a'),
 ('IPCC 2013 no LT', 'climate change', 'GWP 20a'),
 ('IPCC 2013', 'climate change', 'GTP 100a'),
 ('IPCC 2013', 'climate change', 'GTP 20a'),
 ('IPCC 2013', 'climate change', 'GWP 100a'),
 ('IPCC 2013', 'climate change', 'GWP 20a')]

In [54]:
# Method used for the IPCC CF matrix: GWP 100a from the 'IPCC 2013' family (not the 'no LT' variant).
ipcc = ('IPCC 2013', 'climate change', 'GWP 100a')

In [None]:
# Run inventory and impact assessment for a random activity with the IPCC method.
# The following cells read lca.biosphere_dict and lca.characterization_matrix from this run.
lca = bw.LCA({db.random():1}, method = ipcc)
lca.lci()
lca.lcia()
lca.biosphere_dict

In [56]:
# (flow code, row position) pairs taken from brightway's biosphere_dict, ordered by flow code.
code_and_position = [
    (flow_key[1], position) for flow_key, position in lca.biosphere_dict.items()
]
biosphere_dict_list = sorted(code_and_position, key=lambda pair: pair[0])
# brightway row positions, listed in flow-code order.
reordered_index = np.array([position for _, position in biosphere_dict_list])
reordered_index.shape

(2102,)

In [57]:
# Take the diagonal of brightway's characterization matrix, reorder it with
# reordered_index (flow-code order), and rebuild a diagonal CSR matrix from it.
ipcc_cf_mat = sp.diags(lca.characterization_matrix.diagonal()[reordered_index]).tocsr()
ipcc_cf_mat.shape

(2102, 2102)

In [None]:
# Persist the reordered IPCC CF matrix as a pickle file.
with open('../../Data/final_matrix/CF_ipcc.p', 'wb') as o:
    pickle.dump(ipcc_cf_mat, o)