# Make biosphere matrix

The biosphere exchanges of a dataset in the new ecoinvent are the same as those of its reference dataset in the original ecoinvent. This notebook first creates one biosphere vector per dataset and then assembles the vectors into the biosphere matrix for the new ecoinvent.


In [1]:
import pickle
import random
import itertools
import pandas as pd
import numpy as np
import scipy.sparse as sp

### 1. Create biosphere index 

In [2]:
# Load the list of biosphere flow records.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
with open('../../Data/lci_iot_imported/biosphere371.pickle', 'rb') as i:
    biosphere = pickle.load(i)
# Lookup table: flow code -> full flow record.
bio_code_dict = {b['code']:b for b in biosphere}

# Load the list of dataset records (each carries a 'code' and, as used later, an 'exchanges' list).
with open('../../Data/lci_iot_imported/cutoff371_no_mg.pickle', 'rb') as i:
    datasets = pickle.load(i)
# Lookup table: dataset code -> full dataset record.
code_dict = {d['code']:d for d in datasets}

# Sanity check: number of biosphere flows and number of datasets loaded.
print(len(biosphere), len(datasets))

4332 19013


In [3]:
# Collect the flow code of every biosphere-type exchange found in any dataset.
actual_flows = {
    exchange['flow']
    for dataset_record in datasets
    for exchange in dataset_record['exchanges']
    if exchange['type'] == 'biosphere'
}
len(actual_flows)

2102

In [4]:
# Row labels of the biosphere matrix: codes of the flows that are actually
# used by at least one dataset, in sorted order.
bio_index = [b['code'] for b in biosphere if b['code'] in actual_flows]
bio_index.sort()
# Reverse lookup (flow code -> row position), used when filling in amounts.
bio_index_dict = dict(zip(bio_index, range(len(bio_index))))
len(bio_index_dict)

2102

### 2. Create biosphere vector for each dataset in new ecoinvent

In [5]:
# Load the ordered index of the new-ecoinvent entries; one matrix column is built per entry.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
with open('../../Data/tech_vector/full_index.p', 'rb') as i:
    full_index = pickle.load(i)
    
# Load the mapping from each entry of full_index to the code of its reference dataset.
with open('../../Data/tech_vector/full_reference_dataset_dict.p', 'rb') as i:
    reference_dataset_dict = pickle.load(i)

# Total number of cells (columns x rows) the biosphere matrix would have if stored densely;
# the matrix shape itself is (len(bio_index), len(full_index)).
len(full_index)*len(bio_index)

704378098

In [6]:
def get_biosphere_exchanges(dataset):
    """Return the biosphere-type exchanges of a dataset, in their original order.

    Parameters
    ----------
    dataset : dict
        Record with an ``'exchanges'`` list; every exchange has a ``'type'`` key.

    Returns
    -------
    list
        The exchange dicts whose ``'type'`` equals ``'biosphere'``.
    """
    selected = []
    for exchange in dataset['exchanges']:
        if exchange['type'] == 'biosphere':
            selected.append(exchange)
    return selected

def make_biosphere_vector(biosphere_exchanges, index_dict=None, n_flows=None):
    """Build a sparse column vector of biosphere amounts for one dataset.

    Amounts of exchanges that refer to the same flow are summed. (Plain
    assignment would silently keep only the last exchange of a repeated flow.)

    Parameters
    ----------
    biosphere_exchanges : iterable of dict
        Exchanges carrying a ``'flow'`` key (flow code) and a numeric
        ``'amount'`` key.
    index_dict : dict, optional
        Mapping flow code -> row position. Defaults to the notebook-level
        ``bio_index_dict``.
    n_flows : int, optional
        Number of rows of the vector. Defaults to ``len(bio_index)``.

    Returns
    -------
    scipy.sparse.coo_matrix
        Float matrix of shape ``(n_flows, 1)`` holding only non-zero totals.

    Raises
    ------
    KeyError
        If an exchange refers to a flow that is missing from ``index_dict``.
    """
    if index_dict is None:
        index_dict = bio_index_dict
    if n_flows is None:
        n_flows = len(bio_index)

    # Accumulate per row so repeated flows add up instead of overwriting.
    totals = {}
    for exchange in biosphere_exchanges:
        row = index_dict[exchange['flow']]
        totals[row] = totals.get(row, 0.0) + exchange['amount']

    # Drop zero totals so no explicit zeros are stored, matching what a
    # dense -> sparse conversion would produce.
    rows = sorted(row for row, total in totals.items() if total != 0)
    data = np.array([totals[row] for row in rows], dtype=float)
    row_idx = np.array(rows, dtype=np.intp)
    col_idx = np.zeros(len(rows), dtype=np.intp)

    # Build the sparse vector directly; no dense intermediate array is needed.
    return sp.coo_matrix((data, (row_idx, col_idx)), shape=(n_flows, 1), dtype=float)

In [7]:
# One sparse biosphere column per original dataset, keyed by dataset code.
vector_dict = {}
for dataset_code, dataset_record in code_dict.items():
    exchanges = get_biosphere_exchanges(dataset_record)
    # Datasets without biosphere exchanges get an all-zero sparse column.
    vector_dict[dataset_code] = (
        make_biosphere_vector(exchanges)
        if exchanges
        else sp.coo_matrix((len(bio_index), 1))
    )

# Each entry of full_index reuses the column of its reference dataset.
biosphere_vector_list = []
for ind in full_index:
    biosphere_vector_list.append(vector_dict[reference_dataset_dict[ind]])

# more efficient
# get_ipython().run_cell_magic('time', '', 'vector_dict = dict()\nfor code, dataset in code_dict.items():\n    bio_exchanges = get_biosphere_exchanges(dataset)\n    if bio_exchanges:\n        bio_vec = make_biosphere_vector(bio_exchanges)\n    else:\n        bio_vec = sp.coo_matrix((len(bio_index),1))\n    \n    vector_dict[code] = bio_vec')
# get_ipython().run_cell_magic('time', '', 'biosphere_vector_list = [vector_dict[reference_dataset_dict[ind]] for ind in full_index]\nprint(len(biosphere_vector_list))')

In [8]:
len(biosphere_vector_list)

335099

### 3. Create biosphere matrix

In [9]:
# Stack the per-entry column vectors side by side and convert the result to CSC.
# run_line_magic replaces the deprecated get_ipython().magic(...) entry point;
# the timed statement itself is unchanged.
get_ipython().run_line_magic('time', 'biosphere_matrix = sp.hstack(biosphere_vector_list).tocsc()')
biosphere_matrix.shape

CPU times: user 9.15 s, sys: 203 ms, total: 9.35 s
Wall time: 9.35 s


(2102, 335099)

In [10]:
# Persist the assembled biosphere matrix (CSC format).
with open('../../Data/final_matrix/biosphere_matrix_csc.p', 'wb') as o:
    pickle.dump(biosphere_matrix, o)

# Persist the sorted flow codes that label the matrix rows, so rows can be mapped back to flows.
with open('../../Data/final_matrix/biosphere_index.p', 'wb') as o:
    pickle.dump(bio_index, o)