# ------------------------------------------ Part 1 ------------------------------------------

In [None]:
import pandas as pd
import numpy as np
from copy import copy
import os
import re
import brightway2 as bw

# Comments on the following code
> - ### Raw data: Excel sheet can be obtained from https://doi.org/10.1021/acs.est.8b01452
> - ### Terms `exchange` and `input activity` are used interchangeably

# Still TODO
> - ### First exchange should be in 1 unit - probably not the case for us -> check that

# Define constants

In [2]:
# Name of the consumption database that is created by this notebook
CONSUMPTION_DB_NAME = 'CH consumption 1.0'
# How many leading columns of a raw-file row (df_raw) are passed on as activity information
N_ACT_RELEVANT = 11
# Position of the first column that belongs to an input activity
FIRST_ACT_IND = 7
# Number of columns that describe a single input activity
N_COLUMNS_INPUT_ACTIVITY = 5

# Exchange field name -> excel column letter; the fields occupy consecutive columns A..I in this order
EXC_COLUMNS_DICT = dict(zip(
    (
        'name',
        'reference product',
        'location',
        'amount',
        'unit',
        'database',
        'type',
        'categories',
        'comment',
    ),
    'ABCDEFGHI',
))

# Activity type as stored in a database -> exchange type that is written to the excel file
ACTIVITY_TYPE_DICT = dict(process='technosphere', emission='biosphere')

# Convert data to brightway database format -> all functions

In [3]:
# Add missing On columns
def complete_columns(df):
    '''
    Insert an `On <number>` column name in front of every `Activity <number>` column name
    that is not already preceded by a name containing `On`.

    The completed list of names is cut back to the original number of columns and assigned
    to `df.columns`, so `df` is modified in place; the same object is returned.
    '''
    original_names = list(df.columns)
    completed_names = list(original_names)

    shift = 0  # number of names inserted so far, moves later insert positions to the right
    for position, name in enumerate(original_names):
        if 'Activity' not in name:
            continue
        if 'On' in original_names[position - 1]:
            continue
        # Everything after the first space of the activity name is used as its number
        number = name.split(' ', 1)[-1]
        completed_names.insert(position + shift, 'On ' + number)
        shift += 1

    df.columns = completed_names[:len(original_names)]

    return df

In [4]:
def create_df_bw(db_name, n_cutoff_cols = len(EXC_COLUMNS_DICT)+3):
    '''
    Create dataframe for a new database in the Brightway format and add the necessary meta information.

    The dataframe has columns `A` and `B` and three rows: `cutoff` with `n_cutoff_cols`,
    `database` with `db_name`, and one empty (all-NaN) separator row.
    '''
    meta_rows = [['cutoff', n_cutoff_cols], ['database', db_name]]
    df = pd.DataFrame(meta_rows, columns=list('AB'))
    # `DataFrame.append` was removed in pandas 2.0; reindexing with one extra label
    # adds the same trailing all-NaN row.
    df = df.reindex(range(len(df) + 1))
    return df

In [5]:
def compute_act_unit(df):
    '''
    Return the unit of an activity.

    If the key `Quantity code` is present in `df`, the unit is the text of `DB Act 1`
    between the first opening bracket and the next comma; otherwise the unit is CHF.
    '''
    if 'Quantity code' not in df.keys():
        return 'CHF'

    text_after_bracket = df['DB Act 1'].split('(')[1]
    return text_after_bracket.split(',')[0]

In [6]:
def append_activity(df, df_ind):
    '''
    Append activity from row df_ind to the dataframe df in the brightway format.

    An empty separator row is added first, followed by five rows (columns `A` and `B`):
    Activity, reference product, location, amount and unit.

    Returns the extended dataframe and the dataframe that holds only the five activity rows.
    '''
    # Append empty row. `DataFrame.append` was removed in pandas 2.0, so the index is
    # renumbered and extended by one label, which yields the same all-NaN row.
    df = df.reset_index(drop=True)
    df = df.reindex(range(len(df) + 1))

    # Extract activity information.
    # The unit is derived from the activity row `df_ind`: passing the output dataframe `df`
    # (whose columns are excel letters) could never contain `Quantity code`, so the unit
    # was always CHF.
    act_name = df_ind['Translated name']
    act_unit = compute_act_unit(df_ind)

    first_row = len(df)

    act_data = [ ['Activity', act_name],
                 ['reference product',  act_name],
                 ['location', 'CH'],
                 ['amount', 1],
                 ['unit', act_unit] ]

    # Index continues where df ends, so that row labels stay consecutive after concatenation
    df_act = pd.DataFrame( act_data,
                           columns=list('AB'),
                           index = np.arange(first_row, first_row+len(act_data)) )

    df = pd.concat([df, df_act], sort=False)

    return df, df_act

In [7]:
def append_exchanges_in_correct_columns(df, dict_with_values):
    '''
    Make sure that exchanges values are appended to df in the correct columns.

    `dict_with_values` maps exchange field names (keys of EXC_COLUMNS_DICT) to values.
    One row is appended to `df`, each value being placed in the excel column of its field.
    When `dict_with_values` equals EXC_COLUMNS_DICT itself, the field names are written
    instead of the values, which produces the header row of the exchanges section.
    '''
    col_names = list(dict_with_values) # order of columns is determined by this list
    col_excel_literal = [EXC_COLUMNS_DICT[name] for name in col_names]

    if dict_with_values != EXC_COLUMNS_DICT:
        col_data = [dict_with_values[name] for name in col_names]
    else:
        col_data = col_names

    # `DataFrame.append` was removed in pandas 2.0, `pd.concat` is the replacement
    new_row = pd.DataFrame([col_data], columns=col_excel_literal, index=[len(df)])
    df = pd.concat([df, new_row], sort=False)

    return df

In [8]:
def append_exchanges_column_names(df):
    '''
    Add column names for exchanges: a row with `Exchanges` in column `A`,
    followed by a row with the exchange field names in their excel columns.
    '''
    # `DataFrame.append` was removed in pandas 2.0, `pd.concat` is the replacement
    section_label = pd.DataFrame(['Exchanges'], columns=['A'], index=[len(df)])
    df = pd.concat([df, section_label], sort=False)
    # Passing EXC_COLUMNS_DICT itself makes the helper write the field names as a header row
    df = append_exchanges_in_correct_columns(df, EXC_COLUMNS_DICT)
    return df

In [9]:
def append_first_exchange(df, df_act):
    '''
    Append the first exchange, which is the activity itself.

    Name, reference product, location and unit are taken from `df_act` (label in column `A`,
    value in column `B`); the amount is always 1, the database is the one that is currently
    being created and the type is `production`.
    '''
    act_fields = df_act.set_index('A')['B'].to_dict()

    production_exchange = {
        'name': act_fields['Activity'],
        'reference product': act_fields['reference product'],
        'location': act_fields['location'],
        'amount': 1,
        'unit': act_fields['unit'],
        'database': CONSUMPTION_DB_NAME,
        'type': 'production',
    }

    return append_exchanges_in_correct_columns(df, production_exchange)

In [10]:
def is_pattern_correct(df_ind_j):
    '''
    Check that the labels of `df_ind_j` follow the pattern of one input activity:
    On, Activity, DB Act, CFL Act, Amount Act (each label must contain the respective text).

    Returns 1 if the first N_COLUMNS_INPUT_ACTIVITY labels match and 0 otherwise.
    The caller uses the result to move on to the next columns, because for some input
    activities relevant values are missing, eg only the 'On' value is present.
    '''
    expected_fragments = ('On', 'Activity', 'DB Act', 'CFL Act', 'Amount Act')
    labels = list(df_ind_j.index)
    matches = [
        expected_fragments[position] in labels[position]
        for position in range(N_COLUMNS_INPUT_ACTIVITY)
    ]
    return 1 if all(matches) else 0

In [11]:
def append_exchanges(df, df_ind, df_act):
    '''
    Add all exchanges (input activities) from the row df_ind to consumption database dataframe.

    The exchanges header and the production exchange are written first. Then the columns of
    `df_ind` starting at FIRST_ACT_IND are read in windows of N_COLUMNS_INPUT_ACTIVITY columns,
    one window per input activity. Windows that do not match the expected pattern are shifted
    column by column until they do.
    '''
    # Add exchanges column names
    df = append_exchanges_column_names(df)

    # Add first exchange that is the same as the activity itself, type of this exchange is production
    df = append_first_exchange(df, df_act)

    # Add all exchanges
    n_cols = len(df_ind)
    n_exchanges, n_leftover = divmod(n_cols - FIRST_ACT_IND, N_COLUMNS_INPUT_ACTIVITY)
    if n_leftover:
        print('smth is not right with exchanges of Activity -> ' + str(df_ind['Translated name']))

    conversion_dem2fu = df_ind['ConversionDem2FU']
    start = FIRST_ACT_IND
    for _ in range(n_exchanges):

        # Check that the window contains <On 1, Activity 1, DB Act 1, CFL Act 1, Amount Act 1> pattern;
        # if the pattern is not correct, skip one column and check again.
        while start + N_COLUMNS_INPUT_ACTIVITY <= n_cols \
              and not is_pattern_correct(df_ind[start:start + N_COLUMNS_INPUT_ACTIVITY]):
            start += 1

        # Stop when fewer columns are left than one input activity needs. Without this bound
        # the scan ran past the end of the row and failed with an IndexError.
        if start + N_COLUMNS_INPUT_ACTIVITY > n_cols:
            break

        df_ind_j = df_ind[start:start + N_COLUMNS_INPUT_ACTIVITY]
        df = append_one_exchange(df, df_ind_j, conversion_dem2fu)
        start += N_COLUMNS_INPUT_ACTIVITY

    return df

In [12]:
def create_input_act_dict(act_bw, input_act_amount):
    '''
    Create a dictionary with all info about input activities.

    Name, location, unit, database and type are read from `act_bw`, the amount is
    `input_act_amount`. `reference product` is added only if `act_bw` provides it.
    Raises KeyError if the type of `act_bw` is not a key of ACTIVITY_TYPE_DICT.
    '''

    input_act_values_dict = {
        'name': act_bw['name'],
        'location': act_bw['location'],
        'amount': input_act_amount,
        'unit': act_bw['unit'],
        'database': act_bw['database'],
        # We do not expect type biosphere, but assign it via ACTIVITY_TYPE_DICT anyway
        # to be sure that we don't encounter them.
        'type': ACTIVITY_TYPE_DICT[act_bw['type']],
    }
    # `reference product` is optional: only a missing key is tolerated, any other error is
    # no longer swallowed by a bare except.
    try:
        input_act_values_dict['reference product'] = act_bw['reference product']
    except KeyError:
        pass

    return input_act_values_dict

In [25]:
def bw_get_activity_info_manually(input_act_str, db_name, input_act_amount):
    '''
    Build the exchange dictionary from the text description of an input activity.

    The name is the text between the first two apostrophes of `input_act_str` (or everything
    before the apostrophe if there is only one). Unit and location are the first two
    comma-separated entries of the bracketed part. The exchange gets a TODO comment.
    '''
    # Extract the activity name
    quote_positions = [match.start() for match in re.finditer("'", input_act_str)]
    if len(quote_positions) == 1:
        name_start, name_end = 0, quote_positions[0]
    else:
        name_start, name_end = quote_positions[0] + 1, quote_positions[1]
    input_act_name = input_act_str[name_start:name_end]

    # Extract unit and location from the bracketed part, all other characters become spaces
    bracket_open = input_act_str.find("(")
    bracket_close = input_act_str.find(")")
    bracketed_parts = input_act_str[bracket_open:bracket_close + 1].split(',')
    cleaned_parts = [re.sub('[^-A-Za-z0-9-€-]', ' ', part).strip() for part in bracketed_parts]
    input_act_unit = cleaned_parts[0]
    input_act_location = cleaned_parts[1]

    # Add comment when activity cannot be found
    return {
        'name': input_act_name,
        'unit': input_act_unit,
        'location': input_act_location,
        'amount': input_act_amount,
        'database': db_name,
        'type': ACTIVITY_TYPE_DICT['process'], # TODO remove hardcoding
        'comment': 'TODO could not find this activity',
    }

In [26]:
 def append_one_exchange(df, df_ind_j, ConversionDem2FU):
    '''
    Extract information about one input activity, eg name, unit, location, etc and append it to the dataframe df.

    `df_ind_j` holds the values `On k`, `Activity k`, `DB Act k`, `CFL Act k` and `Amount Act k`
    of one input activity. The exchange amount is the product of `ConversionDem2FU` and the
    `On`, `CFL Act` and `Amount Act` values. Input activities from databases with `heia` in
    their name are skipped, in which case `df` is returned unchanged.
    '''
    # Extract the activity number
    k = int(''.join(c for c in df_ind_j.index[0] if c.isdigit()))
    # Extract information about activity and save it
    input_act_str = df_ind_j['DB Act ' + str(k)]
    input_act_db_code = df_ind_j['Activity ' + str(k)]

    # Find this input activity in brightway databases:
    # database name and code are the first and second apostrophe-quoted parts of the text
    quoted_parts = input_act_db_code.split("'")
    db_name = quoted_parts[1]
    code = quoted_parts[3]
    input_act_db_code_tuple = (db_name, code)

    # TODO remove HEIA for now
    if 'heia' in db_name:
        return df

    # Compute amount
    input_act_amount = ConversionDem2FU \
                     * df_ind_j['On ' + str(k)] \
                     * df_ind_j['CFL Act ' + str(k)] \
                     * df_ind_j['Amount Act ' + str(k)]

    try:
        # Find activity using bw functionality
        act_bw = bw.get_activity(input_act_db_code_tuple)
        input_act_values_dict = create_input_act_dict(act_bw, input_act_amount)
    except Exception:
        # If bw.get_activity does not work for whichever reason, fill info manually.
        # `Exception` keeps this best-effort fallback but, unlike a bare except,
        # lets KeyboardInterrupt and SystemExit through.
        input_act_values_dict = bw_get_activity_info_manually(input_act_str, db_name, input_act_amount)

    # Add exchange to the dataframe with database in brightway format
    df = append_exchanges_in_correct_columns(df, input_act_values_dict)

    return df

In [27]:
# def append_one_exchange_old(df, df_ind_j, ConversionDem2FU):
#     '''
#     Extract information about one input activity, eg name, unit, location, etc and append it to the dataframe df.
#     '''    
#     # Extract the activity number
#     k = int(''.join(c for c in df_ind_j.index[0] if c.isdigit()))
    
#     input_act_str = df_ind['DB Act ' + str(k)]
    
#     # Extract the activity name
#     apostrophes = [(m.start(0), m.end(0)) for m in re.finditer("'", input_act_str)]
#     if len(apostrophes) == 1:
#         ap_start = 0
#         ap_end = apostrophes[0][0]
#     else:
#         ap_start = apostrophes[0][1]
#         ap_end = apostrophes[1][0]
#     input_act_name = input_act_str[ ap_start:ap_end ]
    
#     input_act_unit_loc = input_act_str[ input_act_str.find("(") : input_act_str.find(")")+1 ]
# #     input_act_unit_loc_split = [ re.sub('\W+', ' ' , el) for el in input_act_unit_loc.split(',')]
#     input_act_unit_loc_split = [ re.sub('[^-A-Za-z0-9-€-]', ' ' , el).rstrip().lstrip() \
#                                  for el in input_act_unit_loc.split(',')]
#     input_act_unit = input_act_unit_loc_split[0]
#     input_act_location = input_act_unit_loc_split[1]
    
#     # Extract input activity amount
#     input_act_amount = df_ind['On ' + str(k)] * df_ind['CFL Act ' + str(k)] * df_ind['Amount Act ' + str(k)]
    
#     # Find this input activity in brightway databases
#     input_act_db_code = df_ind['Activity ' + str(k)]
#     db_name = input_act_db_code.split("'")[1]
#     db = bw.Database(db_name)
    
#     # TODO remove HEIA for now
#     if 'heia' in db_name:
#         return df
    
#     if 'EXIOBASE' in db_name and input_act_location == 'CH':
#         input_act_location = 'Switzerland'
    
#     acts_bw = [act for act in db if  input_act_name == act['name'] \
#                                  and input_act_unit == act['unit'] \
#                                  and input_act_location == act['location']]
    
#     # TODO change this part once we get rid of non unique activities
#     try:
#         act_bw = acts_bw[0]   
#         input_act_values_dict = create_input_act_dict(act_bw, input_act_amount)
        
#         # Add comment when activity is not unique
#         if len(acts_bw) > 1:
#             input_act_values_dict['comment'] = 'TODO: not unique!'
            
#     except:
#         # Add comment when activity cannot be found
#         input_act_values_dict = {}
#         input_act_values_dict['name'] = input_act_name
#         input_act_values_dict['unit'] = input_act_unit
#         input_act_values_dict['location'] = input_act_location
#         input_act_values_dict['amount'] = input_act_amount
#         input_act_values_dict['database'] = db_name
#         input_act_values_dict['type'] = ACTIVITY_TYPE_DICT['process'] # TODO remove hardcoding
#         input_act_values_dict['comment'] = 'TODO: cannot find this activity'
        
#     # Add exchange to the dataframe with database in brightway format
#     df = append_exchanges_in_correct_columns(df, input_act_values_dict)
    
#     return df

# Convert data to brightway database format -> main code
This cell calls all the functions defined above.

In [28]:
%%time
# Start brightway project that already contains databases
project = 'GSA for ecoinvent'
bw.projects.set_current(project)

# Create dataframe that will be our consumption database after we add activities and exchanges from the raw file
df_bw = create_df_bw(CONSUMPTION_DB_NAME)

# Read data
path = 'data/es8b01452_si_002.xlsx'
sheet_name = 'Overview & LCA-Modeling'
df_raw = pd.read_excel(path, sheet_name = sheet_name, header=2)

# Add ON columns (complete_columns renames the columns of df_raw in place and returns the same object)
df = complete_columns(df_raw)

# Index labels of all rows that describe an activity
act_indices = df_raw.index[df_raw['ConversionDem2FU'].notna()].tolist()

for ind in act_indices:
    # For each row: act_indices holds index labels, so the row is selected by label (.loc)
    # and not by position (.iloc); both agree only for a default 0..n-1 index
    df_ind = df_raw.loc[ind]
    df_ind = df_ind[df_ind.notna()]
    # Add activity
    df_bw, df_act = append_activity(df_bw, df_ind[:N_ACT_RELEVANT]) # only pass columns relevant to this function
    # Add exchanges
    df_bw = append_exchanges(df_bw, df_ind, df_act)

smth is not right with exchanges of Activity -> Desktop computers
smth is not right with exchanges of Activity -> Rice
smth is not right with exchanges of Activity -> Pasta products
smth is not right with exchanges of Activity -> Bread  
smth is not right with exchanges of Activity -> Wheat flour 
smth is not right with exchanges of Activity -> Other flours and meals, starches, semolina, flakes and grains
smth is not right with exchanges of Activity -> Other cereal products
smth is not right with exchanges of Activity -> Beef
smth is not right with exchanges of Activity -> Veal
smth is not right with exchanges of Activity -> Pork, fresh or frozen
smth is not right with exchanges of Activity -> Horse meat
smth is not right with exchanges of Activity -> Sheep and Goat meat
smth is not right with exchanges of Activity -> Poultry, fresh or frozen
smth is not right with exchanges of Activity -> Hare, game and rabbit meat
smth is not right with exchanges of Activity -> Other eatable meat pro

In [29]:
# Write the dataframe to excel file
write_dir_name = 'write_files'
# exist_ok=True replaces the check-then-create sequence (os.path.exists + os.mkdir),
# which could fail if the directory appeared between the two calls
os.makedirs(write_dir_name, exist_ok=True)
db_bw_path = write_dir_name + '/' + 'consumption_db.xlsx'
# No index and no header: the sheet must contain only the rows built above
df_bw.to_excel(db_bw_path, index=False, header=False)

# ------------------------------------------ Part 2 ------------------------------------------

In [30]:
import pandas as pd
import numpy as np
import brightway2 as bw
import string

# Constants

In [152]:
# Label of the dataframe column in which database names are looked up and replaced
DB_COLUMN = 'F'

# Replace names of old databases with the new ones in the consumption database excel file

In [153]:
def replace_one_db(df, db_old_name, db_new_name):
    '''
    Replace database name with a new one (eg in case a newer version is available).

    Every cell of column DB_COLUMN that equals `db_old_name` is set to `db_new_name`.
    The input dataframe is left untouched, a modified copy is returned.
    '''
    df_updated = df.copy()

    # A single .loc assignment with a boolean mask replaces the chained assignment
    # `df[col][positions] = value`, which triggers SettingWithCopyWarning, does nothing
    # under pandas copy-on-write and used positions where labels are expected.
    is_old_db = df_updated[DB_COLUMN] == db_old_name
    df_updated.loc[is_old_db, DB_COLUMN] = db_new_name

    return df_updated

In [154]:
def update_all_db(df):
    '''
    Update all databases in the consumption database:
    each old database name is replaced by its new name, in the order given below.
    '''
    # (old name, new name)
    renamed_databases = (
        ('Agribalyse 1.2', 'Agribalyse 1.2 - ecoinvent 3.3 cutoff'),
        ('ecoinvent 3.3 cutoff', 'ecoinvent 3.6 cutoff'),
    )

    for db_old_name, db_new_name in renamed_databases:
        df = replace_one_db(df, db_old_name, db_new_name)

    return df

In [192]:
# Main code
# Select the brightway project to work in
project = 'GSA for ecoinvent'
bw.projects.set_current(project)

# Read consumption database (the file has no header row)
path = 'write_files/consumption_db.xlsx'
df = pd.read_excel(path, header = None)
# Name the columns A, B, C, ... so that they can be addressed by letter
df.columns = list(string.ascii_uppercase[:len(df.columns)])

# Replace old database names and write the result to a new file
df = update_all_db(df)
path_new_db = 'write_files/consumption_db_updated.xlsx'
df.to_excel(path_new_db, index=False, header=False)

# Import consumption database linked to older versions of other databases

# 1. Ecoinvent 3.6

### TODO Chris -> please check migrations

In [226]:
# Run the excel importer only if no database with this name is present in bw.databases.
# NOTE(review): `co` is assigned only in the else branch, so the later cells that use `co`
# presumably fail with a NameError when the database is already present - confirm.
if CONSUMPTION_DB_NAME in bw.databases:
    print(CONSUMPTION_DB_NAME + " database already present!!! No import is needed")
else: 
    co = bw.ExcelImporter(path_new_db)
    co.apply_strategies()
    # Match against each background database, using the listed fields
    co.match_database('EXIOBASE 2.2', fields=('name','reference product', 'unit','location','categories'))
    co.match_database('ecoinvent 3.6 cutoff', fields=('name', 'reference product', 'unit','location','categories'))
    # Fewer fields are used for Agribalyse (no reference product and no categories)
    co.match_database('Agribalyse 1.2 - ecoinvent 3.3 cutoff', fields=('name','unit','location'))
    # Print the number of datasets, exchanges and unlinked exchanges
    co.statistics()

Extracted 1 worksheets in 0.41 seconds
Applying strategy: csv_restore_tuples
Applying strategy: csv_restore_booleans
Applying strategy: csv_numerize
Applying strategy: csv_drop_unknown
Applying strategy: csv_add_missing_exchanges_section
Applying strategy: normalize_units
Applying strategy: normalize_biosphere_categories
Applying strategy: normalize_biosphere_names
Applying strategy: strip_biosphere_exc_locations
Applying strategy: set_code_by_activity_hash
Applying strategy: link_iterable_by_fields
Applying strategy: assign_only_product_as_production
Applying strategy: link_technosphere_by_activity_hash
Applying strategy: drop_falsey_uncertainty_fields_but_keep_zeros
Applying strategy: convert_uncertainty_types_to_integers
Applying strategy: convert_activity_parameters_to_list
Applied 16 strategies in 0.31 seconds
Applying strategy: link_iterable_by_fields
Applying strategy: link_iterable_by_fields
Applying strategy: link_iterable_by_fields
203 datasets
6058 exchanges
95 unlinked exch

In [232]:
# list(co.unlinked)

In [228]:
# Define a migration for two particular activities that can only be hardcoded
# `fields` lists the fields that identify an entry; each item of `data` is a pair of
# (values of these fields, dictionary with the new values).
ecoinvent36_change_names_data = {
    'fields': ['name', ],
    'data': [
        (
            ['steam production in chemical industry'], 
            {
                'name': 'steam production, in chemical industry',
                'reference product': 'steam, in chemical industry',
                'unit': 'kilogram',
                'multiplier': 1/2.75, # see comment on this activity in ecoinvent
            }
        ),
        (
            ['market for green bell pepper'],
            {
                'name': 'market for bell pepper',
                'reference product': 'bell pepper',
            }
        ),
    ]
}

# Store the migration under the name that is passed to `co.migrate` later on
bw.Migration("ecoinvent36-change-names").write(
    ecoinvent36_change_names_data,
    description="Change names of some activities"
)

In [229]:
# Define a migration for rice production and specific locations
# These locations have only non-basmati rice production
# Entries are identified by name and location (see `fields`); only US and CN are covered here.
ecoinvent36_rice_production_data = {
    'fields': ['name', 'location'],
    'data': [
        (
            ['rice production', 'US'],
            {
                'name': 'rice production, non-basmati',
                'reference product': 'rice, non-basmati'
            }
        ),
        (
            ['rice production', 'CN'],
            {
                'name': 'rice production, non-basmati',
                'reference product': 'rice, non-basmati'
            }
        ),
    ]
}

# NOTE(review): the description is the same as for "ecoinvent36-change-names" - presumably copied, confirm
bw.Migration("ecoinvent36-rice-production").write(
    ecoinvent36_rice_production_data,
    description="Change names of some activities"
)

In [230]:
# Apply both migrations, then match against ecoinvent 3.6 again and print the updated statistics
co.migrate('ecoinvent36-change-names')
co.migrate("ecoinvent36-rice-production")
co.match_database('ecoinvent 3.6 cutoff', fields=('name','reference product', 'unit','location','categories'))
co.statistics()

Applying strategy: migrate_datasets
Applying strategy: migrate_exchanges
Applying strategy: migrate_datasets
Applying strategy: migrate_exchanges
Applying strategy: link_iterable_by_fields
203 datasets
6058 exchanges
26 unlinked exchanges
  Type technosphere: 7 unique unlinked exchanges


(203, 6058, 26)

In [231]:
# Define a migration with allocation based on the share in production volumes

In [281]:
def get_unlinked_act(db2import, unlinked_exc):
    '''
    Return the activities from `db2import.data` that contain an exchange equal to `unlinked_exc`.
    An activity is listed once for every exchange that is equal, so it can appear several times.
    '''
    return [
        act
        for act in db2import.data
        for exc in act['exchanges']
        if exc == unlinked_exc
    ]

In [300]:
unlinked_excs = list(co.unlinked)
# For every unlinked exchange: one dictionary that maps the exchange to the activities
# where this exchange is present.
# - A new dictionary is created per exchange; `[dict()]*n` repeated one and the same
#   dictionary n times, so all list entries shared their content.
# - The key is built from the (field, value) pairs of the exchange; `tuple(unlinked_exc)`
#   contains only the field names, which are the same for exchanges with the same fields.
#   Assumes all exchange values are hashable - TODO confirm.
acts = []
for unlinked_exc in unlinked_excs:
    acts.append({tuple(unlinked_exc.items()): get_unlinked_act(co, unlinked_exc)})

In [308]:
co.get_activity((co.data[0]['database'], co.data[0]['code']))

TypeError: get_activity() missing 1 required positional argument: 'ws'

In [313]:
co.

TypeError: process_activities() missing 1 required positional argument: 'data'

In [298]:
unlinked_exc

{'name': 'market for rice',
 'reference product': 'rice',
 'location': 'GLO',
 'amount': 0.0,
 'unit': 'kilogram',
 'database': 'ecoinvent 3.6 cutoff',
 'type': 'technosphere'}

In [276]:
# NOTE(review): this cell was interrupted manually (see the KeyboardInterrupt below).
# Presumably `co.unlinked` provides a fresh iterator on every access, so `next` always
# returns the first item and never raises StopIteration - confirm before reusing this cell.
while True:
    next(co.unlinked)


KeyboardInterrupt: 

In [278]:
co.data[0]['exchanges']

[{'name': 'Desktop computers',
  'reference product': 'Desktop computers',
  'location': 'CH',
  'amount': 1.0,
  'unit': 'CHF',
  'database': 'CH consumption 1.0',
  'type': 'production',
  'input': ('CH consumption 1.0', '13ee003610534e860db3cc78c1347cc1')},
 {'name': 'market for computer, desktop, without screen',
  'reference product': 'computer, desktop, without screen',
  'location': 'GLO',
  'amount': 0.25,
  'unit': 'unit',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere',
  'input': ('ecoinvent 3.6 cutoff', '3fc8677e12987ef34d5dc084d6923c8f')},
 {'name': 'market for display, liquid crystal, 17 inches',
  'reference product': 'display, liquid crystal, 17 inches',
  'location': 'GLO',
  'amount': 0.08333333333333333,
  'unit': 'unit',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere',
  'input': ('ecoinvent 3.6 cutoff', '4be589ad2931fbdd511f99ab5f062395')},
 {'name': 'market for display, cathode ray tube, 17 inches',
  'reference product': 'display, ca

In [254]:
list(co.unlinked)

[{'name': 'market for rice',
  'reference product': 'rice',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'rice production',
  'reference product': 'rice',
  'location': 'RoW',
  'amount': 1.1793134592,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'rice production',
  'reference product': 'rice',
  'location': 'IN',
  'amount': 0.1380030448211913,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'market for wheat grain',
  'reference product': 'wheat grain',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'market for maize grain',
  'reference product': 'maize grain',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'market for ma

In [None]:
get_unlinked(co, )

In [209]:
# For every unlinked exchange print its description, followed by all activities of `db`
# whose name contains the name of the exchange (possible candidates for linking)
for u in list(co.unlinked):
    description = ', '.join([u['name'], u['unit'], u['location'], u['reference product']])
    print('Unlinked exchange: ' + description)
    for act in db:
        if u['name'] in act['name']:
            print(str(act) + ' - ' + act['reference product'])
    print('\n')

Unlinked exchange: market for rice, kilogram, GLO, rice
'market for rice, non-basmati' (kilogram, GLO, None) - rice, non-basmati
'market for rice, basmati' (kilogram, GLO, None) - rice, basmati
'market for rice seed, for sowing' (kilogram, GLO, None) - rice seed, for sowing


Unlinked exchange: rice production, kilogram, RoW, rice
'rice production, basmati' (kilogram, RoW, None) - rice, basmati
'rice production, non-basmati' (kilogram, CN, None) - rice, non-basmati
'rice production, basmati' (kilogram, IN, None) - rice, basmati
'rice production, non-basmati' (kilogram, IN, None) - rice, non-basmati
'rice production, non-basmati' (kilogram, RoW, None) - rice, non-basmati
'rice production, non-basmati' (kilogram, CN, None) - straw
'rice production, non-basmati' (kilogram, US, None) - rice, non-basmati
'rice production, non-basmati' (kilogram, RoW, None) - straw


Unlinked exchange: rice production, kilogram, US, rice
'rice production, basmati' (kilogram, RoW, None) - rice, basmati
'rice 

In [164]:
from copy import deepcopy

def split_exchange_by_production_volume(exc, data, total, lst):
    """
    Split the exchange `exc` into one exchange per dataset in `data` and append them to `lst`.

    `data` is an iterable of (dataset, production volume) pairs and `total` is the sum of
    the production volumes. Each new exchange is a deep copy of `exc` whose amount is scaled
    by the share of its dataset in `total` and whose `input` points to that dataset.

    Returns `lst`, which is also modified in place. `exc` itself is not changed.
    """
    for obj, amount in data:
        new_exc = deepcopy(exc)
        # Could also use rescale_exchange function from wurst here
        # Scale by the production volume of this dataset (the loop variable `amount`);
        # the previously used name `value` is not defined in this function.
        new_exc["amount"] *= amount / total
        # Create direct link to this particular dataset
        new_exc["input"] = (obj["database"], obj["code"])
        lst.append(new_exc)
    # Return the complete list, not only the last exchange: the caller continues to use the
    # result as a list of new exchanges.
    return lst

def split_by_production_volume(data, search_db, old_exchange_properties, new_exchange_properties):
    """
    Given some data being imported, `data`, and a database to search in, `search_db`, 
    take each exchange matching `old_exchange_properties`, and split it into multiple exchanges 
    matching `new_exchange_properties`, using production volumes as allocation factors.

    The datasets in `data` are modified in place: matching exchanges are removed and the new
    exchanges are added at the end of the exchanges list. `data` is returned.

    Raises ValueError if no dataset of `search_db` matches `new_exchange_properties`.
    """
    matched_datasets = [
        ds
        for ds in search_db
        if all(ds[key] == value for key, value in new_exchange_properties.items())
    ]
    if not matched_datasets:
        raise ValueError("No matching datasets found")

    # `production()` returns an iterable that is not an iterator itself, so it has to be
    # wrapped in `iter` before the first production exchange can be taken with `next`.
    production_volumes = [
        next(iter(ds.production())).get("production volume", 0) for ds in matched_datasets
    ]
    if len(production_volumes) == 1:
        # Fix if only one possible and no production volume
        production_volumes = [1]
    total_volume = sum(production_volumes)

    for ds in data:
        indices_to_drop, new_exchanges = set(), []
        for i, exc in enumerate(ds.get("exchanges", [])):
            # An exchange that lacks one of the properties simply does not match
            if all(key in exc and exc[key] == value for key, value in old_exchange_properties.items()):
                indices_to_drop.add(i)
                # The helper appends to `new_exchanges` in place
                split_exchange_by_production_volume(
                    exc,
                    zip(matched_datasets, production_volumes),
                    total_volume,
                    new_exchanges,
                )

        if new_exchanges:
            ds["exchanges"] = [
                exc
                for i, exc in enumerate(ds.get("exchanges", []))
                if i not in indices_to_drop
            ] + new_exchanges
    return data


In [246]:
# Properties of the datasets that should replace the old exchange
new_exchange_properties = {
    'name': 'market for rice, basmati',
    'location': 'GLO'
                          }
# Properties that identify the exchange to be replaced
old_exchange_properties = {
    'name': 'market for rice',
    'location': 'GLO'
}

# All activities of `db` that have exactly the new properties
matched_datasets = [act for act in db if all(act[key] == value for key, value in new_exchange_properties.items())]
# Production volume of the first production exchange of each matched dataset, 0 if it has none
production_volumes = [
        next(iter(ds.production())).get("production volume", 0) for ds in matched_datasets
    ]

In [247]:
production_volumes

[12520000000.0]

In [248]:
data = [{'name': 'market for rice',
  'reference product': 'rice',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'}]

In [249]:
# Manual run of the loop from split_by_production_volume on `data`.
# Exchanges are read from the `exchanges` key of every item of `data`; items without this key
# contribute no exchanges, in which case `new_exchanges` stays empty.
for ds in data:
    indices_to_drop, new_exchanges = [], []
    for i, exc in enumerate(ds.get("exchanges", [])):
        # An exchange matches if it has all the old properties
        if all(exc[key] == value for key, value in old_exchange_properties.items()):
            indices_to_drop.append(i)
            new_exchanges = split_exchange_by_production_volume(
                exc,
                zip(matched_datasets, production_volumes),
                sum(production_volumes),
                new_exchanges,
            )

    # Replace the matched exchanges by the new ones, only if something was matched
    if new_exchanges:
        ds["exchanges"] = [
            exc
            for i, exc in enumerate(ds.get("exchanges", []))
            if i not in indices_to_drop
        ] + new_exchanges

In [250]:
new_exchanges

[]

In [245]:
i

11

In [183]:
a = matched_datasets[0]
a.as_dict()

{'comment': "This dataset represents the supply of 1 kg of 'rice, basmati' from activities that produce it within the geography of GLO. Transportation is included in this market. The transport amounts are based on the default transport distances for markets, provided in the 'Default Transport Assumptions' file available on the ecoinvent website (https://www.ecoinvent.org/support/documents-and-files/documents-and-files.html). See exchange comments for additional details.",
 'classifications': [('ISIC rev.4 ecoinvent', '0112:Growing of rice'),
  ('CPC', '0113: Rice')],
 'activity type': 'market activity',
 'activity': 'e3efbc2d-9334-4160-b16a-c68d13c9f2f2',
 'database': 'ecoinvent 3.6 cutoff',
 'filename': 'e3efbc2d-9334-4160-b16a-c68d13c9f2f2_f2711295-b251-4171-8316-516c3526b8ff.spold',
 'location': 'GLO',
 'name': 'market for rice, basmati',
 'parameters': [],
 'authors': {'data entry': {'name': '[System]',
   'email': 'support@ecoinvent.org'},
  'data generator': {'name': 'Avraam Syme

In [185]:
next(iter(a.production()))._data

{'flow': 'f2711295-b251-4171-8316-516c3526b8ff',
 'type': 'production',
 'name': 'rice, basmati',
 'classifications': {'CPC': ['0113: Rice']},
 'production volume': 12520000000.0,
 'activity': 'e3efbc2d-9334-4160-b16a-c68d13c9f2f2',
 'unit': 'kilogram',
 'amount': 1.0,
 'uncertainty type': 0,
 'loc': 1.0,
 'input': ('ecoinvent 3.6 cutoff', '1a17e39261f37a262dd3d8a3c69f2f05'),
 'output': ('ecoinvent 3.6 cutoff', '1a17e39261f37a262dd3d8a3c69f2f05')}

In [187]:
a.production??

In [219]:
a=[act for act in db if 'rice production' in act['name'] and 'US' in act['location']][0]
a

'rice production, non-basmati' (kilogram, US, None)

In [251]:
list(co.unlinked)

[{'name': 'market for rice',
  'reference product': 'rice',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'rice production',
  'reference product': 'rice',
  'location': 'RoW',
  'amount': 1.1793134592,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'rice production',
  'reference product': 'rice',
  'location': 'IN',
  'amount': 0.1380030448211913,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'market for wheat grain',
  'reference product': 'wheat grain',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'market for maize grain',
  'reference product': 'maize grain',
  'location': 'GLO',
  'amount': 0.0,
  'unit': 'kilogram',
  'database': 'ecoinvent 3.6 cutoff',
  'type': 'technosphere'},
 {'name': 'market for ma

# Playground