# Region matching

Ecoinvent contains subnational, national, and country-aggregated regions. EXIOBASE contains 44 countries and 5 rest-of-world regions. <br>
This notebook matches the locations of ecoinvent to the regions of EXIOBASE.

In [1]:
import numpy as np
import pandas as pd
import brightway2 as bw
import pickle
import os

#### 1. get all regions in ecoinvent and exiobase

In [5]:
# Collect every distinct 'location' value found in the imported ecoinvent datasets.
with open('../../Data/lci_iot_imported/cutoff371_no_mg.pickle', 'rb') as src:
    datasets = pickle.load(src)
eco_reg_co_partco = set()
for dataset in datasets:
    eco_reg_co_partco.add(dataset['location'])

# Write the location set to disk.
with open("../../Data/region_matching/ecoinvent_locations.p", 'wb') as dst:
    pickle.dump(eco_reg_co_partco, dst)

eco_reg_co_partco

{'AE',
 'AL',
 'AM',
 'AO',
 'AR',
 'AT',
 'AU',
 'AZ',
 'BA',
 'BD',
 'BE',
 'BG',
 'BH',
 'BJ',
 'BN',
 'BO',
 'BR',
 'BR-AC',
 'BR-AL',
 'BR-AM',
 'BR-AP',
 'BR-BA',
 'BR-CE',
 'BR-DF',
 'BR-ES',
 'BR-GO',
 'BR-MA',
 'BR-MG',
 'BR-MS',
 'BR-MT',
 'BR-Mid-western grid',
 'BR-North-eastern grid',
 'BR-Northern grid',
 'BR-PA',
 'BR-PB',
 'BR-PE',
 'BR-PI',
 'BR-PR',
 'BR-RJ',
 'BR-RN',
 'BR-RO',
 'BR-RR',
 'BR-RS',
 'BR-SC',
 'BR-SE',
 'BR-SP',
 'BR-South-eastern grid',
 'BR-Southern grid',
 'BR-TO',
 'BW',
 'BY',
 'CA',
 'CA-AB',
 'CA-BC',
 'CA-MB',
 'CA-NB',
 'CA-NF',
 'CA-NS',
 'CA-NT',
 'CA-NU',
 'CA-ON',
 'CA-PE',
 'CA-QC',
 'CA-SK',
 'CA-YK',
 'CD',
 'CENTREL',
 'CG',
 'CH',
 'CI',
 'CL',
 'CM',
 'CN',
 'CN-AH',
 'CN-BJ',
 'CN-CQ',
 'CN-CSG',
 'CN-FJ',
 'CN-GD',
 'CN-GS',
 'CN-GX',
 'CN-GZ',
 'CN-HA',
 'CN-HB',
 'CN-HE',
 'CN-HL',
 'CN-HN',
 'CN-HU',
 'CN-JL',
 'CN-JS',
 'CN-JX',
 'CN-LN',
 'CN-NM',
 'CN-NX',
 'CN-QH',
 'CN-SA',
 'CN-SC',
 'CN-SD',
 'CN-SGCC',
 'CN-SH',
 'CN-SX'

In [2]:
# Load the input-output flow table and keep the first level of its row index,
# i.e. the location code of every row.
with open('../../Data/lci_iot_imported/iot_flow.p', 'rb') as src:
    iot = pickle.load(src)
iot_reg_co = set(iot.index.get_level_values(0))

# Write the location set to disk.
with open("../../Data/region_matching/exiobase_locations.p", 'wb') as dst:
    pickle.dump(iot_reg_co, dst)

iot_reg_co

{'AT',
 'AU',
 'BE',
 'BG',
 'BR',
 'CA',
 'CH',
 'CN',
 'CY',
 'CZ',
 'DE',
 'DK',
 'EE',
 'ES',
 'FI',
 'FR',
 'GB',
 'GR',
 'HR',
 'HU',
 'ID',
 'IE',
 'IN',
 'IT',
 'JP',
 'KR',
 'LT',
 'LU',
 'LV',
 'MT',
 'MX',
 'NL',
 'NO',
 'PL',
 'PT',
 'RO',
 'RU',
 'SE',
 'SI',
 'SK',
 'TR',
 'TW',
 'US',
 'WA',
 'WE',
 'WF',
 'WL',
 'WM',
 'ZA'}

#### 2. region matching

###### 2.1 match the countries in ecoinvent to the countries in exiobase, and check the remaining regions in ecoinvent, including other countries, subnational regions, and country-aggregated regions. 

In [6]:
iot_reg = ['WA','WE','WF','WL','WM']
# Drop the five region codes from the iot locations, then keep only the codes
# that also occur in ecoinvent (44 countries in iot are covered by ecoinvent).
iot_co = eco_reg_co_partco & (iot_reg_co - set(iot_reg))
with open("../../Data/region_matching/iot_co.p", 'wb') as dst:
    pickle.dump(iot_co, dst)

In [7]:
# Ecoinvent locations that are not one of the matched iot countries.
rest_eco_loc = eco_reg_co_partco - iot_co
rest_eco_loc  # check the rest of locations in ecoinvent

{'AE',
 'AL',
 'AM',
 'AO',
 'AR',
 'AZ',
 'BA',
 'BD',
 'BH',
 'BJ',
 'BN',
 'BO',
 'BR-AC',
 'BR-AL',
 'BR-AM',
 'BR-AP',
 'BR-BA',
 'BR-CE',
 'BR-DF',
 'BR-ES',
 'BR-GO',
 'BR-MA',
 'BR-MG',
 'BR-MS',
 'BR-MT',
 'BR-Mid-western grid',
 'BR-North-eastern grid',
 'BR-Northern grid',
 'BR-PA',
 'BR-PB',
 'BR-PE',
 'BR-PI',
 'BR-PR',
 'BR-RJ',
 'BR-RN',
 'BR-RO',
 'BR-RR',
 'BR-RS',
 'BR-SC',
 'BR-SE',
 'BR-SP',
 'BR-South-eastern grid',
 'BR-Southern grid',
 'BR-TO',
 'BW',
 'BY',
 'CA-AB',
 'CA-BC',
 'CA-MB',
 'CA-NB',
 'CA-NF',
 'CA-NS',
 'CA-NT',
 'CA-NU',
 'CA-ON',
 'CA-PE',
 'CA-QC',
 'CA-SK',
 'CA-YK',
 'CD',
 'CENTREL',
 'CG',
 'CI',
 'CL',
 'CM',
 'CN-AH',
 'CN-BJ',
 'CN-CQ',
 'CN-CSG',
 'CN-FJ',
 'CN-GD',
 'CN-GS',
 'CN-GX',
 'CN-GZ',
 'CN-HA',
 'CN-HB',
 'CN-HE',
 'CN-HL',
 'CN-HN',
 'CN-HU',
 'CN-JL',
 'CN-JS',
 'CN-JX',
 'CN-LN',
 'CN-NM',
 'CN-NX',
 'CN-QH',
 'CN-SA',
 'CN-SC',
 'CN-SD',
 'CN-SGCC',
 'CN-SH',
 'CN-SX',
 'CN-TJ',
 'CN-XJ',
 'CN-XZ',
 'CN-YN',
 'CN-ZJ',
 'CO

In [8]:
# Sub-national locations: codes containing '-' (e.g. 'BR-AC'), except the 'UN-'
# prefixed codes, plus the spelled-out 'Canada without Quebec'.
# The set is derived from the full ecoinvent location set instead of `rest_eco_loc`:
# a later cell removes these entries from `rest_eco_loc` in place, so re-running this
# cell afterwards would otherwise yield an empty set and overwrite the pickle with it.
partco = {
    loc
    for loc in eco_reg_co_partco - iot_co
    if loc == 'Canada without Quebec' or ('-' in loc and 'UN-' not in loc)
}
with open("../../Data/region_matching/partco.p", 'wb') as o:
    pickle.dump(partco, o)
partco

{'BR-AC',
 'BR-AL',
 'BR-AM',
 'BR-AP',
 'BR-BA',
 'BR-CE',
 'BR-DF',
 'BR-ES',
 'BR-GO',
 'BR-MA',
 'BR-MG',
 'BR-MS',
 'BR-MT',
 'BR-Mid-western grid',
 'BR-North-eastern grid',
 'BR-Northern grid',
 'BR-PA',
 'BR-PB',
 'BR-PE',
 'BR-PI',
 'BR-PR',
 'BR-RJ',
 'BR-RN',
 'BR-RO',
 'BR-RR',
 'BR-RS',
 'BR-SC',
 'BR-SE',
 'BR-SP',
 'BR-South-eastern grid',
 'BR-Southern grid',
 'BR-TO',
 'CA-AB',
 'CA-BC',
 'CA-MB',
 'CA-NB',
 'CA-NF',
 'CA-NS',
 'CA-NT',
 'CA-NU',
 'CA-ON',
 'CA-PE',
 'CA-QC',
 'CA-SK',
 'CA-YK',
 'CN-AH',
 'CN-BJ',
 'CN-CQ',
 'CN-CSG',
 'CN-FJ',
 'CN-GD',
 'CN-GS',
 'CN-GX',
 'CN-GZ',
 'CN-HA',
 'CN-HB',
 'CN-HE',
 'CN-HL',
 'CN-HN',
 'CN-HU',
 'CN-JL',
 'CN-JS',
 'CN-JX',
 'CN-LN',
 'CN-NM',
 'CN-NX',
 'CN-QH',
 'CN-SA',
 'CN-SC',
 'CN-SD',
 'CN-SGCC',
 'CN-SH',
 'CN-SX',
 'CN-TJ',
 'CN-XJ',
 'CN-XZ',
 'CN-YN',
 'CN-ZJ',
 'Canada without Quebec',
 'IN-AP',
 'IN-AR',
 'IN-AS',
 'IN-BR',
 'IN-CT',
 'IN-DL',
 'IN-Eastern grid',
 'IN-GA',
 'IN-GJ',
 'IN-HP',
 'IN-HR',
 'I

In [9]:
rest_eco_loc -= partco  # in-place removal of the sub-national locations
rest_eco_loc  # check the rest of locations in ecoinvent

{'AE',
 'AL',
 'AM',
 'AO',
 'AR',
 'AZ',
 'BA',
 'BD',
 'BH',
 'BJ',
 'BN',
 'BO',
 'BW',
 'BY',
 'CD',
 'CENTREL',
 'CG',
 'CI',
 'CL',
 'CM',
 'CO',
 'CR',
 'CU',
 'CW',
 'DO',
 'DZ',
 'EC',
 'EG',
 'ER',
 'ET',
 'Europe without Austria',
 'Europe without Switzerland',
 'Europe without Switzerland and Austria',
 'Europe, without Russia and Turkey',
 'GA',
 'GE',
 'GH',
 'GI',
 'GLO',
 'GT',
 'HK',
 'HN',
 'HT',
 'IAI Area, Africa',
 'IAI Area, Asia, without China and GCC',
 'IAI Area, EU27 & EFTA',
 'IAI Area, Gulf Cooperation Council',
 'IAI Area, North America',
 'IAI Area, Russia & RER w/o EU27 & EFTA',
 'IAI Area, South America',
 'IL',
 'IQ',
 'IR',
 'IS',
 'JM',
 'JO',
 'KE',
 'KG',
 'KH',
 'KP',
 'KW',
 'KZ',
 'LB',
 'LK',
 'LY',
 'MA',
 'MD',
 'ME',
 'MG',
 'MK',
 'MM',
 'MN',
 'MU',
 'MY',
 'MZ',
 'NA',
 'NE',
 'NG',
 'NI',
 'NORDEL',
 'NP',
 'NZ',
 'North America without Quebec',
 'OM',
 'PA',
 'PE',
 'PG',
 'PH',
 'PK',
 'PY',
 'QA',
 'RAF',
 'RER',
 'RER w/o CH+DE',
 'RE

In [10]:
# Remaining national locations: the two-character codes left after the matched iot
# countries and the sub-national locations are taken out.
# The set is derived from the full ecoinvent location set instead of `rest_eco_loc`:
# a later cell removes these entries from `rest_eco_loc` in place, so re-running this
# cell afterwards would otherwise yield an empty set and overwrite the pickle with it.
other_co = {loc for loc in eco_reg_co_partco - iot_co - partco if len(loc) == 2}
with open("../../Data/region_matching/other_co.p", 'wb') as o:
    pickle.dump(other_co, o)
other_co

{'AE',
 'AL',
 'AM',
 'AO',
 'AR',
 'AZ',
 'BA',
 'BD',
 'BH',
 'BJ',
 'BN',
 'BO',
 'BW',
 'BY',
 'CD',
 'CG',
 'CI',
 'CL',
 'CM',
 'CO',
 'CR',
 'CU',
 'CW',
 'DO',
 'DZ',
 'EC',
 'EG',
 'ER',
 'ET',
 'GA',
 'GE',
 'GH',
 'GI',
 'GT',
 'HK',
 'HN',
 'HT',
 'IL',
 'IQ',
 'IR',
 'IS',
 'JM',
 'JO',
 'KE',
 'KG',
 'KH',
 'KP',
 'KW',
 'KZ',
 'LB',
 'LK',
 'LY',
 'MA',
 'MD',
 'ME',
 'MG',
 'MK',
 'MM',
 'MN',
 'MU',
 'MY',
 'MZ',
 'NA',
 'NE',
 'NG',
 'NI',
 'NP',
 'NZ',
 'OM',
 'PA',
 'PE',
 'PG',
 'PH',
 'PK',
 'PY',
 'QA',
 'RS',
 'SA',
 'SD',
 'SG',
 'SN',
 'SS',
 'SV',
 'SY',
 'TG',
 'TH',
 'TJ',
 'TM',
 'TN',
 'TT',
 'TZ',
 'UA',
 'UY',
 'UZ',
 'VE',
 'VN',
 'XK',
 'YE',
 'ZM',
 'ZW'}

In [11]:
rest_eco_loc -= other_co  # in-place removal of the remaining national locations
rest_eco_loc  # the rest of locations are eco_reg

{'CENTREL',
 'Europe without Austria',
 'Europe without Switzerland',
 'Europe without Switzerland and Austria',
 'Europe, without Russia and Turkey',
 'GLO',
 'IAI Area, Africa',
 'IAI Area, Asia, without China and GCC',
 'IAI Area, EU27 & EFTA',
 'IAI Area, Gulf Cooperation Council',
 'IAI Area, North America',
 'IAI Area, Russia & RER w/o EU27 & EFTA',
 'IAI Area, South America',
 'NORDEL',
 'North America without Quebec',
 'RAF',
 'RER',
 'RER w/o CH+DE',
 'RER w/o DE+NL+RU',
 'RER w/o RU',
 'RLA',
 'RME',
 'RNA',
 'RoW',
 'SAS',
 'UCTE',
 'UCTE without Germany',
 'UN-OCEANIA',
 'UN-SEASIA',
 'WECC',
 'WEU'}

In [12]:
# Check that no location was removed by accident: the sizes of the four groups
# (printed first) must add up to the size of the full ecoinvent location set
# (printed last).
group_sizes = [len(iot_co), len(partco), len(other_co), len(rest_eco_loc)]
for size in group_sizes:
    print(size)
print(len(eco_reg_co_partco))
assert sum(group_sizes) == len(eco_reg_co_partco), (
    'location groups do not add up to the ecoinvent total: {} != {}'.format(
        sum(group_sizes), len(eco_reg_co_partco)))

44
122
100
31
297


###### 2.2  match subnational regions in ecoinvent to countries in exiobase

In [13]:
# Map every sub-national location to its country: the first two characters of the
# code, with 'Canada without Quebec' special-cased to 'CA'.
partco_co = {
    loc: ('CA' if loc == 'Canada without Quebec' else loc[:2])
    for loc in partco
}

In [14]:
# Write the sub-national -> country mapping to disk.
with open("../../Data/region_matching/partco_to_co.p", 'wb') as dst:
    pickle.dump(partco_co, dst)

In [15]:
# Every country derived from a sub-national location must be one of the matched
# iot countries; report all offenders at once instead of a bare 'check'.
unmatched_countries = set(partco_co.values()) - set(iot_co)
assert not unmatched_countries, (
    'check: countries derived from sub-national locations are missing in iot_co: {}'.format(
        sorted(unmatched_countries)))

###### 2.3 match the country-aggregated regions in ecoinvent to countries in exiobase

In [16]:
# Read the 'intersections' sheet of the ecoinvent geography workbook (local file).
eco_geo = pd.read_excel(
    r'../../Data/ecoinvent_database/eiv3.7_geographies_names_coordinates_shortcuts_overlaps.xlsx',
    sheet_name='intersections',
    engine='openpyxl',
    keep_default_na=False,
)
eco_geo.head()

Unnamed: 0,geography name,geography shortcut,contained geography name,contained geography shortcut
0,Africa,RAF,Algeria,DZ
1,Africa,RAF,Angola,AO
2,Africa,RAF,Benin,BJ
3,Africa,RAF,Botswana,BW
4,Africa,RAF,Burkina Faso,BF


In [17]:
def get_contained_iot_co(eco_reg, eco_geo, iot_countries=None):
    '''

    Match one country-aggregated region in ecoinvent to the countries in exiobase
    through the geography table.

    Parameters
    ----------
    eco_reg : str
        Shortcut of the ecoinvent region, compared against the
        'geography shortcut' column of `eco_geo`.
    eco_geo : pandas.DataFrame
        Geography table with the columns 'geography shortcut' and
        'contained geography shortcut'.
    iot_countries : set, optional
        Country codes to keep. Defaults to the notebook-level `iot_co` set, which
        is what the function always used before this parameter existed.

    Returns
    -------
    set
        Contained geography shortcuts of `eco_reg` that are also in
        `iot_countries`; empty when `eco_reg` has no row in `eco_geo`.

    '''
    if iot_countries is None:
        # Fall back to the notebook global so existing two-argument calls keep working.
        iot_countries = iot_co

    # Select all rows of the region in one step instead of looping over row labels.
    is_region = eco_geo['geography shortcut'] == eco_reg
    all_contained_geo = set(eco_geo.loc[is_region, 'contained geography shortcut'])
    return all_contained_geo.intersection(iot_countries)

# Map every country-aggregated ecoinvent region to the iot countries it covers.
eco_reg_TO_iot_co = {}
for reg in rest_eco_loc:
    if reg in ('RoW', 'GLO'):
        # 'RoW' and 'GLO' are not looked up in the geography table; both map to all
        # matched iot countries. Each entry gets its own copy so that editing one
        # entry cannot silently change `iot_co` or the other entry.
        eco_reg_TO_iot_co[reg] = set(iot_co)
    else:
        eco_reg_TO_iot_co[reg] = get_contained_iot_co(reg, eco_geo)

with open("../../Data/region_matching/eco_reg_TO_iot_co.p", 'wb') as o:
    pickle.dump(eco_reg_TO_iot_co, o)

In [19]:
# check: size of the 'RoW' entry, then the number of mapped regions
print(len(eco_reg_TO_iot_co['RoW']), len(eco_reg_TO_iot_co), sep='\n')
eco_reg_TO_iot_co

44
31


{'Europe without Switzerland and Austria': {'BE',
  'BG',
  'CZ',
  'DE',
  'DK',
  'EE',
  'ES',
  'FI',
  'FR',
  'GB',
  'GR',
  'HR',
  'HU',
  'IE',
  'IT',
  'LT',
  'LU',
  'LV',
  'MT',
  'NL',
  'NO',
  'PL',
  'PT',
  'RO',
  'SE',
  'SI',
  'SK'},
 'UN-OCEANIA': {'AU'},
 'GLO': {'AT',
  'AU',
  'BE',
  'BG',
  'BR',
  'CA',
  'CH',
  'CN',
  'CY',
  'CZ',
  'DE',
  'DK',
  'EE',
  'ES',
  'FI',
  'FR',
  'GB',
  'GR',
  'HR',
  'HU',
  'ID',
  'IE',
  'IN',
  'IT',
  'JP',
  'KR',
  'LT',
  'LU',
  'LV',
  'MT',
  'MX',
  'NL',
  'NO',
  'PL',
  'PT',
  'RO',
  'RU',
  'SE',
  'SI',
  'SK',
  'TR',
  'TW',
  'US',
  'ZA'},
 'Europe without Austria': {'BE',
  'BG',
  'CH',
  'CZ',
  'DE',
  'DK',
  'EE',
  'ES',
  'FI',
  'FR',
  'GB',
  'GR',
  'HR',
  'HU',
  'IE',
  'IT',
  'LT',
  'LU',
  'LV',
  'MT',
  'NL',
  'NO',
  'PL',
  'PT',
  'RO',
  'SE',
  'SI',
  'SK'},
 'RNA': {'CA', 'US'},
 'RLA': {'BR', 'MX'},
 'UCTE': {'AT',
  'BE',
  'BG',
  'CH',
  'CZ',
  'DE',
  'ES',

###### 2.4 match the other countries in ecoinvent to one of the 5 RoW regions in exiobase. 

In [21]:
# iot_co and partco map to an iot country directly; for other_co we first need the
# continent and then the corresponding iot_reg.
country_continent = pd.read_excel(
    r'../../Data/other_data/country-and-continent-codes-list-added-ME.xlsx',
    engine='openpyxl',
    keep_default_na=False,  # prevent NA becomes NaN
)
country_continent.head()

Unnamed: 0,Continent_Name,Continent_Code,Country_Name,Two_Letter_Country_Code,Three_Letter_Country_Code,Country_Number,Unnamed: 6
0,Asia,AS,"Afghanistan, Islamic Republic of",AF,AFG,4,
1,Europe,EU,"Albania, Republic of",AL,ALB,8,
2,Antarctica,AN,Antarctica (the territory South of 60 deg S),AQ,ATA,10,
3,Africa,AF,"Algeria, People's Democratic Republic of",DZ,DZA,12,
4,Oceania,OC,American Samoa,AS,ASM,16,


In [22]:
def get_matching_iot_reg(one_other_co, continent_table=None):
    '''

    Get the matched RoW region in exiobase for one "other country" in ecoinvent.

    Parameters
    ----------
    one_other_co : str
        Country code, compared against the 'Two_Letter_Country_Code' column.
    continent_table : pandas.DataFrame, optional
        Table with the columns 'Two_Letter_Country_Code' and 'Continent_Name'.
        Defaults to the notebook-level `country_continent` table, which is what the
        function always used before this parameter existed.

    Returns
    -------
    str
        'WM', 'WF', 'WA' or 'WE' for the continents listed below, 'WL' for any
        other continent name, or the string 'nan' when the code has no row in
        the table. When a code has several rows, the first one is used.

    '''
    if continent_table is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        continent_table = country_continent

    continent_to_iot_reg = {
        'Middle East': 'WM',
        'Africa': 'WF',
        'Antarctica': 'WA',
        'Asia': 'WA',
        'Oceania': 'WA',
        'Europe': 'WE',
    }

    # Index labels (not a Series) of the rows whose country code matches.
    matches = continent_table.index[continent_table['Two_Letter_Country_Code'] == one_other_co]
    if matches.size == 0:
        # Sentinel kept for backward compatibility: callers receive the string 'nan'.
        return 'nan'

    continent = continent_table.loc[matches[0], 'Continent_Name']
    # Every continent name not listed above is assigned to 'WL'.
    return continent_to_iot_reg.get(continent, 'WL')
        

In [23]:
final_loc_set_TO_iot_reg_co = {}
# final_loc_set is the location set of our final LCI database, including all subnational and national regions in ecoinvent.

# Matched iot countries map to themselves.
for loc in iot_co:
    final_loc_set_TO_iot_reg_co[loc] = loc

# Sub-national locations reuse the mapping built in section 2.2 (`partco_co`) so the
# two-character prefix / 'Canada without Quebec' rule lives in one place only.
final_loc_set_TO_iot_reg_co.update(partco_co)

# All other countries are assigned to one of the RoW regions via their continent.
for loc in other_co:
    final_loc_set_TO_iot_reg_co[loc] = get_matching_iot_reg(loc)

# get_matching_iot_reg returns the string 'nan' for codes it cannot find; surface
# those here instead of letting the sentinel slip silently into the saved mapping.
unmatched_other_co = sorted(loc for loc in other_co if final_loc_set_TO_iot_reg_co[loc] == 'nan')
if unmatched_other_co:
    print('WARNING: no continent found for: {}'.format(unmatched_other_co))

final_loc_set_TO_iot_reg_co

{'AU': 'AU',
 'SI': 'SI',
 'KR': 'KR',
 'BE': 'BE',
 'CY': 'CY',
 'NL': 'NL',
 'CA': 'CA',
 'US': 'US',
 'NO': 'NO',
 'HU': 'HU',
 'BG': 'BG',
 'CN': 'CN',
 'ES': 'ES',
 'DK': 'DK',
 'LU': 'LU',
 'JP': 'JP',
 'FR': 'FR',
 'LV': 'LV',
 'BR': 'BR',
 'PL': 'PL',
 'IE': 'IE',
 'ZA': 'ZA',
 'LT': 'LT',
 'DE': 'DE',
 'RO': 'RO',
 'IN': 'IN',
 'FI': 'FI',
 'IT': 'IT',
 'RU': 'RU',
 'ID': 'ID',
 'GR': 'GR',
 'SE': 'SE',
 'HR': 'HR',
 'MX': 'MX',
 'MT': 'MT',
 'CH': 'CH',
 'GB': 'GB',
 'CZ': 'CZ',
 'SK': 'SK',
 'PT': 'PT',
 'EE': 'EE',
 'AT': 'AT',
 'TR': 'TR',
 'TW': 'TW',
 'IN-MP': 'IN',
 'CA-NT': 'CA',
 'BR-PB': 'BR',
 'CA-SK': 'CA',
 'CN-SA': 'CN',
 'BR-SE': 'BR',
 'CA-YK': 'CA',
 'BR-AM': 'BR',
 'IN-WB': 'IN',
 'BR-SP': 'BR',
 'CN-YN': 'CN',
 'IN-OR': 'IN',
 'IN-TN': 'IN',
 'IN-SK': 'IN',
 'BR-SC': 'BR',
 'CN-BJ': 'CN',
 'CA-PE': 'CA',
 'IN-DL': 'IN',
 'IN-GJ': 'IN',
 'BR-AC': 'BR',
 'BR-MS': 'BR',
 'BR-RN': 'BR',
 'IN-NL': 'IN',
 'CN-GX': 'CN',
 'CN-JX': 'CN',
 'CN-HA': 'CN',
 'CN-FJ': 'C

In [24]:
# Write the final location -> iot region/country mapping to disk.
with open("../../Data/region_matching/final_loc_set_TO_iot_reg_co.p", 'wb') as dst:
    pickle.dump(final_loc_set_TO_iot_reg_co, dst)