There is no 1:1 correspondence of NMVOC sectors between EDGAR v5.0 and EDGAR v4.3.2. This notebook maps the EDGAR v5.0 NMVOC sectors onto the EDGAR v4.3.2 sectors, so that speciation can be performed by consistent sector types.
The mapping is provided in the 'edgarv5_NMVOC_map_sectors.xlsx' file.

All EDGAR v5.0 NMVOC sectors are mapped, except manure management (IPCC 2006: 3A2, IPCC 1996: 4B, MNM), which has no corresponding sector in the speciated EDGAR v4.3.2 NMVOCs.

In [1]:
import xarray as xr
import os
import numpy as np
import pandas as pd

In [2]:
# Input: original EDGAR v5.0 NMVOC emissions file.
ed_pth = "/geos/d21/s1878599/edgarv5_process/monthly_all_sectors/monthly_v50_2015_NMVOC_.0.1x0.1.nc" # path of edgar v5 NMVOCs.
save_dir='/geos/d21/s1878599/edgarv5_process/'   # where to save the modified sectors NMVOCs.
# Create the output directory (and any missing parents) with the standard
# library instead of the IPython-only `!mkdir -p` shell escape, so the cell
# is plain Python; exist_ok=True makes a separate isdir() check unnecessary.
os.makedirs(save_dir, exist_ok=True)

In [3]:
# Open the original (unmapped) EDGAR v5.0 NMVOC file; `t` is reused later
# in the notebook to rebuild some lumped sectors by hand.
t=xr.open_dataset(ed_pth)

In [4]:
t

## Read the mapping file

In [6]:
def read_sector_map(file_pth):
    '''Read the EDGAR v5 -> EDGAR v4.3.2 sector mapping from an excel file.

       The workbook must contain a sheet named 'sectors_map' with the columns
       'edgarv5' and 'edgarv4.3.2'. Each row pairs one EDGAR v4.3.2 sector with
       the EDGAR v5 sector code(s) that are summed into it; several codes are
       separated by '+'.

       Underscores in the codes are replaced by hyphens and surrounding
       whitespace is removed. Rows with an empty cell in either column are
       skipped. The dictionary keeps the row order of the sheet; if a sector
       appears on several rows the last row wins.

       file_pth: excel input file.
       output: dictionary {edgarv4.3.2 sector: [edgarv5 sector, ...]}.
    '''
    table = pd.read_excel(file_pth, sheet_name='sectors_map')
    # A blank cell is read as NaN, which has no string methods: drop such
    # rows up front instead of failing half-way through the table.
    table = table.dropna(subset=['edgarv4.3.2', 'edgarv5'])

    sector_map = {}
    for target, sources in zip(table['edgarv4.3.2'], table['edgarv5']):
        # Codes are normalised to the hyphenated spelling (e.g. 'SWD-INC').
        key = str(target).strip().replace('_', '-')
        codes = (code.strip() for code in str(sources).replace('_', '-').split('+'))
        # Ignore empty fragments left by a stray leading/trailing '+'.
        sector_map[key] = [code for code in codes if code]
    return sector_map

In [7]:
# Excel workbook holding the sector mapping (relative path).
map_pth='edgarv5_NMVOC_map_sectors.xlsx' 

In [8]:
# Build the mapping dictionary from the workbook.
spc_map=read_sector_map(map_pth)

In [9]:
spc_map

{'AWB': ['AWB'],
 'ENE': ['ENE'],
 'IND': ['IND'],
 'PPA': ['CHE', 'FOO-PAP', 'IRO', 'PRU-SOL', 'NMM'],
 'PRO': ['PRO'],
 'RCO': ['RCO'],
 'SWD': ['SWD-INC', 'SWD-LDF', 'WWT'],
 'TNR-Aviation-CDS': ['TNR-Aviation-CDS'],
 'TNR-Aviation-CRS': ['TNR-Aviation-CRS'],
 'TNR-Aviation-LTO': ['TNR-Aviation-LTO'],
 'TNR-Other': ['TNR-Other'],
 'TNR-Ship': ['TNR-Ship'],
 'REF-TRF': ['REF', 'TRF'],
 'TRO': ['TRO-noRES'],
 'FFF': ['FFF']}

## Map

In [10]:
def map_sectors(sec_map,ds_pth):
    '''Lump EDGARv5 NMVOC sectors into macro-sectors according to the sector mapping.

       sec_map: dictionary of mapping {macro-sector name: [sector names to sum]}.
       ds_pth: single species nc file path, with one variable per sector.
       output: xarray Dataset with one variable per macro-sector, loaded in
               memory; coordinates and global attributes are copied from the
               input file.

       A warning is issued for every mapped sector that is not a variable of
       the input file, because the corresponding macro-sector total is then
       incomplete (it is all zeros if none of its sectors is present).
    '''
    import warnings  # local import: only this function needs it.

    v_attrs= {'long_name': 'Emissions - ','units': 'kg m-2 s-1', 
    'comment': ' (see http://edgar.jrc.ec.europa.eu/methodology.php#12sou for the definitions of the single sources)'} # single vars attrs.

    # The context manager closes the input file once the sums are computed.
    with xr.open_dataset(ds_pth) as ds:
        # 'ENE' is the preferred template for the zero-filled accumulator; fall
        # back to the first variable when the file has no 'ENE' (an empty file
        # still raises KeyError('ENE') as before).
        template_name = 'ENE' if 'ENE' in ds.data_vars else next(iter(ds.data_vars), 'ENE')
        template = ds[template_name]

        ds_msec=xr.Dataset(coords=ds.coords, attrs=ds.attrs) # create dataset for macro sectors.

        for k,v in sec_map.items():
            missing = [name for name in v if name not in ds.data_vars]
            if missing:
                warnings.warn(
                    "map_sectors: sector(s) %s mapped to '%s' not found in %s"
                    % (missing, k, ds_pth),
                    stacklevel=2,
                )

            lumped = xr.zeros_like(template)  # zero-filled accumulator for the macro-sector.
            # Iterate in file order so the summation order does not depend on
            # the order of the codes in the mapping.
            for var in ds.data_vars:
                if var in v:
                    lumped = lumped + ds[var]

            ds_msec = ds_msec.assign({k: lumped})
            ds_msec[k].attrs= v_attrs # add attributes to variables.

        # Make sure nothing still refers lazily to the file being closed.
        ds_msec.load()

    return ds_msec

In [11]:
# Lump the EDGAR v5 NMVOC sectors of the original file into the mapped sectors.
ds_new = map_sectors(sec_map=spc_map, ds_pth=ed_pth)

# Write the lumped dataset to the output directory.
ds_new.to_netcdf(
    path=save_dir + '/' + 'mapped_monthly_v50_2015_NMVOC_.0.1x0.1.nc',
    format='NETCDF3_64BIT',
)
    

In [12]:
ds_new

## Check

In [14]:
# Re-open the mapped file from disk so the checks run on what was actually saved.
ds=xr.open_dataset(save_dir+'/mapped_monthly_v50_2015_NMVOC_.0.1x0.1.nc')

In [15]:
ds

In [16]:
# Rebuild two lumped sectors by hand from the original dataset `t`,
# adding the parts left to right, to compare with the saved file.
for lumped_name, part_names in (
    ('SWD', ['SWD-INC', 'SWD-LDF', 'WWT']),
    ('PPA', ['CHE', 'FOO-PAP', 'IRO', 'PRU-SOL', 'NMM']),
):
    running = t[part_names[0]]
    for part in part_names[1:]:
        running = running + t[part]
    t[lumped_name] = running

In [17]:
# The saved SWD must match the hand-made sum of SWD-INC, SWD-LDF and WWT.
xr.testing.assert_allclose(ds['SWD'],t['SWD'])

In [18]:
# The saved PPA must match the hand-made sum of CHE, FOO-PAP, IRO, PRU-SOL and NMM.
xr.testing.assert_allclose(ds['PPA'],t['PPA'])

In [19]:
# TRO is compared directly with the original TRO-noRES variable (one-to-one mapping).
xr.testing.assert_allclose(ds['TRO'],t['TRO-noRES'])