This notebook maps EDGAR v4.3.2 speciated VOC emissions to MOZART species emissions using the mapping in CEDS_MOZART_VOCmap.xlsx.

In [1]:
import xarray as xr
import numpy as np
import os
import pandas as pd

In [2]:
# Data directory paths: voc_dir holds the input VOC .nc files, save_dir receives the speciated output.
voc_dir='/geos/d21/s1878599/edgarv5_process/monthly_nmvocs4.3.2/'
save_dir='/geos/d21/s1878599/edgarv5_process/monthly_nmvocs4.3.2_mass_MOZART/'
# Create the save directory (and any missing parents) if it does not exist yet.
# Pure Python instead of the `!mkdir -p` shell escape: works outside IPython,
# on any platform, and with paths that contain spaces.
os.makedirs(save_dir, exist_ok=True)

## Read VOCs mapping table

In [3]:
def read_voc_spec(file_pth):
    '''Load the VOC speciation map stored in the 'mapping_mol' sheet of an Excel workbook.

       file_pth: path to the Excel input file.
       output: pandas DataFrame with the content of the 'mapping_mol' sheet,
               exactly as returned by pd.read_excel.
    '''
    return pd.read_excel(file_pth, sheet_name='mapping_mol')

In [4]:
# Load the molar mapping table; the workbook path is relative to the notebook's working directory.
m=read_voc_spec('CEDS_MOZART_VOCmap.xlsx')

In [5]:
m

Unnamed: 0,species,mol_weight,C2H6,C3H8,BIGALK,C2H4,C3H6,C2H2,BIGENE,BENZENE,TOLUENE,XYLENES,CH2O,CH3CHO,CH3OH,C2H5OH,CH3COCH3,MEK,HCOOH,CH3COOH
0,voc1,46.2,0,0,0,0,0,0,0,0,0,0,0,0,0.15,0.85,0.0,0.0,0.0,0.0
1,voc2,30.0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,voc3,44.0,0,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,voc4,57.8,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
4,voc5,72.0,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
5,voc6,106.8,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
6,voc7,28.0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
7,voc8,42.0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
8,voc9,26.0,0,0,0,0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
9,voc10,,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


## From molar to mass fractional contribution

Create a new table with the mass fractional contribution of each VOC. This is needed because the VOC emissions data are expressed in mass units.

In [6]:
# Build the mass-weight table `ms` from the molar mapping `m`.
# NaN cells (e.g. a missing mol_weight) are replaced by 0, so those rows get zero weight.
ms=m.copy().fillna(0)
for c in ms.columns:
    if c in ('mol_weight','species'):
        continue
    # Mass-weighted contribution of every VOC row to species c, normalised by the
    # column total so that the weights of one target species add up to 1.
    # NOTE(review): the normalisation is per target species (column), not per source
    # VOC (row); e.g. a VOC split 0.15/0.85 between two species ends up with weight 1.0
    # in both columns. Confirm this is the intended mass split.
    mass=ms[c]*ms['mol_weight']
    ms[c]=mass/mass.sum()

    # Check that the mass fractional contributions sum to 1 for each species.
    # Compared with a tolerance: an exact `!= 1.0` test trips on floating-point
    # round-off. A column with no contributions (0/0 -> NaN) is still reported here.
    if not np.isclose(ms[c].sum(),1.0):
        print('sum is not 1: ' + str(ms[c].sum()))

In [7]:
ms

Unnamed: 0,species,mol_weight,C2H6,C3H8,BIGALK,C2H4,C3H6,C2H2,BIGENE,BENZENE,TOLUENE,XYLENES,CH2O,CH3CHO,CH3OH,C2H5OH,CH3COCH3,MEK,HCOOH,CH3COOH
0,voc1,46.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
1,voc2,30.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,voc3,44.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,voc4,57.8,0.0,0.0,0.136708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,voc5,72.0,0.0,0.0,0.170293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,voc6,106.8,0.0,0.0,0.252602,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,voc7,28.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,voc8,42.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,voc9,26.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,voc10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Get VOCs .nc files

In [8]:
# Open all VOC .nc files as a dictionary of xarray datasets keyed by VOC name. Easier to perform operations onto.
def get_vocs_arr(voc_dir):
    '''
    Open every VOC .nc file found in a directory as an xarray dataset.

    voc_dir: path to the directory where the VOC files are (with or without a
             trailing separator). The VOC name is taken from the 4th
             '_'-separated field of each file name.
    output: dict mapping VOC name -> xarray dataset, filled in sorted
            file-name order.
    '''
    vocs={}
    # sorted(): os.listdir returns entries in arbitrary order; sorting makes the
    # dictionary order (and anything accumulated over it) reproducible.
    for f in sorted(os.listdir(voc_dir)):
        # Skip anything that is not a .nc file (checkpoint folders, notes, ...),
        # which would otherwise break the name parsing or the open call.
        if not f.endswith('.nc'):
            continue
        vname=f.split('_')[3]
        vocs[vname]=xr.open_dataset(os.path.join(voc_dir,f))
    return vocs

In [9]:
# Open every VOC file found in voc_dir; the resulting dict is keyed by VOC name.
vocs=get_vocs_arr(voc_dir)

In [10]:
vocs.keys()

dict_keys(['voc8', 'voc24', 'voc6', 'voc2', 'voc16', 'voc23', 'voc21', 'voc13', 'voc11', 'voc3', 'voc1', 'voc10', 'voc22', 'voc20', 'voc5', 'voc7', 'voc15', 'voc17', 'voc25', 'voc18', 'voc14', 'voc4', 'voc12', 'voc19', 'voc9'])

## Speciate and save new files

For each species in the new mechanism, multiply each VOC dataset in the old mechanism by the corresponding mass fractional weight given by the mapping, and sum all the contributions.

In [11]:
for c in ms.columns:
    if c in ('mol_weight','species'):
        continue
    print(c)
    # Mass weight of every VOC for the target species c, keyed by VOC name.
    weights= pd.Series(ms[c].values,index=ms.species).to_dict()
    # Sum the VOC contributions according to their mass weight in the mapping.
    # A VOC file that is absent from the mapping counts as weight 0 (instead of
    # raising KeyError), and a NaN weight (column with no contributions, 0/0) is
    # ignored rather than producing an all-NaN output file.
    ds=None
    for k,voc_ds in vocs.items():
        w=weights.get(k,0.0)
        if w==0.0 or not np.isfinite(w):
            continue
        term=w*voc_ds
        ds=term if ds is None else ds+term
    if ds is None:
        # Nothing maps to this species: there is no dataset to annotate or save.
        print('no contributing VOCs for ' + c + ', file not written')
        continue
    ds.attrs['title']='Monthly emissions of ' + c
    ds.to_netcdf(os.path.join(save_dir,'monthly_v432_2010_'+ c + '_.0.1x0.1.nc'),format='NETCDF3_64BIT') #save new file.

C2H6
C3H8
BIGALK
C2H4
C3H6
C2H2
BIGENE
BENZENE
TOLUENE
XYLENES
CH2O
CH3CHO
CH3OH
C2H5OH
CH3COCH3
MEK
HCOOH
CH3COOH


## Check mass conservation

In [13]:
# Re-open the BIGALK file written by the speciation loop so it can be compared with a manual calculation.
big=xr.open_dataset(save_dir+'monthly_v432_2010_BIGALK_.0.1x0.1.nc')

In [14]:
# Manual calculation example: BIGALK rebuilt by hand as a weighted sum of its contributing VOCs.
# The voc4/voc5/voc6 weights are the BIGALK values shown in the `ms` table above, rounded to
# 6 decimals; the voc18/voc19 weights are presumably taken from the same column (rows not
# shown in the displayed table) - TODO confirm.
big_man=0.136708*vocs['voc4']+0.170293*vocs['voc5']+ 0.252602*vocs['voc6']+0.247635*vocs['voc18']+0.192763*vocs['voc19']

In [15]:
# Test: the saved BIGALK file must match the manual weighted sum within assert_allclose's tolerance.
# NOTE(review): this confirms the file reproduces the weights in `ms`; it does not by itself
# show that the total VOC mass is conserved across all output species.
xr.testing.assert_allclose(big, big_man)

In [16]:
big