## Comment

## Import modules

In [1]:
#import modules
import xarray as xr
import numpy as np
import os

## Calculate contribution of each VOC in the total emissions

In [3]:
# Set paths contribution of each VOC in the total emissions.
voc_dir = '/geos/d21/s1878599/EMISSIONS_WRF/anthro_data/EDGAR-HTAP/MOZART_MOSAIC/ALL_2010/NMVOC/' # foilder with tot NMVOCs.
ds_NMVOC = xr.open_dataset(voc_dir +'EDGAR_HTAP_emi_NMVOC_2010.0.1x0.1.nc') # total NMVOC.

In [4]:
# Looping over VOC species and calculate the relative contribution to NMVOC.

sum_coeff = 0

for file in os.listdir(voc_dir):
    
    if file.startswith('EDGAR'):
    
        ds_voc = xr.open_dataset(voc_dir + file) # open VOC file.
        coeff = (ds_voc['emis_tot']/ds_NMVOC['emis_tot']).fillna(value=0)  #calculate relative contribution. Fill nan (division by 0) with 0.
        print('Processing ' + file)
        voc_name = file.split('_')[3] # get VOC name.
        path = voc_dir + voc_name + '_percent.nc'
        coeff.to_netcdf(path)   # Save to netCDF file.
        sum_coeff = sum_coeff + coeff  # for checking sum all coefficient is one.

sum_coeff.to_netcdf(voc_dir + 'sum_all_percent.nc')

Processing EDGAR_HTAP_emi_CH3OH_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_CH3COCH3_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_C3H6_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_TOLUENE_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_BIGENE_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_C3H8_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_BIGALK_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_C2H4_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_CH3CHO_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_C2H5OH_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_MEK_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_NMVOC_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_XYLENE_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_BENZENE_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_C2H6_2010.0.1x0.1.nc
Processing EDGAR_HTAP_emi_CH2O_2010.0.1x0.1.nc


## Making emissions files of Transport from EDGAR compatible with WRF-Chem preprocess tools

In [5]:
# Import NMVOC transport data
trans_path = '/geos/d21/s1878599/EMISSIONS_WRF/anthro_data/EDGAR-HTAP/MOZART_MOSAIC/TRANSPORT_2010/'
trans_voc_name = 'edgar_HTAP_NMVOC_emi_TRANSPORT_2010_'   # specific monthly file name for each voc.

In [6]:
# create a list with montlhy data with extended time dimension (not present in originl files).

month_list =[0]*12  # create list where to store monthly data.

for file in os.listdir(trans_path):
    
    if file.startswith(trans_voc_name):   # get only the species you want.
    
        month = file.split('_')[6].split('.')[0]   # get month from string.
        ds_voc_month = xr.open_dataset(trans_path + file)  # open VOC file.
        expanded = ds_voc_month.expand_dims('time') # espand dimension over time.
        month_list[int(month)-1] = expanded   #add to list (ordered by month).
               

In [7]:
#concatenate list elements in a single dataset.
annual_emiss = month_list[0] 
for i in range(len(month_list)-1):
    annual_emiss = xr.concat([annual_emiss, month_list[i+1]], 'time')                 

In [8]:
# Add date and datesec varaibles to nc files (needed for preprocessing in WRF-Chem anthro_emiss).

date = np.array([20100101,20100201,20100301,20100401,20100501,20100601,20100701,20100801,20100101,20100901,20101001,20101201]).astype(np.int32)
datesec = np.zeros([(12)]).astype(np.int32)
annual_emiss['datesec'] = xr.DataArray(datesec, dims=['time'])
annual_emiss['date'] = xr.DataArray(date, dims=['time'])
annual_emiss.attrs['title'] = 'Monthly Mean Emissions of NMVOC'

In [9]:
#save to netcdf file
annual_emiss.to_netcdf(trans_path + 'edgar_HTAP_NMVOC_emi_TRANSPORT_2010_all.0.1x0.1.nc')

In [13]:
# Speciate NMVOC transport emissions.

trans_NMVOC = trans_path + 'edgar_HTAP_NMVOC_emi_TRANSPORT_2010_all.0.1x0.1.nc'
tr_ds_NMVOC = xr.open_dataset(trans_NMVOC)
spec_path = trans_path + 'NMVOCspec/'

date = np.array([20100101,20100201,20100301,20100401,20100501,20100601,20100701,20100801,20100101,20100901,20101001,20101201]).astype(np.int32)
datesec = np.zeros([(12)]).astype(np.int32)

sum_coeff = 0

for file in os.listdir(voc_dir):  # loop over the speciation coefficient files.
    
    if file.endswith('percent.nc') and file != 'NMVOC_percent.nc': # get only the species you want without the total NMOVC.
        ts_voc = xr.open_dataset(voc_dir + file)  # open VOC file.
        voc_name = file.split('_')[0]
        ts_voc['emis_tot'].values = tr_ds_NMVOC.emi_nmvoc.values * ts_voc.emis_tot.values   
        ts_voc['datesec'] = xr.DataArray(datesec, dims=['time'])    
        ts_voc['date'] = xr.DataArray(date, dims=['time'])
        ts_voc.attrs['title'] = 'Monthly Mean Emissions of TRANSPORT ' + voc_name 
        ts_voc.to_netcdf(spec_path + 'edgar_HTAP_' + voc_name + '_emi_TRANSPORT_2010.0.1x0.1.nc')  #save to netcdf file
        
        sum_coeff = sum_coeff + ts_voc  # for checking sum all contribution is the same.

sum_coeff.to_netcdf(spec_path + 'NMVOC_sum.nc')

In [21]:
# Repeat for all other species (BC, OC, CO, NOx, SO2, NH3, PM10, PM2.5). 
#! CHANGE 'name' variable accordingly.

# create a list with montlhy data with extended time dimension.

name = 'PM2.5'
trans_voc_name = 'edgar_HTAP_'+ name +'_emi_TRANSPORT_2010_'
month_list =[0]*12  # create list where to store monthly data.

for file in os.listdir(trans_path):
    
    if file.startswith(trans_voc_name):   # get only the species you want.
    
        month = file.split('_')[6].split('.')[0]   # get month from string.
        ds_voc_month = xr.open_dataset(trans_path + file)  # open VOC file.
        expanded = ds_voc_month.expand_dims('time') # espand dimension over time.
        month_list[int(month)-1] = expanded   #add to list (ordered by month) .
               

#concatenate in a single dataset.
annual_emiss = month_list[0] 
for i in range(len(month_list)-1):
    annual_emiss = xr.concat([annual_emiss, month_list[i+1]], 'time')      
    

# Add date and datesec varaibles to nc files (needed for preprocessing in WRF-Chem anthro_emiss).

date = np.array([20100101,20100201,20100301,20100401,20100501,20100601,20100701,20100801,20100101,20100901,20101001,20101201]).astype(np.int32)
datesec = np.zeros([(12)]).astype(np.int32)
annual_emiss['datesec'] = xr.DataArray(datesec, dims=['time'])
annual_emiss['date'] = xr.DataArray(date, dims=['time'])
annual_emiss.attrs['title'] = 'Monthly Mean Emissions of ' + name
annual_emiss = annual_emiss.rename_vars({'emi_' + name.lower() :'emis_tot'})

#save to netcdf file
annual_emiss.to_netcdf(trans_path + 'edgar_HTAP_' + name +'_emi_TRANSPORT_2010_allmonths.0.1x0.1.nc')

## Calculate difference from  TOT_EMIS - TRANSPORT EMISSIONS

In [30]:
emi_path = '/geos/d21/s1878599/EMISSIONS_WRF/anthro_data/EDGAR-HTAP/MOZART_MOSAIC/'

for file in os.listdir(emi_path + 'ALL_2010/'):
    
    if file.startswith('EDGAR_HTAP'):
        spec_name = file.split('_')[3]  # get species name from string.
        print(spec_name)
        ds_tot = xr.open_dataset(emi_path +'ALL_2010/' + file)  # open tot emi file
        
        if os.path.isfile((emi_path + 'TRANSPORT_2010/' + 'edgar_HTAP_' + spec_name + '_emi_TRANSPORT_2010_allmonths.0.1x0.1.nc')) and spec_name != 'NMVOC':
            ds_trans = xr.open_dataset(emi_path + 'TRANSPORT_2010/' + 'edgar_HTAP_' + spec_name + '_emi_TRANSPORT_2010_allmonths.0.1x0.1.nc') 
            ds_tot['emis_tot'] =  ds_tot['emis_tot'] - ds_trans['emis_tot']

        elif spec_name != 'NMVOC':
            ds_trans = xr.open_dataset(emi_path + 'TRANSPORT_2010/NMVOCspec/' + 'edgar_HTAP_' + spec_name + '_emi_TRANSPORT_2010.0.1x0.1.nc') 
            ds_tot['emis_tot'] =  ds_tot['emis_tot'] - ds_trans['emis_tot']

        
        ds_tot.to_netcdf('/geos/d21/s1878599/EMISSIONS_WRF/anthro_data/EDGAR-HTAP/MOZART_MOSAIC/ALL_NO_TRANSPORT_2010/' + 'EDGAR_HTAP_emi_' + spec_name
        + '_2010.0.1x0.1.nc', format = 'NETCDF3_64BIT')

CH3OH
SO2
CH3COCH3
C3H6
TOLUENE
BIGENE
C3H8
BC
BIGALK
NH3
NOx
C2H4
CH3CHO
OC
C2H5OH
MEK
NMVOC
XYLENE
PM10
BENZENE
C2H6
PM2.5
CO
CH2O
