This notebook reorganises the raw EDGAR v5.0 emission gridmaps for 2015 by species. The outputs are netCDF files, one per species,
each containing the monthly contributions of all the sectors specified in https://edgar.jrc.ec.europa.eu/overview.php?v=50_AP (total: 27), except for the supersonic aviation sector (no monthly data available).

In [1]:
import os
import tempfile
from glob import glob

import numpy as np
import xarray as xr

## Merge monthly files per species and sector

In [2]:
# define folders
# `rootdir` is globbed by the merge cell for its per-species sub-directories;
# `save_dir` receives the merged per-species netCDF files.
rootdir='../edgarv5_emissions'
save_dir = "/geos/d21/s1878599/edgarv5_process/monthly_all_sectors"

# Create the output dir (and any missing parents) if it doesn't exist.
# Pure-Python equivalent of `mkdir -p`: no shell call and no separate isdir check.
os.makedirs(save_dir, exist_ok=True)

In [3]:
# LOOP OVER SPECIES
# For each species dir under `rootdir`: add a `time` dim/coord to every monthly
# sector file, rename the emission variable after its sector, and merge all
# sectors and months into a single netCDF file in `save_dir`.
#
# Temporary monthly files are written to a scratch directory that is removed
# automatically, so unrelated *.nc files in the working directory are never
# merged into the output nor deleted.
excluded_species = {'NMVOC4.3.2'}  # NMVOC from version 4.3.2: these data will be used for NMVOC speciation later.
excluded_sectors = {'TNR_Aviation_SPS'}  # sectors to exclude from emission count.

for sp_dir in sorted(glob(os.path.join(rootdir, '*'))):
    # Species name is the last path component (independent of how deep rootdir is).
    sp_name = os.path.basename(sp_dir)
    if not os.path.isdir(sp_dir) or sp_name in excluded_species:
        continue
    print('SPECIES '+ sp_name)

    # Scratch dir lives under save_dir so the (potentially large) temporary files
    # land on the same volume as the outputs rather than on the system /tmp.
    with tempfile.TemporaryDirectory(dir=save_dir) as tmp_dir:
        n_written = 0

        # LOOP OVER SECTORS DIRS
        for sector_dir in sorted(glob(os.path.join(sp_dir, '*'))):
            sector = os.path.basename(sector_dir)  # sector name is the subdir name.
            if not os.path.isdir(sector_dir) or sector in excluded_sectors:
                continue

            # LOOP OVER MONTHLY FILES IN EACH SECTOR DIR
            # add time dim and coord (not present) to each monthly file.
            for f in sorted(os.listdir(sector_dir)):
                if not f.endswith('.nc'):  # skips '_readme.html' and any other non-netCDF file.
                    continue
                month = int(f.split('_')[3])  # month idx is in the file str.
                # Context manager closes the source file once the temporary copy is written.
                with xr.open_dataset(os.path.join(sector_dir, f)) as raw:
                    monthly = (
                        raw.assign_coords({'time': month})
                        .expand_dims(dim='time')
                        .rename_vars({('emi_'+sp_name.lower()): (sector.replace("_", "-"))})
                    )
                    # Sector prefix keeps temporary names unique across sectors.
                    monthly.to_netcdf(
                        os.path.join(tmp_dir, sector+'_time_'+f),
                        format='NETCDF3_64BIT',
                    )
                n_written += 1

        if n_written == 0:
            # open_mfdataset would fail on an empty file list; report and move on.
            print('  no monthly files found for '+ sp_name +', skipping')
            continue

        # For a given species, put all sectors in a single file.
        # The merged dataset is closed before the scratch dir is deleted.
        with xr.open_mfdataset(os.path.join(tmp_dir, '*.nc'), combine='by_coords') as m_ds:
            m_ds.to_netcdf(os.path.join(save_dir, 'monthly_v50_2015_'+ sp_name +'_.0.1x0.1.nc'))


SPECIES CO
SPECIES PM2.5
SPECIES NH3
SPECIES PM10
SPECIES NMVOC
SPECIES OC
SPECIES SO2
SPECIES BC
SPECIES NOx


## Check

In [5]:
pm10=xr.open_dataset(save_dir +'/monthly_v50_2015_OC_.0.1x0.1.nc')

In [6]:
pm10