# Creating emission masks for the GEOS-Chem model

## Here I show how to use Xarray to create model masks over northern China

In [None]:
# load packages
import os
import sys
import numpy as np
import xarray as xr
import pandas as pd

In [None]:
###########################################################################################################################
# HEMCO can process your masks and regrid them to whichever model resolution you choose,
# but we can also prepare our masks directly at the model resolution.
# Here I use the nested GEOS-Chem model over Asia at 0.5x0.625 degrees (05x0625) as the example.
###########################################################################################################################
# choose the mask domain over the North China Plain (NCP) on the 05x0625 grid
# start from an existing model output file to see which grid centres the model uses

# move to the mask working directory, open a sample model output and print its lon and lat
os.chdir("/rds/projects/2018/maraisea-glu-01/RDS/GEOSChem/MASK")
sample_data = xr.open_dataset("sample_GEOS-Chem_output_AS_nested.nc4")
for heading, axis_name in (("GEOS-Chem model output lon:", "lon"), ("GEOS-Chem model output lat:", "lat")):
    # one heading, the coordinate itself, then a divider line
    print(heading, sample_data[axis_name], "#"*50, sep="\n")

# lons and lats of the GEOS-Chem grid centres (not the domain boundaries) inside the NCP box
# np.linspace includes both end points, so give it (first centre, last centre, number of centres)
NCP_lon = np.linspace(107.5, 120, 21)  # 21 centres, 5/8 degrees apart
NCP_lat = np.linspace(32, 43, 23)      # 23 centres, 1/2 degree apart
time = np.array(['2020-01-01T00:00:00.000000000'], dtype='datetime64[ns]')  # single time stamp
for heading, centres in (("Your lon:", NCP_lon), ("Your lat:", NCP_lat)):
    print(heading, centres, "#"*50, sep="\n")

# you can double check if default GEOS-Chem grids contain all the NCP_lon and NCP_lat you've created
# the coordinates are matched within a small tolerance instead of by exact float equality, so the
# check does not report a false "False" when the file stores them at another precision (e.g. float32)
# 1e-4 degrees is far below the grid spacing, so two neighbouring grid centres can never be confused
coord_tol = 1e-4
model_lon = np.asarray(sample_data['lon'].values, dtype=float)
model_lat = np.asarray(sample_data['lat'].values, dtype=float)
# compare every requested centre against every model centre; each request needs at least one match
lon_on_grid = bool(np.isclose(np.ravel(NCP_lon)[:, None], model_lon[None, :], rtol=0, atol=coord_tol).any(axis=1).all())
lat_on_grid = bool(np.isclose(np.ravel(NCP_lat)[:, None], model_lat[None, :], rtol=0, atol=coord_tol).any(axis=1).all())
print("Do default GEOS-Chem grids contain all your lon and lat?",lon_on_grid,lat_on_grid,sep="\n")

In [None]:
###########################################################################################################################
# now make the masks matching the same dimensions

# example 1: emission mask for CO, with the scale factor of 5 or any number you like

# first assign values and convert to xarray data arrays
# the dtype is pinned to float32: np.full would otherwise infer int64 from the literal 5, so the
# type stored in the file would silently depend on whether the factor is typed as 5, 5.0 or 0.5
NCP_mask_CO = np.full((1,len(NCP_lat),len(NCP_lon)), 5, dtype=np.float32)
NCP_mask_CO = xr.DataArray(NCP_mask_CO, coords=[('time',time),('lat', NCP_lat),('lon', NCP_lon)])

# provide details of the xarray data array as these will appear in the output file
# every axis gets a long_name and an axis label; the time units are left to xarray, which writes
# them itself when it encodes the datetime64 values (so 'units' must not be set by hand here)
NCP_mask_CO.name = "MASK"
NCP_mask_CO['time'].attrs = {'long_name':'Time','axis':'T'}
NCP_mask_CO['lon'].attrs = {'long_name':'Longitude','units':'degrees_east','axis':'X'}
NCP_mask_CO['lat'].attrs = {'long_name':'Latitude','units':'degrees_north','axis':'Y'}
NCP_mask_CO.attrs = {'long_name':'CO emission mask for North China Plain','units':'unitless'}

# output (relative path: the file lands in the current working directory)
NCP_mask_CO.to_netcdf("NCP_CO_mask.nc")

In [None]:
###########################################################################################################################
# To verify your results, you can

# Method 1: build a quick plot function to plot the mask values
%matplotlib inline
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import geopandas as gpd

# read shapefile
# the file is addressed by its full path and the working directory is left untouched:
# changing directory here would make the relative "NCP_CO_mask.nc" opened further down
# resolve to the shapefile folder instead of the folder the mask was written to
China_provinces = gpd.read_file(
    os.path.join(
        "/rds/projects/2018/maraisea-glu-01/Study/Research_Data/BTH/domain/gadm36_CHN_shp",
        "gadm36_CHN_1.shp",
    )
)

# just check the data ranges from the default colorbar scheme
def quick_plot(input_xr):
    """Draw the 'MASK' variable of ``input_xr`` on a lon/lat map with province outlines.

    ``input_xr`` must provide a 'MASK' entry that has a ``.plot`` method
    (presumably an xarray Dataset read from a mask file).
    The map window is fixed to 100-125 in longitude and 30-45 in latitude,
    the colours come from the 'jet' colormap, and nothing is returned.
    """
    plate_carree = ccrs.PlateCarree()
    map_ax = plt.axes(projection=plate_carree)
    map_ax.set_extent([100, 125, 30, 45], crs=plate_carree)  # [lon,lon,lat,lat]
    # province boundaries as black outlines without fill, so the mask stays visible
    map_ax.add_geometries(
        China_provinces.geometry,
        crs=plate_carree,
        edgecolor='black',
        facecolor='none',
    )
    input_xr['MASK'].plot(ax=map_ax, cmap='jet')
###########################################################################################################################
# Method 2: view all scale factors in pandas dataframes

# open the mask that you created
# the values are loaded into memory and the file is closed again straight away; a handle left
# open here can stop the file from being overwritten when the mask is re-created
with xr.open_dataset("NCP_CO_mask.nc") as mask_file:
    # keep only the mask variable, renamed so the column title says what the numbers are
    NCP_CO_mask  = mask_file['MASK'].load().to_dataset(name='CO_mask_value')

# convert the xarrays to a single pandas dataframe
def xr_to_df(data):
    """Return ``data.to_dataframe()`` with its index turned into ordinary columns.

    ``data`` is any object with a ``to_dataframe()`` method (an xarray object in this file).
    The result has a fresh 0..n-1 index and the former index levels as leading columns.
    """
    return data.to_dataframe().reset_index()

# view in pandas data frames
# xr_to_df flattens the mask dataset into a table; the table is then shown with `display`
# NOTE: `display` is not imported in this file - it is presumably the helper that IPython/Jupyter
# puts in the namespace, so this line is expected to work only inside a notebook session
NCP_CO_mask_df = xr_to_df(NCP_CO_mask)
display(NCP_CO_mask_df)
###########################################################################################################################