# Copyright Netherlands eScience Center <br>
** Function     : Packing netCDF for the surface wind and precipitation fields from MERRA2** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2019.10.03 ** <br>
** Last Update  : 2019.10.03 ** <br>
Description     : This notebook aims to pack the surface wind and precipitation fields from MERRA2.<br>
Return Values   : netCDF4 <br>
Caveat          : The precipitation fields are forecast fields instead of analysis fields. Unlike ERA-Interim, those forecast fields are unpacked by NASA and thus they are not accumulated values. They can be used directly. <br>

However, the unit is kg/(m²·s). We will change it to m/s.<br>
1 kg of rain water spread over 1 square meter of surface is 1 mm in thickness.<br>
There are 60×60×24 = 86400 seconds in one day.<br>
Therefore, 1 kg/(m²·s) = 1 mm/s = 86400 mm/day.<br>
Consequently, from kg/(m²·s) to m/s (convert to mm/day, back to mm/s, then from mm to m; the two factors of 86400 cancel, so the net effect is a division by 1000):<br>
precipitation \* 86400 / (24\*3600) / 1000<br>

In [1]:
import numpy as np
from netCDF4 import Dataset
import os

Initialization - Start with location of input and extraction of variables
Time span of each product:
- **ERA-Interim** 1979 - 2016
- **MERRA2**      1980 - 2016
- **JRA55**       1979 - 2015
- **ORAS4**       1958 - 2017
- **GLORYS2V3**   1993 - 2014
- **SODA3**       1980 - 2015

In [2]:
################################   Input zone  ######################################
# Configuration cell: every name assigned here is read by the later cells.
# specify starting and ending time (both years are included in the packing loop)
# NOTE(review): the product list in the notebook text gives MERRA2 as 1980 - 2016,
# while end_year is 2017 here - confirm that the 2017 files are present.
start_year = 1980
end_year = 2017
# specify data path
# MERRA2 2D fields - monthly surface flux files that hold the total precipitation
datapath_prec = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/MERRA2/regression/PRECPTOT'
# sample file, used later only to read the latitude / longitude coordinates
datapath_benchmark = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/MERRA2/regression/PRECPTOT'
# the handle is opened here and is not closed anywhere in the visible cells
benchmark_key = Dataset(os.path.join(datapath_benchmark,"MERRA2_100.tavgM_2d_flx_Nx.198801.nc4.nc"))
# specify output path for the packed netCDF file
output_path = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/MERRA2/regression'
####################################################################################

In [8]:
def var_key_retrieve(datapath_prec, year, month):
    """Open the MERRA2 monthly surface-flux file for one year and month.

    The file name is built from the production-stream prefix matching ``year``
    (100 before 1992, 200 before 2001, 300 before 2011, 400 afterwards) and the
    zero-padded ``month``, and is looked up inside ``datapath_prec``.

    The month tag is formatted locally, so the function does not depend on a
    notebook-level ``namelist_month`` list having been created beforehand.

    Parameters
    ----------
    datapath_prec : str
        Directory that contains the ``MERRA2_*.tavgM_2d_flx_Nx.*.nc4.nc`` files.
    year : int
        Calendar year of the requested file.
    month : int
        Calendar month of the requested file, 1-12.

    Returns
    -------
    netCDF4.Dataset
        The handle returned by ``Dataset(path)``. It is not closed here; the
        caller has to close it.

    Raises
    ------
    ValueError
        If ``month`` is outside 1-12 (a month of 0 would otherwise silently
        wrap around to December).
    """
    month = int(month)
    if not 1 <= month <= 12:
        raise ValueError("month must be within 1-12, got {}".format(month))
    month_tag = '{:02d}'.format(month)
    # get the path to each datasets
    print ("Start retrieving datasets {} (y) - {} (m)".format(year,month_tag))
    # The stream number in the file name changes with the production period
    if year < 1992:
        stream = 100
    elif year < 2001:
        stream = 200
    elif year < 2011:
        stream = 300
    else:
        stream = 400
    file_name = 'MERRA2_{}.tavgM_2d_flx_Nx.{}{}.nc4.nc'.format(stream, year, month_tag)
    # get the variable keys
    var_key_prec = Dataset(os.path.join(datapath_prec, file_name))

    print ("Retrieving datasets successfully and return the variable key!")
    return var_key_prec

In [4]:
def create_netcdf_point(pool_prec, output_path):
    """Write the packed precipitation array to a NETCDF4 file.

    The file ``surface_merra_monthly_regress_1980_2017_prec.nc`` is created
    (mode ``'w'``) inside ``output_path``. It holds the coordinate variables
    ``year``, ``month``, ``latitude`` and ``longitude`` and the 4-d variable
    ``prec`` with dimensions (year, month, latitude, longitude).

    The dimension sizes are taken from the shape of ``pool_prec``. The
    coordinate values are read from the notebook-level names ``period``,
    ``index_month``, ``latitude`` and ``longitude``, which must exist before
    this function is called.

    Parameters
    ----------
    pool_prec : array-like
        4-d array ordered as (year, month, latitude, longitude). It is written
        unchanged into ``prec``, whose ``units`` attribute is set to ``'m/s'``.
    output_path : str
        Directory in which the netCDF file is created.

    Returns
    -------
    None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    n_year, n_month, n_lat, n_lon = np.shape(pool_prec)
    # NOTE(review): the years in the file name are fixed and do not follow
    # start_year / end_year from the input cell.
    target = os.path.join(output_path, 'surface_merra_monthly_regress_1980_2017_prec.nc')
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # The with-block closes the file even when one of the writes below fails.
    with Dataset(target,'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year',n_year)
        data_wrap.createDimension('month',n_month)
        data_wrap.createDimension('latitude',n_lat)
        data_wrap.createDimension('longitude',n_lon)
        # create coordinate variables for the 4 dimensions
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        month_wrap_var = data_wrap.createVariable('month',np.int32,('month',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 4-d variable (zlib compression enabled)
        prec_wrap_var = data_wrap.createVariable('prec',np.float64,('year','month','latitude','longitude'),zlib=True)
        # global attributes
        data_wrap.description = 'Monthly mean precipitation from MERRA2 at each grid point'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        prec_wrap_var.units = 'm/s'
        prec_wrap_var.long_name = 'total precipitation'
        # writing data
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        month_wrap_var[:] = index_month
        year_wrap_var[:] = period
        prec_wrap_var[:] = pool_prec
    print ("Create netcdf file successfully")

In [9]:
if __name__=="__main__":
    # Driver cell: read every monthly file between start_year and end_year
    # (inclusive), convert the precipitation to m/s, collect it in one
    # (year, month, latitude, longitude) array and write that array to netCDF.
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # index of years and months
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    # coordinates are taken from the benchmark (sample) file
    latitude = benchmark_key.variables['lat'][:]
    longitude = benchmark_key.variables['lon'][:]
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    # get invariant from benchmark file
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_latitude = len(latitude)
    Dim_longitude = len(longitude)
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool for the precipitation at each grid point
    pool_prec = np.zeros((Dim_year,Dim_month,Dim_latitude,Dim_longitude),dtype = float)
    # loop for calculation
    # The year index is the position within `period`, so the loop stays correct
    # when start_year is changed (a fixed offset of 1980 would misplace the data
    # or run out of bounds).
    for year_index, i in enumerate(period):
        for j in index_month:
            # get the key of each variable
            var_key_prec = var_key_retrieve(datapath_prec,i,j)
            try:
                # convert from kg/m2s to m/s
                # NOTE(review): the MERRA-2 file specification names the total
                # precipitation variable of the flx collection 'PRECTOT' - confirm
                # that 'PRECPTOT' is really the key inside these files.
                pool_prec[year_index,j-1,:,:] = var_key_prec.variables['PRECPTOT'][0,:,:]* 86400 / (24*3600) / 1000
            finally:
                # release the file handle; one file is opened per month and year
                var_key_prec.close()
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    create_netcdf_point(pool_prec, output_path)
    print ('Packing 2D fields of MERRA2 is complete!!!')
    print ('The output is in sleep, safe and sound!!!')

Start retrieving datasets 1980 (y) - 01 (m)
Retrieving datasets successfully and return the variable key!


NameError: name 'var_key_SFlux' is not defined