# Copyright Netherlands eScience Center <br>
** Function     : Packing netCDF for the precipitation fields from ERA-Interim** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2019.04.12 ** <br>
** Last Update  : 2019.04.12 ** <br>
Description     : This notebook aims to pack the precipitation fields from ERA-Interim.<br>
Return Values   : netCDF4 <br>
Caveat          : The precipitation fields are forecast fields rather than analysis fields. Each value is accumulated from the start of its forecast, so to obtain the amount for a given 3-hour interval we must subtract the accumulated value of the previous forecast step. Each forecast spans 12 hrs. The starting time of each forecast and its forecast steps are given below: <br>
00:00 : 3:00 + 6:00 + 9:00 + 12:00 <br>
12:00 : 3:00 + 6:00 + 9:00 + 12:00 <br>

In [1]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os

In [7]:
################################   Input zone  ######################################
# first and last year (both inclusive) of the period to be packed
start_year = 1979
end_year = 2017
# directory holding the yearly input files
# (the path points to the ERA-Interim surface precipitation fields)
datapath_radiation = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/ERAI/regression/surface/precipitation'
# directory in which the packed netCDF file is written
output_path = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/ERAI/regression'
####################################################################################

In [3]:
def var_key_retrieve(datapath, year):
    """Open the yearly precipitation file and return the dataset handle.

    Parameters
    ----------
    datapath : str
        Directory that contains the yearly netCDF files.
    year : int
        Year inserted into the file name
        ``surface_erai_monthly_075_<year>_precipitation.nc``.

    Returns
    -------
    netCDF4.Dataset
        The opened dataset. It is not closed here; closing it is left to
        the caller.
    """
    print ("Start retrieving datasets %d (y)" % (year))
    # per the original note, each field is expected on a (241,480) grid
    file_name = 'surface_erai_monthly_075_%d_precipitation.nc' % (year)
    dataset_handle = Dataset(os.path.join(datapath, file_name))

    print ("Retrieving datasets successfully and return the variable key!")
    return dataset_handle

In [5]:
def create_netcdf_point (pool_tp, pool_e, pool_cp, output_path):
    """Write the packed monthly fields to a single NETCDF4 file.

    The file ``surface_erai_monthly_regress_1979_2017_precipitation.nc`` is
    created in ``output_path`` with the dimensions
    (year, month, latitude, longitude). Dimension sizes are taken from the
    shape of ``pool_tp``.

    The coordinate values are not passed in: they are read from the
    module-level names ``period``, ``index_month``, ``latitude`` and
    ``longitude``, which must exist before this function is called.

    Parameters
    ----------
    pool_tp, pool_e, pool_cp : array-like, (year, month, latitude, longitude)
        Total precipitation, evaporation and convective precipitation.
    output_path : str
        Directory in which the netCDF file is created.
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    n_year, n_month, n_lat, n_lon = np.shape(pool_tp)
    field_dims = ('year','month','latitude','longitude')
    # NOTE: the period in the file name is fixed and does not follow the
    # actual content of ``period``
    target_file = os.path.join(output_path, 'surface_erai_monthly_regress_1979_2017_precipitation.nc')
    # the context manager closes the file even when a write fails, so no
    # half-written file handle is left open
    with Dataset(target_file,'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year',n_year)
        data_wrap.createDimension('month',n_month)
        data_wrap.createDimension('latitude',n_lat)
        data_wrap.createDimension('longitude',n_lon)
        # create coordinate variables
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        month_wrap_var = data_wrap.createVariable('month',np.int32,('month',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 4d variables (compressed)
        tp_wrap_var = data_wrap.createVariable('tp',np.float64,field_dims,zlib=True)
        e_wrap_var = data_wrap.createVariable('e',np.float64,field_dims,zlib=True)
        cp_wrap_var = data_wrap.createVariable('cp',np.float64,field_dims,zlib=True)
        # global attributes
        data_wrap.description = 'Monthly mean 2D fields of ERA-Interim on surface level'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'

        # the stored fields are 3-hourly amounts divided by 3*3600 s,
        # i.e. rates, hence metres per second rather than metres
        tp_wrap_var.units = 'm s-1'
        e_wrap_var.units = 'm s-1'
        cp_wrap_var.units = 'm s-1'

        tp_wrap_var.long_name = 'total precipitation'
        e_wrap_var.long_name = 'evaporation'
        cp_wrap_var.long_name = 'convective precipitation'

        # writing coordinates (taken from module-level names)
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        month_wrap_var[:] = index_month
        year_wrap_var[:] = period

        # writing fields
        tp_wrap_var[:] = pool_tp
        e_wrap_var[:] = pool_e
        cp_wrap_var[:] = pool_cp

    print ("The generation of netcdf files for fields on surface is complete!!")

In [4]:
def retriver(key):
    """Turn the accumulated forecast fields of one file into monthly mean rates.

    The fields 'tp', 'e' and 'cp' are read from ``key.variables``. Along the
    first axis the records are treated as groups of four consecutive
    forecast steps that accumulate from the start of the forecast: the first
    record of each group is kept as it is, and each following record has the
    preceding one subtracted. Every block of 8 de-accumulated records is
    then averaged into one of 12 monthly values and divided by the length of
    a forecast step (3 hours, in seconds), giving a rate per second.

    Parameters
    ----------
    key : object with a ``variables`` mapping
        Opened dataset, e.g. the handle returned by ``var_key_retrieve``.
        It must provide 'tp', 'e', 'cp' indexed as
        (time, latitude, longitude) as well as 'latitude' and 'longitude'.

    Returns
    -------
    tuple of numpy.ndarray
        ``(tp_monthly, e_monthly, cp_monthly)``, each of shape
        (12, len(latitude), len(longitude)).
    """
    print ('Extract synoptic mean fields.')
    # read from the dataset passed in, not from a module-level variable
    fields = key.variables
    n_lat = len(fields['latitude'][:])
    n_lon = len(fields['longitude'][:])
    seconds_per_step = 3 * 3600

    monthly_fields = []
    for name in ('tp', 'e', 'cp'):
        accumulated = fields[name][:]
        # remove the accumulation within each group of four forecast steps
        synoptic = np.zeros(accumulated.shape)
        synoptic[0::4,:,:] = accumulated[0::4,:,:]
        for step in range(1, 4):
            synoptic[step::4,:,:] = accumulated[step::4,:,:] - accumulated[step-1::4,:,:]
        # take the mean per month (8 records each) and convert the
        # 3-hourly amount into a rate per second
        monthly = np.zeros((12, n_lat, n_lon),dtype=float)
        for month in range(12):
            monthly[month,:,:] = np.mean(synoptic[month*8:month*8+8,:,:], 0) / seconds_per_step
        monthly_fields.append(monthly)

    tp_monthly, e_monthly, cp_monthly = monthly_fields
    return tp_monthly, e_monthly, cp_monthly

In [8]:
if __name__=="__main__":
    # Driver: read every yearly file of the period, reduce it to monthly
    # fields and pack all years into one netCDF file.
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # years covered (end year inclusive) and index of months
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    # sizes of the output dimensions; the grid size is fixed here and is
    # not read from the input files
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_latitude = 241
    Dim_longitude = 480
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool
    pool_shape = (Dim_year,Dim_month,Dim_latitude,Dim_longitude)
    pool_tp = np.zeros(pool_shape,dtype = float)
    pool_e = np.zeros(pool_shape,dtype = float)
    pool_cp = np.zeros(pool_shape,dtype = float)
    latitude = np.zeros(Dim_latitude,dtype=float)
    longitude = np.zeros(Dim_longitude,dtype=float)
    # loop for calculation
    # the position in the pool follows the position in ``period`` so that
    # it stays correct when start_year is changed
    for year_index, year in enumerate(period):
        # open the yearly file; the context manager closes it as soon as
        # its data have been read, so handles do not pile up over the years
        with var_key_retrieve(datapath_radiation,year) as var_key:
            latitude = var_key.variables['latitude'][:]
            longitude = var_key.variables['longitude'][:]
            tp_monthly, e_monthly, cp_monthly = retriver(var_key)
        pool_tp[year_index,:,:,:] = tp_monthly
        pool_e[year_index,:,:,:] = e_monthly
        pool_cp[year_index,:,:,:] = cp_monthly
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    create_netcdf_point(pool_tp, pool_e, pool_cp, output_path)
    print ('Packing 2D fields of ERA-Interim on surface level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')

Start retrieving datasets 1979 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1980 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1981 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1982 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1983 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1984 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1985 (y)
Retrieving datasets successfully and return the variable key!
Extract synoptic mean fields.
Start retrieving datasets 1986 (y)
Retrieving datasets successfully and return the variable key!
Extract synopt