# Copyright Netherlands eScience Center <br>
**Function**     : Packing netCDF for the vertical profile of fields (pressure level) from ERA-Interim <br>
**Author**       : Yang Liu <br>
**First Built**  : 2018.10.29 <br>
**Last Update**  : 2019.01.02 <br>
Description     : This notebook packs the zonal-mean vertical profiles of 3D fields from ERA-Interim into a netCDF file. It also computes two derived quantities: the vertical shear of the zonal wind du/dz (an indicator of baroclinic instability) and the meridional overturning stream function.<br>
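The stream function is calculated as <br>
$$\psi = \frac{2\pi R\cos\phi}{g}\int v\,dp$$ <br>
where $R$ is the radius of the earth, $\phi$ the latitude, $g$ the gravitational acceleration and $v$ the meridional wind. <br>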
Return Values   : netCDF4 <br>
Caveat          : <br>

In [1]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os

In [2]:
# Physical constants shared by the calculations in this notebook.
constant = dict(
    g=9.80616,       # gravitational acceleration [m / s2]
    R=6371009,       # radius of the earth [m]
    cp=1004.64,      # heat capacity of air [J/(Kg*K)]
    Lv=2264670,      # latent heat of vaporization [J/Kg]
    R_dry=286.9,     # gas constant of dry air [J/(kg*K)]
    R_vap=461.5,     # gas constant for water vapour [J/(kg*K)]
)

In [12]:
################################   Input zone  ######################################
# first and last year of the period to be packed (end_year is included)
start_year, end_year = 1979, 2017
# directory with the yearly ERAI 3D fields on pressure level
datapath_3D = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/ERAI/regression/pressure/monthly/3D'
# directory that receives the packed netCDF file
output_path = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/ERAI/regression'
####################################################################################

In [10]:
def var_key_retrieve(datapath, year):
    """
    Open the yearly pressure-level file and hand back its dataset handle.

    param datapath: directory that holds the yearly netCDF files
    param year: year to open; it is substituted into the file name
                'pressure_monthly_075_{year}_3D.nc'

    return: whatever Dataset returns for the full file path. The handle is
            returned open, so closing it is left to the caller.
    """
    print ("Start retrieving datasets {} (y)".format(year))
    # build the file name for this year, then the full path to it
    file_name = 'pressure_monthly_075_{}_3D.nc'.format(year)
    dataset_handle = Dataset(os.path.join(datapath, file_name))

    print ("Retrieving datasets successfully and return the variable key!")
    return dataset_handle

In [7]:
def create_netcdf_point (pool_t_vert, pool_q_vert, pool_u_vert, pool_v_vert,
                         pool_z_vert, pool_dudz_vert, pool_psi_vert, output_path):
    """
    Write the packed vertical profiles to one NETCDF4 file.

    The file 'pressure_erai_monthly_regress_1979_2017_vertProfile_dudz_psi_var3D.nc'
    is created (mode 'w') inside output_path.

    param pool_t_vert: temperature, written as 't_vert' [K]
    param pool_q_vert: specific humidity, written as 'q_vert' [kg/kg]
    param pool_u_vert: zonal wind velocity, written as 'u_vert' [m/s]
    param pool_v_vert: meridional wind velocity, written as 'v_vert' [m/s]
    param pool_z_vert: geopotential, written as 'z_vert' [m2/s2]
    param pool_dudz_vert: zonal wind vertical shear, written as 'dudz_vert' [/s]
    param pool_psi_vert: meridional overturning stream function, written as 'psi_vert' [kg/s]
    param output_path: directory in which the netCDF file is created

    Every pool is stored on the dimensions (year, month, level, latitude);
    the sizes of these dimensions are taken from the shape of pool_t_vert.

    The coordinate values are read from the notebook globals `period`,
    `index_month`, `level` and `latitude`, which therefore have to be
    defined before this function is called.

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    # dimension sizes come from the data itself instead of from notebook globals,
    # so the file layout can never disagree with the arrays being written
    n_year, n_month, n_level, n_latitude = np.shape(pool_t_vert)
    field_dims = ('year', 'month', 'level', 'latitude')
    # (variable name, data, units, long name) of every 4D field
    field_table = (
        ('t_vert', pool_t_vert, 'K', 'temperature'),
        ('q_vert', pool_q_vert, 'kg/kg', 'specific humidity'),
        ('u_vert', pool_u_vert, 'm/s', 'zonal wind velocity'),
        ('v_vert', pool_v_vert, 'm/s', 'meridional wind velocity'),
        ('z_vert', pool_z_vert, 'm2/s2', 'geopotential'),
        ('dudz_vert', pool_dudz_vert, '/s', 'zonal wind vertical shear'),
        ('psi_vert', pool_psi_vert, 'kg/s', 'meridional overturning stream function'),
    )
    file_path = os.path.join(output_path, 'pressure_erai_monthly_regress_1979_2017_vertProfile_dudz_psi_var3D.nc')
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # the with-block closes the file even when one of the writes below fails
    with Dataset(file_path, 'w', format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', n_year)
        data_wrap.createDimension('month', n_month)
        data_wrap.createDimension('latitude', n_latitude)
        data_wrap.createDimension('level', n_level)
        # create coordinate variables
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        month_wrap_var = data_wrap.createVariable('month',np.int32,('month',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lev_wrap_var = data_wrap.createVariable('level',np.int32,('level',))
        # create the actual 4d variables
        field_vars = []
        for name, pool, units, long_name in field_table:
            field_var = data_wrap.createVariable(name,np.float64,field_dims,zlib=True)
            field_vars.append((field_var, pool, units, long_name))
        # global attributes
        data_wrap.description = 'Monthly mean vertical profile of fields from ERA-Interim on pressure level'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lev_wrap_var.units = 'hPa'
        for field_var, pool, units, long_name in field_vars:
            field_var.units = units
            field_var.long_name = long_name
        # writing data
        lat_wrap_var[:] = latitude
        lev_wrap_var[:] = level
        month_wrap_var[:] = index_month
        year_wrap_var[:] = period
        for field_var, pool, units, long_name in field_vars:
            field_var[:] = pool
    print ("The generation of netcdf files for the vertical profile of fields on pressure level is complete!!")

Equation for the Stokes stream function<br>
$$\psi(\phi,p)=\frac{2\pi a \cos\phi}{g}\int_{0}^{p}{v(\phi,p')}\,dp'$$ <br>
Note that the Hadley circulation, also known as the mean meridional circulation (MMC), is a zonal mean quantity by definition.<br>

In [16]:
def retriver(key):
    """
    Read one file and reduce its 3D fields to zonal-mean vertical profiles.

    param key: opened dataset; key.variables must provide 'u', 'v', 't',
               'q', 'z', 'level', 'latitude' and 'longitude'. The fields are
               indexed as 4D arrays with level on axis 1, latitude on axis 2
               and longitude on axis 3 (axis 0 is presumably the month -
               confirm against the input files). 'z' is divided by g, i.e. it
               is treated as geopotential, and 'level' is multiplied by 100,
               i.e. it is treated as hPa.

    Besides the plain fields two derived quantities are computed:
    - dudz: centred difference of u with respect to the height z = gz / g.
            It is only defined on interior levels; the first and the last
            level stay 0.
    - psi:  Stokes stream function, i.e. the meridional mass flux of every
            layer (trapezoidal rule between neighbouring levels) accumulated
            from the first level down to each level, summed over longitude.

    return: tuple (t_vert, q_vert, u_vert, v_vert, gz_vert, dudz_vert, psi_vert),
            each averaged over longitude (psi_vert is the sum over longitude).
            gz_vert is the geopotential, not the height.
    """
    print ('Extract monthly mean fields.')
    # read everything from the dataset that was passed in, not from a notebook global
    variables = key.variables
    u = variables['u'][:]
    v = variables['v'][:]
    t = variables['t'][:]
    q = variables['q'][:]
    gz = variables['z'][:]
    lev = variables['level'][:]
    lat = variables['latitude'][:]
    lon = variables['longitude'][:]
    n_level = len(lev)
    # calculate the height
    z = gz / constant['g']
    # calculate the vertical shear with a centred difference on interior levels
    dudz = np.zeros(u.shape,dtype=float)
    for k in range(1, n_level - 1):
        dudz[:,k,:,:] = (u[:,k-1,:,:] - u[:,k+1,:,:]) / (z[:,k-1,:,:] - z[:,k+1,:,:])
    # calculate the stokes stream function
    # zonal width of one grid cell at every latitude [m]
    dx = 2 * np.pi * constant['R'] * np.cos(2 * np.pi * lat / 360) / len(lon)
    # extra axis so that dx lines up with the latitude axis of a (time, lat, lon) slice
    dx_col = dx[:, np.newaxis]
    # mass_flux[:,k] is the flux through the layer between level k-1 and level k;
    # the factor 100 converts the level spacing from hPa to Pa
    mass_flux = np.zeros(u.shape,dtype=float)
    for k in range(1, n_level):
        mass_flux[:,k,:,:] = dx_col * (v[:,k,:,:] + v[:,k-1,:,:]) / 2 * (lev[k] - lev[k-1]) * 100 / constant['g']
    # accumulate over ALL levels, so the last level also receives the
    # integral instead of being left at 0
    psi = np.cumsum(mass_flux, axis=1)
    # take the vertical profile
    t_vert = np.mean(t,3)
    q_vert = np.mean(q,3)
    u_vert = np.mean(u,3)
    v_vert = np.mean(v,3)
    gz_vert = np.mean(gz,3)
    dudz_vert = np.mean(dudz,3)
    psi_vert = np.mean(psi,3) * len(lon) # by definition

    return t_vert, q_vert, u_vert, v_vert, gz_vert, dudz_vert, psi_vert

In [17]:
if __name__=="__main__":
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # years to process (end_year included) and index of months
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    # grid sizes of the input files (hard-coded, not read from a file)
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_latitude = 121
    Dim_longitude = 480
    Dim_level = 37
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool, one (year, month, level, latitude) array per field
    pool_shape = (Dim_year, Dim_month, Dim_level, Dim_latitude)
    pool_t = np.zeros(pool_shape,dtype = float)
    pool_q = np.zeros(pool_shape,dtype = float)
    pool_u = np.zeros(pool_shape,dtype = float)
    pool_v = np.zeros(pool_shape,dtype = float)
    pool_z = np.zeros(pool_shape,dtype = float)
    pool_dudz = np.zeros(pool_shape,dtype = float)
    pool_psi = np.zeros(pool_shape,dtype = float)
    latitude = np.zeros(Dim_latitude,dtype=float)
    level = np.zeros(Dim_level,dtype=int)
    # loop for calculation
    # year_index is the position of the year inside `period`, so the pools are
    # filled correctly for any start_year (not only 1979)
    for year_index, i in enumerate(period):
        # get the key of each variable
        var_key = var_key_retrieve(datapath_3D,i)
        try:
            latitude = var_key.variables['latitude'][:]
            level = var_key.variables['level'][:]
            t, q, u, v, z, dudz, psi = retriver(var_key)
        finally:
            # everything needed has been copied into memory; release the file
            # so that the handles do not pile up over the whole period
            var_key.close()
        pool_t[year_index,:,:,:] = t
        pool_q[year_index,:,:,:] = q
        pool_u[year_index,:,:,:] = u
        pool_v[year_index,:,:,:] = v
        pool_z[year_index,:,:,:] = z
        pool_dudz[year_index,:,:,:] = dudz
        pool_psi[year_index,:,:,:] = psi
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    create_netcdf_point(pool_t, pool_q, pool_u, pool_v,
                        pool_z, pool_dudz, pool_psi, output_path)
    print ('Packing 3D fields of ERA-Interim on pressure level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')

Start retrieving datasets 1979 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1980 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1981 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1982 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1983 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1984 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1985 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean fields.
Start retrieving datasets 1986 (y)
Retrieving datasets successfully and return the variable key!
Extract monthly mean 