# Copyright Netherlands eScience Center <br>
**Function     : Compute monthly mean variance of geopotential at 500hPa (ERAI)** <br>
**Author       : Yang Liu** <br>
**First Built  : 2018.10.11** <br>
**Last Update  : 2018.10.11** <br>
Description  : This script calculates the monthly mean variance of the geopotential at 500hPa from subdaily data. The variance of Z500 is a good indicator of cyclone activity, so it provides extra information about the role of transient eddies when we investigate the meridional energy transport.<br>

Return Values : netCDF4 file <br>
Caveat!         :  <br>

In [1]:
%matplotlib inline

import sys
import numpy as np
import scipy
from scipy import stats
from netCDF4 import Dataset
import os

Initialization - Start with location of input and extraction of variables
Time span of each product:
- **ERA-Interim** 1979 - 2016
- **MERRA2**      1980 - 2016
- **JRA55**       1979 - 2015
- **ORAS4**       1958 - 2017
- **GLORYS2V3**   1993 - 2014
- **SODA3**       1980 - 2015

In [2]:
# ============================== Input zone ==============================
# Directory holding the ERAI sub-daily 3D fields on pressure levels (input).
datapath = '/home/yang/workbench/Core_Database_AMET_OMET_reanalysis/ERAI/regression/pressure/daily'
# Directory that receives the generated output.
output_path = '/home/yang/workbench/Core_Database_AMET_OMET_reanalysis/ERAI/regression'
# First and last year of the period to process (both inclusive).
start_year, end_year = 1979, 2017
# ========================================================================

In [3]:
def var_key_retrieve(datapath, year, month):
    """Open the netCDF file of one month and return the dataset handle.

    The file is looked up at
    ``<datapath>/era<year>/pressure_daily_075_diagnostic_<year>_<month>_z.nc``.

    Parameters
    ----------
    datapath : str
        Root directory containing the per-year ``era<year>`` folders.
    year : int
        Year of the requested file.
    month : int
        Month of the requested file, inserted into the file name as given
        (no zero padding).

    Returns
    -------
    netCDF4.Dataset
        The opened dataset. The caller is responsible for closing it.
    """
    # The message uses ``{}`` placeholders, so it must be filled with
    # ``str.format``; the ``%`` operator raises TypeError on such a template.
    print ("Start retrieving datasets {} (y) {} (m)".format(year, month))
    # The shape of each variable is (241,480)
    file_path = os.path.join(datapath, 'era{}'.format(year),
                             'pressure_daily_075_diagnostic_{}_{}_z.nc'.format(year, month))
    # get the variable keys
    var_key = Dataset(file_path)

    print ("Retrieving datasets successfully and return the variable key!")
    return var_key

In [4]:
def create_netcdf_point(pool_z500var, output_path):
    """Write the pooled Z500 variance fields to a NETCDF4 file.

    The file ``pressure_erai_subdaily_regress_1979_2017_z500var.nc`` is
    created (or overwritten) inside ``output_path``.

    Besides its arguments, this function reads the following module-level
    names, which must be defined before it is called: ``Dim_year``,
    ``Dim_month``, ``Dim_latitude``, ``Dim_longitude``, ``period``,
    ``index_month``, ``latitude`` and ``longitude``.

    Parameters
    ----------
    pool_z500var : array-like
        Values stored in the ``z500var`` variable, laid out as
        (year, month, latitude, longitude).
    output_path : str
        Directory in which the netCDF file is written.
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    data_wrap = Dataset(os.path.join(output_path, 'pressure_erai_subdaily_regress_1979_2017_z500var.nc'),
                        'w', format='NETCDF4')
    # Guarantee the file handle is released even if one of the writes fails.
    try:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', Dim_year)
        data_wrap.createDimension('month', Dim_month)
        data_wrap.createDimension('latitude', Dim_latitude)
        data_wrap.createDimension('longitude', Dim_longitude)
        # create coordinate variables
        year_wrap_var = data_wrap.createVariable('year', np.int32, ('year',))
        month_wrap_var = data_wrap.createVariable('month', np.int32, ('month',))
        lat_wrap_var = data_wrap.createVariable('latitude', np.float32, ('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude', np.float32, ('longitude',))
        # create the actual 4d variable (compressed with zlib)
        z500var_wrap_var = data_wrap.createVariable('z500var', np.float64,
                                                    ('year', 'month', 'latitude', 'longitude'),
                                                    zlib=True)
        # global attributes
        data_wrap.description = 'Monthly mean 2D fields of ERA-Interim on pressure level'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        # NOTE(review): the unit of a variance is the square of the unit of
        # the input field 'z' -- confirm that 'm2' is what is intended here.
        z500var_wrap_var.units = 'm2'
        z500var_wrap_var.long_name = 'monthly mean variance of subdaily geopotential height at 500hPa'
        # writing data
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        month_wrap_var[:] = index_month
        year_wrap_var[:] = period

        z500var_wrap_var[:] = pool_z500var
    finally:
        # close the file
        data_wrap.close()
    print ("The generation of netcdf files for fields on surface is complete!!")

In [5]:
def retriver(key):
    """Compute the variance of the selected 'z' level over the first axis.

    Parameters
    ----------
    key : object exposing ``variables``
        An opened dataset (as returned by ``var_key_retrieve``) whose
        ``variables['z']`` is a 4D array.

    Returns
    -------
    array
        2D field holding the population variance (``ddof=0``) of
        ``z[:, 1, :, :]`` taken along axis 0.
    """
    print ('Extract subdaily fields and calculate the variance for each month.')
    # Read from the dataset handed in by the caller rather than from a
    # module-level variable, so the function works for any dataset.
    # Index 1 on the second axis is taken to be the 500 hPa level in these
    # files -- TODO confirm against the level coordinate.
    z500 = key.variables['z'][:,1,:,:] # we only need z500
    var_2D = np.var(z500, axis=0, ddof=0)

    return var_2D

In [None]:
if __name__=="__main__":
    # Driver: for every year/month in the requested period, open the monthly
    # file, compute the variance field and store it in a pool, then write the
    # pool to a netCDF file. The names defined here are module-level and are
    # read by create_netcdf_point.
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation
    # (not used below; retained in case other cells rely on it)
    namelist_month = ['1','2','3','4','5','6','7','8','9','10','11','12']
    # years to process (end_year inclusive) and index of months
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    # dimensions of the output pool; the grid size is fixed by hand
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_latitude = 241
    Dim_longitude = 480
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool
    pool_z500var = np.zeros((Dim_year,Dim_month,Dim_latitude,Dim_longitude),dtype = float)
    latitude = np.zeros(Dim_latitude,dtype=float)
    longitude = np.zeros(Dim_longitude,dtype=float)
    # loop for calculation
    # The position along the year axis comes from enumerate, so it stays
    # correct whatever start_year is set to (no hard-coded 1979 offset).
    for year_index, year in enumerate(period):
        for month in index_month:
            # get the key of each variable
            var_key = var_key_retrieve(datapath, year, month)
            # Close every monthly file once its data has been read, so file
            # handles do not pile up over the whole period.
            try:
                latitude = var_key.variables['latitude'][:]
                longitude = var_key.variables['longitude'][:]
                z500var = retriver(var_key)
            finally:
                var_key.close()
            pool_z500var[year_index,month-1,:,:] = z500var
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    create_netcdf_point(pool_z500var, output_path)
    print ('Packing 2D fields of ERA-Interim on pressure level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')