# Copyright Netherlands eScience Center <br>
** Function     : Packing the subdaily 2 meter temperature fields into 5daily fields from ERA-Interim as netCDF** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2020.07.07 ** <br>
** Last Update  : 2020.07.07 ** <br>
Description     : This notebook aims to pack the subdaily 2 meter temperature fields to 5daily 2 meter temperature fields from ERA-Interim.<br>
Return Values   : netCDF4 <br>
Caveat          : The data is the subdaily mean of the spatial distribution of 2 meter temperature from 40N to 90N, dated from 1979 to 2017. It has the following sampling times:<br>
0:00 + 6:00 + 12:00 + 18:00 <br>

For the calculation of 5 daily fields, we assume each month consists of 6 target fields. Each of the first 5 fields covers 5 days. The 6th field contains the rest of the days in that month.<br>

In [3]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os

In [4]:
################################   Input zone  #########################################
# User-editable settings read by the functions and the driver cell below.
# specify starting and ending time (years; the driver builds its year axis
# as start_year .. end_year inclusive)
start_year = 1979
end_year = 2017
# specify data path
# root folder of the ERA-Interim 2 meter temperature input files; the reader
# looks below it for 'era<year>/pressure_daily_075_diagnostic_<year>_<month>_t2m.nc'
datapath = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ERA-Interim/t2m_daily'
# specify output path for the packed netCDF file
output_path = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ERA-Interim'
########################################################################################

In [5]:
def var_key_retrieve(datapath, year, month):
    """Open the ERA-Interim 2 meter temperature file of one month.

    The file is expected at
    ``<datapath>/era<year>/pressure_daily_075_diagnostic_<year>_<month>_t2m.nc``,
    where ``year`` and ``month`` are inserted exactly as given (no zero
    padding is applied to the month).

    param datapath: root folder holding the ``era<year>`` sub-folders
    param year: year of the requested file
    param month: month of the requested file

    return: the opened netCDF4 ``Dataset``; the caller is responsible for
            closing it
    """
    print("Start retrieving datasets {} (y) {} (m)".format(year, month))
    # NOTE: per the original author, each field in the file is (67, 480)
    year_folder = 'era{}'.format(year)
    file_name = 'pressure_daily_075_diagnostic_{}_{}_t2m.nc'.format(year, month)
    dataset = Dataset(os.path.join(datapath, year_folder, file_name))

    print("Retrieving datasets successfully and return the variable key!")
    return dataset

In [6]:
def retriver(key,latitude,longitude):
    """Reduce one month of subdaily 2 meter temperature to six 5-daily means.

    The subdaily series is read from ``key.variables['t2m']`` and is assumed
    to hold four samples per day along its first axis (the number of time
    steps must be a multiple of 4). The four samples of each day are averaged
    to a daily mean; the daily means are then averaged in blocks of five days.
    The sixth block takes every remaining day of the month (day 26 onwards),
    so it covers 3 to 6 days depending on the month length.

    param key: opened dataset (or any object with a ``variables`` mapping)
               providing 't2m' with dimensions (time, latitude, longitude)
    param latitude: latitude axis, only its length is used
    param longitude: longitude axis, only its length is used

    return: array of shape (6, len(latitude), len(longitude))
    """
    print ('Extract daily fields and calculate 5daily fields.')
    # read from the dataset handed in by the caller; the previous version
    # silently relied on a module-level global named ``var_key``
    t2m_subdaily = key.variables['t2m'][:]
    # take daily mean of the four samples per day
    t2m_daily = (t2m_subdaily[::4,:,:] + t2m_subdaily[1::4,:,:] +
                 t2m_subdaily[2::4,:,:] + t2m_subdaily[3::4,:,:]) / 4
    # take 5daily mean
    t2m_5daily = np.zeros((6,len(latitude),len(longitude)),dtype=float)
    for i in range(5):
        t2m_5daily[i,:,:] = np.mean(t2m_daily[i*5:i*5+5,:,:],axis=0)
    # the last field collects whatever is left after the first 25 days
    t2m_5daily[5,:,:] = np.mean(t2m_daily[25:,:,:],axis=0)

    return t2m_5daily

In [7]:
# save output datasets
# we pack our timeseries from 1979 to 2017
def create_netcdf_point (t2m, period, daily5, latitude, longitude, output_path):
    """Write the packed 5-daily 2 meter temperature fields to a netCDF4 file.

    The file 't2m_5daily_erai_1979_2017.nc' is created (or overwritten) in
    ``output_path``. It holds the coordinate variables 'year', '5daily',
    'latitude' and 'longitude' and the 4-d variable 't2m' with dimensions
    (year, 5daily, latitude, longitude).

    param t2m: temperature fields, shape
               (len(period), len(daily5), len(latitude), len(longitude))
    param period: year axis
    param daily5: index axis of the 5-daily fields within a year
    param latitude: latitude axis
    param longitude: longitude axis
    param output_path: folder in which the netCDF file is written

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for T2M from 1979 to 2017.")
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    output_file = os.path.join(output_path, 't2m_5daily_erai_1979_2017.nc')
    # the context manager closes the file even if one of the writes fails
    with Dataset(output_file,'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('5daily', len(daily5))
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create coordinate variables, one per dimension
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        daily5_wrap_var = data_wrap.createVariable('5daily',np.int32,('5daily',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float64,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float64,('longitude',))
        # create the actual 4-d variable
        T2M_wrap_var = data_wrap.createVariable('t2m',np.float64,('year','5daily','latitude','longitude'))

        # global attributes
        # the file holds 2 meter temperature; the former description
        # ('5daily Sea Ice Concentration') was a leftover from another script
        data_wrap.description = '5daily 2 meter temperature'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        T2M_wrap_var.units = 'Kelvin'
        T2M_wrap_var.long_name = '2 meter temperature'

        # writing data
        year_wrap_var[:] = period
        daily5_wrap_var[:] = daily5
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        T2M_wrap_var[:] = t2m

    print ("Create netcdf file successfully")

In [8]:
if __name__=="__main__":
    # Driver: loop over every year and month, reduce each monthly file to six
    # 5-daily mean fields and pack everything into a single netCDF file.
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # index of months
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    # 12 months x 6 fields per month = 72 fields per year
    index_5daily = np.arange(1,73,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_5daily = len(index_5daily)
    # grid size of the input fields (hard-coded, must match the input files)
    Dim_latitude = 67
    Dim_longitude = 480
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool
    pool_t2m = np.zeros((Dim_year,Dim_5daily,Dim_latitude,Dim_longitude),dtype = float)
    # loop for calculation
    for i in period:
        for j in index_month:
            var_key = var_key_retrieve(datapath,i,j)
            try:
                # get the key of each variable
                latitude = var_key.variables['latitude'][:]
                longitude = var_key.variables['longitude'][:]
                t2m_5daily = retriver(var_key,latitude,longitude)
            finally:
                # release the file handle; everything needed is in memory now
                var_key.close()
            # row index is relative to start_year (was hard-coded to 1979);
            # month j fills the six slots j*6-6 .. j*6-1
            pool_t2m[i-start_year,j*6-6:j*6,:,:] = t2m_5daily
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    # latitude / longitude are taken from the last file that was read
    create_netcdf_point(pool_t2m, period, index_5daily,
                        latitude, longitude, output_path)
    print ('Packing 2D fields of ERA-Interim on surface level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')

Start retrieving datasets 1979 (y) 1 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate 5daily fields.
Start retrieving datasets 1979 (y) 2 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate 5daily fields.
Start retrieving datasets 1979 (y) 3 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate 5daily fields.
Start retrieving datasets 1979 (y) 4 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate 5daily fields.
Start retrieving datasets 1979 (y) 5 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate 5daily fields.
Start retrieving datasets 1979 (y) 6 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate 5daily fields.
Start retrieving datasets 1979 (y) 7 (m)
Retrieving datasets successfully and retu