# Copyright Netherlands eScience Center <br>
** Function     : Packing the subdaily sea ice concentration fields into weekly fields from ERA-Interim as netCDF** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2019.05.17 ** <br>
** Last Update  : 2019.05.17 ** <br>
Description     : This notebook aims to pack the subdaily SIC fields to weekly SIC fields from ERA-Interim.<br>
Return Values   : netCDF4 <br>
Caveat          : The data is the subdaily mean of the spatial distribution of sea ice from 40N to 90N, covering 1979 to 2017. It is sampled at the following times:<br>
0:00 + 6:00 + 12:00 + 18:00 <br>
For the calculation of weekly fields, we assume each month consists of 4 weeks. The first 3 weeks contain 7 days each. The 4th week contains the remaining days of that month.

In [1]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os

In [2]:
################################   Input zone  #########################################
# first and last year (both inclusive) of the period to be packed
start_year = 1979
end_year = 2017
# root directory of the input data; the monthly files are looked up in
# per-year sub-directories named 'era<year>' below this path
datapath = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ERA-Interim/sic_daily'
# directory in which the packed weekly netCDF file is written
output_path = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ERA-Interim'
########################################################################################

In [3]:
def var_key_retrieve(datapath, year, month):
    """Open the monthly SIC file of a given year/month and return its handle.

    param datapath: root directory that contains the 'era<year>' folders
    param year: year of the file to open
    param month: month of the file to open, inserted into the file name as given

    return: the opened netCDF4 Dataset; the caller is responsible for closing it
    """
    print ("Start retrieving datasets {} (y) {} (m)".format(year,month))
    # file layout: <datapath>/era<year>/pressure_daily_075_diagnostic_<year>_<month>_sic.nc
    # NOTE(review): each field is presumably (67,480) in (lat,lon) - confirm against the files
    year_folder = 'era{}'.format(year)
    file_name = 'pressure_daily_075_diagnostic_{}_{}_sic.nc'.format(year,month)
    dataset = Dataset(os.path.join(datapath, year_folder, file_name))

    print ("Retrieving datasets successfully and return the variable key!")
    return dataset

In [4]:
def retriver(key, latitude, longitude, samples_per_day=4):
    """Turn the subdaily SIC fields of one month into 4 weekly mean fields.

    The fields are first averaged to daily means and then to weekly means.
    Weeks 1-3 cover 7 days each, week 4 covers all remaining days of the month.
    Masked grid points are given the value -1.0 before averaging.

    param key: opened dataset (or any object) exposing ``variables['siconc']``
               with dimensions (time, latitude, longitude)
    param latitude: latitude coordinate, only its length is used
    param longitude: longitude coordinate, only its length is used
    param samples_per_day: number of time steps per day in the input (default 4)

    return: array of shape (4, len(latitude), len(longitude)) with weekly means
    raises ValueError: if the number of time steps is not a multiple of
                       samples_per_day
    """
    print ('Extract daily fields and calculate weekly fields.')
    # read from the handle that was passed in (not from a global variable) and
    # replace masked points by -1.0; np.ma.filled leaves the source untouched
    sic_subdaily = np.ma.filled(key.variables['siconc'][:], -1.0)
    n_steps = sic_subdaily.shape[0]
    if n_steps % samples_per_day != 0:
        raise ValueError(
            'Number of time steps ({}) is not a multiple of the samples per day ({}).'.format(
                n_steps, samples_per_day))
    # take daily mean: group consecutive samples of the same day and average them
    grouped = sic_subdaily.reshape((-1, samples_per_day) + sic_subdaily.shape[1:])
    sic_daily = grouped.mean(axis=1)
    # take weekly mean
    sic_weekly = np.zeros((4,len(latitude),len(longitude)),dtype=float)
    for week in range(3):
        sic_weekly[week,:,:] = np.mean(sic_daily[week*7:week*7+7,:,:],axis=0)
    # the last week collects whatever is left after day 21
    sic_weekly[3,:,:] = np.mean(sic_daily[21:,:,:],axis=0)

    return sic_weekly

In [5]:
# save output datasets
# the time series is packed from 1979 to 2017
def create_netcdf_point (sic, period, week, latitude, longitude, output_path):
    """Write the weekly SIC fields and their coordinates to a netCDF file.

    The file 'sic_weekly_erai_1979_2017.nc' is created in output_path
    (an existing file is overwritten because it is opened in 'w' mode).

    param sic: data with dimensions (year, week, latitude, longitude)
    param period: values of the 'year' coordinate
    param week: values of the 'week' coordinate
    param latitude: values of the 'latitude' coordinate
    param longitude: values of the 'longitude' coordinate
    param output_path: directory in which the file is created

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for SIC from 1979 to 2017.")
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    output_file = os.path.join(output_path, 'sic_weekly_erai_1979_2017.nc')
    # the context manager closes the file even if one of the writes fails
    with Dataset(output_file,'w',format = 'NETCDF3_64BIT') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('week', len(week))
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create the coordinate variables, one per dimension
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        week_wrap_var = data_wrap.createVariable('week',np.int32,('week',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float64,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float64,('longitude',))
        # create the actual 4-d variable
        SIC_wrap_var = data_wrap.createVariable('sic',np.float64,('year','week','latitude','longitude'))

        # global attributes
        data_wrap.description = 'Weekly Sea Ice Concentration'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        SIC_wrap_var.units = 'fraction of sea ice'
        SIC_wrap_var.long_name = 'sea ice concentration'

        # writing data
        year_wrap_var[:] = period
        week_wrap_var[:] = week
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        SIC_wrap_var[:] = sic

    print ("Create netcdf file successfully")

In [6]:
if __name__=="__main__":
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation
    # (not used by the loop below, which inserts the month number directly)
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # years to process, months per year and the 48 weeks (12 months x 4) per year
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    index_week = np.arange(1,49,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_week = len(index_week)
    # grid size of the input fields (latitude x longitude)
    Dim_latitude = 67
    Dim_longitude = 480
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool
    pool_sic = np.zeros((Dim_year,Dim_week,Dim_latitude,Dim_longitude),dtype = float)
    # loop for calculation
    for i in period:
        for j in index_month:
            var_key = var_key_retrieve(datapath,i,j)
            try:
                # read the coordinates and the weekly fields into memory
                latitude = var_key.variables['latitude'][:]
                longitude = var_key.variables['longitude'][:]
                sic_weekly = retriver(var_key,latitude,longitude)
            finally:
                # release the file handle before the next month is opened
                var_key.close()
            # the year index is relative to the configured start year;
            # month j fills the week slots 4*(j-1) ... 4*j-1
            pool_sic[i-start_year,j*4-4:j*4,:,:] = sic_weekly
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    # the coordinates read from the last file are used for the output file
    create_netcdf_point(pool_sic, period, index_week,
                        latitude, longitude, output_path)
    print ('Packing 2D fields of ERA-Interim on surface level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')

Start retrieving datasets 1979 (y) 1 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate weekly fields.
Start retrieving datasets 1979 (y) 2 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate weekly fields.
Start retrieving datasets 1979 (y) 3 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate weekly fields.
Start retrieving datasets 1979 (y) 4 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate weekly fields.
Start retrieving datasets 1979 (y) 5 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate weekly fields.
Start retrieving datasets 1979 (y) 6 (m)
Retrieving datasets successfully and return the variable key!
Extract daily fields and calculate weekly fields.
Start retrieving datasets 1979 (y) 7 (m)
Retrieving datasets successfully and retu