# Copyright Netherlands eScience Center <br>
** Function     : Packing the 5 daily sea ice concentration fields into weekly fields from S2S as netCDF** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2020.07.17 ** <br>
** Last Update  : 2020.07.20 ** <br>
Description     : This notebook packs the 5 daily SIC fields from the ECMWF S2S dataset into weekly SIC fields.<br>
Return Values   : netCDF4 <br>
Caveat          : For the calculation of weekly fields, we assume each month consists of 4 weeks. The first 3 weeks
                  include 7 days each. The 4th week contains the rest of the days in that month.<br>

In [2]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os
import pygrib

In [None]:
################################   Input zone  #########################################
# first and last year to pack (both bounds are included in the period)
start_year, end_year = 2015, 2017
# which S2S forecast product to process: "control" or "ensemble"
forecast_type = "control"
# directory holding the S2S GRIB files of the selected forecast product
datapath = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ECMWFens/S2S{}'.format(forecast_type)
# directory that receives the packed netCDF output
output_path = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ECMWFens'
########################################################################################

In [None]:
##########################   collect dimension info  ###################################
# Read the first message of one benchmark GRIB file to obtain the mask of the
# field and the coordinates of the grid. These are reused for every other file.
if forecast_type == "ensemble":
    benchmark_file = 'S2S_sic_ens_ensemble_201501.grib'
else:
    benchmark_file = 'S2S_sic_control_201501.grib'
grbs_SIC = pygrib.open(os.path.join(datapath, benchmark_file))
try:
    print ('Number of messages',grbs_SIC.messages)
    key_SIC = grbs_SIC.message(1)
    sample = key_SIC.values
    # mask of the sample field (np.ma.nomask when the field carries no mask)
    mask = np.ma.getmask(sample)
    lats, lons = key_SIC.latlons()
finally:
    # release the GRIB file even if reading the benchmark message fails
    grbs_SIC.close()
# lats / lons are 2-D grids indexed as (latitude, longitude). Take the first
# column of lats to get a 1-D latitude vector and keep only the first 31 rows,
# matching the sic[:31,:] subset that is packed later.
# NOTE(review): assumes a regular lat-lon grid whose rows run north to south
# (90N - 45N in the first 31 rows) - confirm against the GRIB files.
latitude_SIC = lats[:31,0]
longitude_SIC = lons[0,:]
########################################################################################

In [None]:
# save output datasets
# we only pack our timeseries from 2015 to 2017
def create_netcdf_point_control (SIC, period, week, latitude, longitude, output_path):
    """
    Write the weekly SIC fields of the control forecast to a netCDF4 file.

    The file 'sic_weekly_S2S_control_2015_2017.nc' is opened in write mode
    inside output_path. It is always closed, also when writing fails.

    param SIC: weekly sea ice concentration with dimensions
               [year, week, leadweek, latitude, longitude]
    param period: year coordinate, one entry per year in SIC
    param week: week coordinate, one entry per week in SIC
    param latitude: 1-D latitude coordinate
    param longitude: 1-D longitude coordinate
    param output_path: directory in which the netCDF file is created

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for weekly SIC from S2S control forecast.")
    # number of lead weeks follows the data instead of a hard-coded constant
    n_leadweek = np.shape(SIC)[2]
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # the context manager closes the file even if an exception is raised
    with Dataset(os.path.join(output_path,'sic_weekly_S2S_control_2015_2017.nc'),
                 'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('week', len(week))
        data_wrap.createDimension('leadweek', n_leadweek)
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create coordinate variables for the 5 dimensions
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        week_wrap_var = data_wrap.createVariable('week',np.int32,('week',))
        leadweek_wrap_var = data_wrap.createVariable('leadweek',np.int32,('leadweek',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 5-d variable
        SIC_wrap_var = data_wrap.createVariable('sic',np.float64,
                                                ('year','week','leadweek','latitude','longitude'))
        # global attributes
        data_wrap.description = 'Lead time dependent weekly Sea Ice Concentration from ECMWF S2S'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        SIC_wrap_var.units = 'fraction of sea ice'
        SIC_wrap_var.long_name = 'sea ice concentration'
        # writing data
        year_wrap_var[:] = period
        week_wrap_var[:] = week
        leadweek_wrap_var[:] = np.arange(n_leadweek)
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        SIC_wrap_var[:] = SIC
    print ("Create netcdf file successfully")
    
    
def create_netcdf_point_ensemble (SIC, period, week, latitude, longitude, output_path):
    """
    Write the weekly SIC fields of the ensemble forecast to a netCDF4 file.

    The file 'sic_weekly_S2S_ensemble_2015_2017.nc' is opened in write mode
    inside output_path. It is always closed, also when writing fails.

    param SIC: weekly sea ice concentration with dimensions
               [ens, year, week, leadweek, latitude, longitude]
    param period: year coordinate, one entry per year in SIC
    param week: week coordinate, one entry per week in SIC
    param latitude: 1-D latitude coordinate
    param longitude: 1-D longitude coordinate
    param output_path: directory in which the netCDF file is created

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for weekly SIC from S2S ensemble forecast.")
    # ensemble size and number of lead weeks follow the data instead of
    # hard-coded constants (50 members and 4 lead weeks in the current setup)
    sic_shape = np.shape(SIC)
    n_ens = sic_shape[0]
    n_leadweek = sic_shape[3]
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # the context manager closes the file even if an exception is raised
    with Dataset(os.path.join(output_path,'sic_weekly_S2S_ensemble_2015_2017.nc'),
                 'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('ens', n_ens)
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('week', len(week))
        data_wrap.createDimension('leadweek', n_leadweek)
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create coordinate variables for the 6 dimensions
        ensemble_wrap_var = data_wrap.createVariable('ens',np.int32,('ens',))
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        week_wrap_var = data_wrap.createVariable('week',np.int32,('week',))
        leadweek_wrap_var = data_wrap.createVariable('leadweek',np.int32,('leadweek',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 6-d variable
        SIC_wrap_var = data_wrap.createVariable('sic',np.float64,('ens','year','week',
                                                                  'leadweek','latitude','longitude'))
        # global attributes
        data_wrap.description = 'Lead time dependent weekly Sea Ice Concentration from ECMWF S2S'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        SIC_wrap_var.units = 'fraction of sea ice'
        SIC_wrap_var.long_name = 'sea ice concentration'
        # writing data
        ensemble_wrap_var[:] = np.arange(n_ens)
        year_wrap_var[:] = period
        week_wrap_var[:] = week
        leadweek_wrap_var[:] = np.arange(n_leadweek)
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        SIC_wrap_var[:] = SIC
    print ("Create netcdf file successfully")

In [None]:
def _read_month_of_forecasts(grib_path, n_members, n_lead_days, n_lat, n_lon, land_mask):
    """
    Read all SIC messages of one monthly GRIB file into a single array.

    param grib_path: path of the GRIB file to read
    param n_members: number of members per forecast (1 for the control run)
    param n_lead_days: number of lead days stored per forecast
    param n_lat: number of leading latitude rows kept from each field
    param n_lon: number of longitude points of each field
    param land_mask: mask of the field; masked points are set to -1.0

    return: array with dimensions [member, forecast, lead day, latitude, longitude]
    raise ValueError: the message count is not a whole number of forecasts
    """
    grbs_sic = pygrib.open(grib_path)
    try:
        messages = grbs_sic.messages
        per_forecast = n_lead_days * n_members
        if messages % per_forecast != 0:
            raise ValueError('{} holds {} messages, which is not a multiple of {} '
                             '(lead days x members).'.format(grib_path, messages, per_forecast))
        time_forecast = messages // per_forecast
        fields = np.zeros((n_members, time_forecast, n_lead_days, n_lat, n_lon), dtype = float)
        # NOTE(review): assumes the messages are ordered with the member index
        # running fastest, then the lead day, then the forecast start - confirm
        # against the GRIB files.
        for index in range(messages):
            forecast, rest = divmod(index, per_forecast)
            lead_day, member = divmod(rest, n_members)
            # GRIB messages are numbered from 1
            sic = grbs_sic.message(index + 1).values
            sic[land_mask] = -1.0
            fields[member, forecast, lead_day, :, :] = sic[:n_lat,:]
    finally:
        # release the GRIB file even if reading fails half way
        grbs_sic.close()
    return fields


if __name__=="__main__":
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month for file manipulation (zero padded, as in the file names)
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # index of years, months and weeks (4 weeks per month, 48 per year)
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    index_week = np.arange(1,49,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    Dim_ens = 50
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_week = len(index_week)
    Dim_lead_weeks = 4 # leading time in weeks
    Dim_lead_days = 32 # leading time in days
    Dim_latitude = 31 # from 90 - 45 N
    Dim_longitude = len(longitude_SIC)
    ###################################################
    #####   Month classifier for week selection   #####
    ###################################################
    # indices of the forecasts (within one monthly file) that represent the
    # 4 weeks of the month
    # NOTE(review): presumably S2S issues two forecasts per week, hence every
    # second forecast is taken - confirm.
    date_classifier = [0,2,4,6]
    #############################################
    #####   Create space for storing data   #####
    #############################################
    is_ensemble = forecast_type == "ensemble"
    if is_ensemble:
        n_members = Dim_ens
        file_template = 'S2S_sic_ens_ensemble_{}{}.grib'
    else:
        # the control run is handled as an ensemble with a single member
        n_members = 1
        file_template = 'S2S_sic_control_{}{}.grib'
    # data pool [member, year, week, lead week, latitude, longitude]
    pool_sic = np.zeros((n_members, Dim_year, Dim_week, Dim_lead_weeks,
                         Dim_latitude, Dim_longitude), dtype = float)
    # loop for calculation
    for i in period:
        for j in index_month:
            # zero padded month, e.g. S2S_sic_control_201501.grib
            grib_path = os.path.join(datapath, file_template.format(i, namelist_month[j-1]))
            pool_sic_month = _read_month_of_forecasts(grib_path, n_members, Dim_lead_days,
                                                      Dim_latitude, Dim_longitude, mask)
            # save data via classifier
            for n, forecast_select in enumerate(date_classifier): # 4 weeks per month
                for lead_week in range(Dim_lead_weeks):
                    # we take 4 weeks of 7 days from the leading 32 days
                    lead_days = slice(7 * lead_week, 7 * lead_week + 7)
                    pool_sic[:,i-start_year,j*4-4+n,lead_week,:,:] = np.mean(
                        pool_sic_month[:,forecast_select,lead_days,:,:],1)
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    if is_ensemble:
        create_netcdf_point_ensemble(pool_sic, period, index_week,
                                     latitude_SIC, longitude_SIC, output_path)
    else:
        # drop the dummy member axis of the control run
        pool_sic = pool_sic[0]
        create_netcdf_point_control(pool_sic, period, index_week,
                                    latitude_SIC, longitude_SIC, output_path)
    print ('Packing weekly SIC fields of ECMWF S2S is complete!!!')
    print ('The output is in sleep, safe and sound!!!')