# Copyright Netherlands eScience Center <br>
** Function     : Packing the 5 daily sea ice concentration fields into weekly fields from S2S as netCDF** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2020.07.17 ** <br>
** Last Update  : 2020.07.20 ** <br>
Description     : This notebook packs the 5 daily SIC fields from the ECMWF S2S dataset into weekly SIC fields.<br>
Return Values   : netCDF4 <br>
Caveat          : For the calculation of weekly fields, we assume each month consists of 4 weeks. The first 3 weeks
                  each contain 7 days. The 4th week contains the remaining days of that month.<br>

In [1]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os
import pygrib

In [2]:
################################   Input zone  #########################################
# First and last year (both inclusive) of the period to be packed.
start_year, end_year = 2015, 2017
# Forecast flavour to process: "ensemble" or "control".
# Any value other than "ensemble" selects the control branch further down.
forecast_type = "ensemble"
# Directory holding the monthly GRIB files of the selected forecast flavour.
datapath = (
    '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/S2S/NCEPens/S2S{}'
).format(forecast_type)
# Directory that receives the packed netCDF file.
output_path = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/S2S/NCEPens'
########################################################################################

In [3]:
##########################   collect dimension info  ###################################
# Open the January 2015 benchmark GRIB file of the selected forecast flavour and
# take the mask and the grid coordinates from its first message. These are
# reused for every field that is processed later on.
if forecast_type == "ensemble":
    benchmark_file = 'S2S_NCEP_sic_ensemble_201501.grib'
else:
    benchmark_file = 'S2S_NCEP_control_201501.grib'
grbs_SIC = pygrib.open(os.path.join(datapath, benchmark_file))
try:
    print ('Number of messages',grbs_SIC.messages)
    key_SIC = grbs_SIC.message(1)
    sample = key_SIC.values
    # mask of the benchmark field (np.ma.nomask when the field carries no mask)
    mask = np.ma.getmask(sample)
    lats, lons = key_SIC.latlons()
finally:
    # release the GRIB handle even if reading the benchmark message fails
    grbs_SIC.close()
# Keep only the first 31 latitude rows; on this grid they run from 90 N down to
# 45 N in 1.5 degree steps (see the printed coordinates).
latitude_SIC = lats[:31,0]
longitude_SIC = lons[0,:]
########################################################################################
print(latitude_SIC)
print(longitude_SIC)

Number of messages 19995
[90.  88.5 87.  85.5 84.  82.5 81.  79.5 78.  76.5 75.  73.5 72.  70.5
 69.  67.5 66.  64.5 63.  61.5 60.  58.5 57.  55.5 54.  52.5 51.  49.5
 48.  46.5 45. ]
[  0.    1.5   3.    4.5   6.    7.5   9.   10.5  12.   13.5  15.   16.5
  18.   19.5  21.   22.5  24.   25.5  27.   28.5  30.   31.5  33.   34.5
  36.   37.5  39.   40.5  42.   43.5  45.   46.5  48.   49.5  51.   52.5
  54.   55.5  57.   58.5  60.   61.5  63.   64.5  66.   67.5  69.   70.5
  72.   73.5  75.   76.5  78.   79.5  81.   82.5  84.   85.5  87.   88.5
  90.   91.5  93.   94.5  96.   97.5  99.  100.5 102.  103.5 105.  106.5
 108.  109.5 111.  112.5 114.  115.5 117.  118.5 120.  121.5 123.  124.5
 126.  127.5 129.  130.5 132.  133.5 135.  136.5 138.  139.5 141.  142.5
 144.  145.5 147.  148.5 150.  151.5 153.  154.5 156.  157.5 159.  160.5
 162.  163.5 165.  166.5 168.  169.5 171.  172.5 174.  175.5 177.  178.5
 180.  181.5 183.  184.5 186.  187.5 189.  190.5 192.  193.5 195.  196.5
 198.  199.5 

In [4]:
# save output datasets
# we only pack our timeseries from 2015 to 2017
def create_netcdf_point_control (SIC, period, week, latitude, longitude, output_path):
    """
    Save the weekly sea ice concentration of the control forecast as netCDF4.

    The file 'sic_weekly_S2S_NCEP_control_2015_2017.nc' is created inside
    output_path (opened in 'w' mode).

    param SIC: weekly sea ice concentration, written to the variable 'sic'
               with dimensions [year, week, leadweek, latitude, longitude]
    param period: values of the 'year' coordinate
    param week: values of the 'week' coordinate
    param latitude: values of the 'latitude' coordinate
    param longitude: values of the 'longitude' coordinate
    param output_path: directory in which the netCDF file is created

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for SIC from 2015 to 2017.")
    # The length of the lead-week axis follows the data. If SIC is not 5-D
    # (e.g. a scalar fill value) the historical size of 6 is kept.
    sic_shape = np.shape(SIC)
    n_leadweek = sic_shape[2] if len(sic_shape) == 5 else 6
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # The context manager closes the file even when one of the writes fails.
    with Dataset(os.path.join(output_path,'sic_weekly_S2S_NCEP_control_2015_2017.nc'),
                 'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('week', len(week))
        data_wrap.createDimension('leadweek', n_leadweek)
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create the 1-d coordinate variables
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        week_wrap_var = data_wrap.createVariable('week',np.int32,('week',))
        leadweek_wrap_var = data_wrap.createVariable('leadweek',np.int32,('leadweek',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 5-d variable
        SIC_wrap_var = data_wrap.createVariable('sic',np.float64,('year','week','leadweek','latitude','longitude'))
        # global attributes
        # NOTE(review): the description mentions ECMWF while the file name says
        # NCEP - confirm which one is intended.
        data_wrap.description = 'Lead time dependent weekly Sea Ice Concentration from ECMWF S2S'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        SIC_wrap_var.units = 'fraction of sea ice'
        SIC_wrap_var.long_name = 'sea ice concentration'

        # writing data
        year_wrap_var[:] = period
        week_wrap_var[:] = week
        leadweek_wrap_var[:] = np.arange(n_leadweek)
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        SIC_wrap_var[:] = SIC

    print ("Create netcdf file successfully")
    
    
def create_netcdf_point_ensemble (SIC, period, week, latitude, longitude, output_path):
    """
    Save the weekly sea ice concentration of the ensemble forecast as netCDF4.

    The file 'sic_weekly_S2S_NCEP_ensemble_2015_2017.nc' is created inside
    output_path (opened in 'w' mode).

    param SIC: weekly sea ice concentration, written to the variable 'sic'
               with dimensions [ens, year, week, leadweek, latitude, longitude]
    param period: values of the 'year' coordinate
    param week: values of the 'week' coordinate
    param latitude: values of the 'latitude' coordinate
    param longitude: values of the 'longitude' coordinate
    param output_path: directory in which the netCDF file is created

    return: None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for SIC from 2015 to 2017.")
    # The lengths of the ensemble and lead-week axes follow the data. If SIC is
    # not 6-D (e.g. a scalar fill value) the historical sizes 15 and 6 are kept.
    sic_shape = np.shape(SIC)
    if len(sic_shape) == 6:
        n_ens, n_leadweek = sic_shape[0], sic_shape[3]
    else:
        n_ens, n_leadweek = 15, 6
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # The context manager closes the file even when one of the writes fails.
    with Dataset(os.path.join(output_path,'sic_weekly_S2S_NCEP_ensemble_2015_2017.nc'),
                 'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('ens', n_ens)
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('week', len(week))
        data_wrap.createDimension('leadweek', n_leadweek)
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create the 1-d coordinate variables
        ensemble_wrap_var = data_wrap.createVariable('ens',np.int32,('ens',))
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        week_wrap_var = data_wrap.createVariable('week',np.int32,('week',))
        leadweek_wrap_var = data_wrap.createVariable('leadweek',np.int32,('leadweek',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 6-d variable
        SIC_wrap_var = data_wrap.createVariable('sic',np.float64,('ens','year','week',
                                                                  'leadweek','latitude','longitude'))
        # global attributes
        # NOTE(review): the description mentions ECMWF while the file name says
        # NCEP - confirm which one is intended.
        data_wrap.description = 'Lead time dependent weekly Sea Ice Concentration from ECMWF S2S'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        SIC_wrap_var.units = 'fraction of sea ice'
        SIC_wrap_var.long_name = 'sea ice concentration'
        # writing data
        ensemble_wrap_var[:] = np.arange(n_ens)
        year_wrap_var[:] = period
        week_wrap_var[:] = week
        leadweek_wrap_var[:] = np.arange(n_leadweek)
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        SIC_wrap_var[:] = SIC

    print ("Create netcdf file successfully")

In [5]:
if __name__=="__main__":
    # Driver: read the monthly GRIB files of the selected forecast flavour,
    # average the daily fields into weekly fields (over forecast start dates
    # and over lead time) and write the result to a netCDF file.
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # two-digit month strings used to build the monthly GRIB file names
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # years, months and weeks covered (4 weeks per month -> 48 weeks per year)
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    index_week = np.arange(1,49,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    # get invariant from benchmark file
    Dim_ens = 15
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_week = len(index_week)
    Dim_lead_weeks = 6 # leading time in weeks
    Dim_lead_days = 43 # leading time in days
    Dim_latitude = len(latitude_SIC) # from 90 - 45 N
    Dim_longitude = len(longitude_SIC)
    #############################################
    #####   Create space for storing data   #####
    #############################################
    if forecast_type == "ensemble":
        print("Data cleaning for S2S ensemble forecast!")
        # data pool
        pool_sic = np.zeros((Dim_ens, Dim_year, Dim_week, Dim_lead_weeks,
                             Dim_latitude, Dim_longitude), dtype = float)
        # number of GRIB messages that belong to one forecast start date
        messages_per_forecast = Dim_lead_days * Dim_ens
        # loop for calculation
        for i in period:
            for j in index_month:
                print("Processing ensemble forecast of {}{}!".format(i,namelist_month[j-1]))
                grbs_sic = pygrib.open(os.path.join(datapath,'S2S_NCEP_sic_ensemble_{}{}.grib'.format(i,namelist_month[j-1])))
                try:
                    messages = grbs_sic.messages
                    # fail early with a clear message instead of running out of
                    # bounds while filling the monthly pool
                    if messages % messages_per_forecast != 0:
                        raise ValueError("Number of messages ({}) is not a multiple of {} "
                                         "(lead days x ensemble members).".format(messages, messages_per_forecast))
                    time_forecast = messages // Dim_lead_days // Dim_ens
                    pool_sic_month = np.zeros((Dim_ens, time_forecast, Dim_lead_days,
                                               Dim_latitude, Dim_longitude),dtype = float)
                    # retrieve fields from GRIB file
                    # The messages are read in file order with the ensemble member
                    # as the fastest index, then the lead day, then the start date.
                    for counter_message in range(1, messages + 1):
                        counter_time, remainder = divmod(counter_message - 1, messages_per_forecast)
                        counter_lead_day, counter_ens = divmod(remainder, Dim_ens)
                        sic = grbs_sic.message(counter_message).values
                        # points masked in the benchmark field are flagged with -1
                        sic[mask==True] = -1.0
                        sic[sic>1.1] = 0.0 # missing values
                        pool_sic_month[counter_ens, counter_time, counter_lead_day, :, :] = sic[:Dim_latitude,:]
                finally:
                    # release the GRIB handle even if reading fails
                    grbs_sic.close()
                # take weekly mean over the forecast start dates
                # For the calculation of weekly fields, we assume each month consists of 4 weeks.
                # The first 3 weeks each contain 7 days. The 4th week contains the remaining days of that month.
                # NOTE(review): with fewer than 22 start dates in a file the 4th-week
                # slice is empty and np.mean returns NaN.
                pool_sic_week = np.zeros((Dim_ens, 4, Dim_lead_days, Dim_latitude, Dim_longitude),dtype=float)
                for w in range(4):
                    week_end = w*7+7 if w < 3 else None
                    pool_sic_week[:,w,:,:,:] = np.mean(pool_sic_month[:,w*7:week_end,:,:,:],axis=1)
                # take weekly mean over lead time:
                # the lead weeks are taken from the leading 42 days
                for n in range(Dim_lead_weeks):
                    pool_sic[:,i-start_year,j*4-4:j*4,n,:,:] = np.mean(pool_sic_week[:,:,n*7:n*7+7,:,:],2)
        ####################################################################
        ######                 Data Wrapping (NetCDF)                #######
        ####################################################################
        create_netcdf_point_ensemble(pool_sic, period, index_week,
                                     latitude_SIC, longitude_SIC, output_path)

    else:
        print("Data cleaning for S2S control forecast!")
        pool_sic = np.zeros((Dim_year, Dim_week, Dim_lead_weeks,
                             Dim_latitude, Dim_longitude), dtype = float)
        # loop for calculation
        for i in period:
            for j in index_month:
                print("Processing control forecast of {}{}!".format(i,namelist_month[j-1]))
                grbs_sic = pygrib.open(os.path.join(datapath,'S2S_NCEP_control_{}{}.grib'.format(i,namelist_month[j-1])))
                try:
                    messages = grbs_sic.messages
                    # fail early with a clear message instead of running out of
                    # bounds while filling the monthly pool
                    if messages % Dim_lead_days != 0:
                        raise ValueError("Number of messages ({}) is not a multiple of {} "
                                         "(lead days).".format(messages, Dim_lead_days))
                    time_forecast = messages // Dim_lead_days
                    pool_sic_month = np.zeros((time_forecast, Dim_lead_days,
                                               Dim_latitude, Dim_longitude),dtype = float)
                    # retrieve fields from GRIB file
                    # The messages are read in file order with the lead day as the
                    # fastest index, then the start date.
                    for counter_message in range(1, messages + 1):
                        counter_time, counter_lead_day = divmod(counter_message - 1, Dim_lead_days)
                        sic = grbs_sic.message(counter_message).values
                        # points masked in the benchmark field are flagged with -1
                        sic[mask==True] = -1.0
                        sic[sic>1.1] = 0.0 # missing values
                        pool_sic_month[counter_time, counter_lead_day, :, :] = sic[:Dim_latitude,:]
                finally:
                    # release the GRIB handle even if reading fails
                    grbs_sic.close()
                # take weekly mean over the forecast start dates
                # For the calculation of weekly fields, we assume each month consists of 4 weeks.
                # The first 3 weeks each contain 7 days. The 4th week contains the remaining days of that month.
                # NOTE(review): with fewer than 22 start dates in a file the 4th-week
                # slice is empty and np.mean returns NaN.
                pool_sic_week = np.zeros((4, Dim_lead_days, Dim_latitude, Dim_longitude),dtype=float)
                for w in range(4):
                    week_end = w*7+7 if w < 3 else None
                    pool_sic_week[w,:,:,:] = np.mean(pool_sic_month[w*7:week_end,:,:,:],axis=0)

                # take weekly mean over lead time:
                # the lead weeks are taken from the leading 42 days
                for n in range(Dim_lead_weeks):
                    pool_sic[i-start_year,j*4-4:j*4,n,:,:] = np.mean(pool_sic_week[:,n*7:n*7+7,:,:],1)
        ####################################################################
        ######                 Data Wrapping (NetCDF)                #######
        ####################################################################
        create_netcdf_point_control(pool_sic, period, index_week,
                                    latitude_SIC, longitude_SIC, output_path)
    print ('Packing 2D fields of NCEP S2S on surface level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')

Data cleaning for S2S ensemble forecast!
Processing ensemble forecast of 201501!
Processing ensemble forecast of 201502!
Processing ensemble forecast of 201503!
Processing ensemble forecast of 201504!
Processing ensemble forecast of 201505!
Processing ensemble forecast of 201506!
Processing ensemble forecast of 201507!
Processing ensemble forecast of 201508!
Processing ensemble forecast of 201509!
Processing ensemble forecast of 201510!
Processing ensemble forecast of 201511!
Processing ensemble forecast of 201512!
Processing ensemble forecast of 201601!
Processing ensemble forecast of 201602!
Processing ensemble forecast of 201603!
Processing ensemble forecast of 201604!
Processing ensemble forecast of 201605!
Processing ensemble forecast of 201606!
Processing ensemble forecast of 201607!
Processing ensemble forecast of 201608!
Processing ensemble forecast of 201609!
Processing ensemble forecast of 201610!
Processing ensemble forecast of 201611!
Processing ensemble forecast of 201612!