# Copyright Netherlands eScience Center <br>
**Function     : Check grib file** <br>
**Author       : Yang Liu** <br>
**First Built  : 2020.07.17** <br>
**Last Update  : 2020.07.20** <br>
Description     : This notebook pre-checks the contents of a GRIB file (message listing, sample field and its mask).<br>
Return Values   : netCDF4 <br>
Caveat          : 

In [1]:
import numpy as np
import scipy as sp
import time as tttt
from netCDF4 import Dataset,num2date
import os
import pygrib

In [2]:
################################   Input zone  #########################################
# directory handed to the netCDF writer further down as its output location
output_path = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ECMWFens'
# directory that holds the S2S control forecast GRIB files read by this notebook
datapath = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ECMWFens/S2Scontrol'
########################################################################################

In [10]:
# Open one monthly GRIB file, list its messages and keep the first field
# (values, mask and coordinate axes) for inspection in the following cells.
grbs_SIC = pygrib.open(os.path.join(datapath, 'S2S_sic_control_201501.grib'))
try:
    print('Number of messages', grbs_SIC.messages)
    # print the one-line summary of every message in the file
    for messenger in grbs_SIC:
        print(messenger)
    # message 1 is the first entry of the listing printed above
    key_SIC = grbs_SIC.message(1)
    sample = key_SIC.values
    # alternative kept for reference: additionally mask the zero-valued cells
    #sample = np.ma.masked_where(sample==0,sample)
    mask = np.ma.getmask(sample)
    # latlons() returns two coordinate arrays; the first column of lats and the
    # first row of lons are taken as the 1-D axes
    # (assumes a regular lat-lon grid -- TODO confirm for other files)
    lats, lons = key_SIC.latlons()
    latitude_SIC = lats[:, 0]
    longitude_SIC = lons[0, :]
finally:
    # release the file handle even when reading a message fails
    grbs_SIC.close()

Number of messages 144
1:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 0-24 hrs (avg):from 201501010000
2:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 24-48 hrs (avg):from 201501010000
3:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 48-72 hrs (avg):from 201501010000
4:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 72-96 hrs (avg):from 201501010000
5:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 96-120 hrs (avg):from 201501010000
6:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 120-144 hrs (avg):from 201501010000
7:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 144-168 hrs (avg):from 201501010000
8:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 168-192 hrs (avg):from 201501010000
9:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fcst time 192-216 hrs (avg):from 201501010000
10:Sea-ice cover:(0 - 1) (avg):regular_ll:surface:level 0:fc

In [11]:
# Show the grid size, then the same rows/columns window of the field and of its mask.
window = (slice(20, 40), slice(30, 40))
print(sample.shape)
for field in (sample, mask):
    print(field[window])

(121, 240)
[[-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- 0.654296875 0.6019287109375 -- -- -- --]
 [-- -- 0.0 0.0 0.0 -- -- -- -- 1.0]
 [-- -- 0.0 0.0 0.0 -- -- -- -- --]
 [-- -- -- 0.0 0.0 -- -- -- -- --]
 [-- -- -- 0.0 0.0 0.0 -- -- -- --]
 [-- -- -- 0.0 0.0 0.0 -- -- -- --]
 [-- -- -- 0.0 0.0 0.0 -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
[[ True  True  True  True  True  True  True  True  True  True]
 [ True  True  True  True  True  True  True  True  True  True]
 [ True  True  True  True  True  True  True  True  True  True]
 [ True  True  True  True  True  True  True  True  True  True]
 [ True  True  True  True  True  Tru

In [3]:
# Input file for the cells below.
# alternative input kept for reference:
#datapath_full = os.path.join(datapath, 'era1991','pressure_daily_075_diagnostic_1991_2_sic.nc')
erai_dir = '/home/ESLT0068/WorkFlow/Core_Database_DeepLearn/ERA-Interim'
erai_file = 'sic_weekly_erai_1979_2017.nc'
datapath_full = os.path.join(erai_dir, erai_file)
# open the netCDF file; later cells read their variables from this handle
var_key = Dataset(datapath_full)

In [None]:
# Read the complete 'siconc' variable and overwrite its masked cells with -1.0.
sic_subdaily = var_key.variables['siconc'][:]
mask = np.ma.getmask(sic_subdaily[:,:,:])
sic_subdaily[mask] = -1.0
# average every group of four consecutive time steps
# (presumably four records per day -> daily mean; confirm against the input file)
sic_daily = (sic_subdaily[::4,:,:] + sic_subdaily[1::4,:,:]
             + sic_subdaily[2::4,:,:] + sic_subdaily[3::4,:,:]) / 4
# take weekly mean: three blocks of 7 daily fields, the fourth block takes
# every remaining daily field
n_week = 4
# spatial shape follows the data instead of a hard-coded 67 x 480 grid
sic_weekly = np.zeros((n_week,) + sic_daily.shape[1:], dtype=float)
for i in range(n_week):
    first = i * 7
    # None leaves the slice open-ended for the last block
    last = first + 7 if i < n_week - 1 else None
    sic_weekly[i,:,:] = np.mean(sic_daily[first:last,:,:], axis=0)

In [None]:
# Print the same latitude/longitude window at each processing stage,
# followed by the shape of the mask.
rows, cols = slice(10, 30), slice(120, 150)
print(sic_subdaily[12, rows, cols])
print(sic_daily[12, rows, cols])
print(sic_weekly[0, rows, cols])
print(mask.shape)

In [None]:
# save output datasets
# we only pack our timeseries from 1979 to 2016
def create_netcdf_point(SIC, period, week, latitude, longitude, output_path):
    """Write the weekly sea ice concentration field to a NETCDF4 file.

    The file is named 'sic_weekly_erai_1979_2017.nc', is placed inside
    ``output_path`` and is opened in write mode ('w').

    Parameters
    ----------
    SIC : array-like
        Data written to the 'sic' variable, whose dimensions are
        (year, week, latitude, longitude).
    period : sequence
        Values of the 'year' coordinate (stored as int32); its length sets
        the size of the 'year' dimension.
    week : sequence
        Values of the 'week' coordinate (stored as int32); its length sets
        the size of the 'week' dimension.
    latitude : sequence
        Values of the 'latitude' coordinate (stored as float32).
    longitude : sequence
        Values of the 'longitude' coordinate (stored as float32).
    output_path : str
        Directory in which the file is created.

    Returns
    -------
    None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    print("Start creating netcdf file for SIC from 1979 to 2017.")
    target = os.path.join(output_path, 'sic_weekly_erai_1979_2017.nc')
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # the context manager closes the file even if one of the writes fails
    with Dataset(target, 'w', format='NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year', len(period))
        data_wrap.createDimension('week', len(week))
        data_wrap.createDimension('latitude', len(latitude))
        data_wrap.createDimension('longitude', len(longitude))
        # create the coordinate variables, one per dimension
        year_wrap_var = data_wrap.createVariable('year', np.int32, ('year',))
        week_wrap_var = data_wrap.createVariable('week', np.int32, ('week',))
        lat_wrap_var = data_wrap.createVariable('latitude', np.float32, ('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude', np.float32, ('longitude',))
        # create the actual 4-d variable
        SIC_wrap_var = data_wrap.createVariable(
            'sic', np.float64, ('year', 'week', 'latitude', 'longitude'))

        # global attributes
        data_wrap.description = 'Weekly Sea Ice Concentration'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'
        SIC_wrap_var.units = 'fraction of sea ice'
        SIC_wrap_var.long_name = 'sea ice concentration'

        # writing data
        year_wrap_var[:] = period
        week_wrap_var[:] = week
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        SIC_wrap_var[:] = SIC

    print ("Create netcdf file successfully")

In [None]:
if __name__=="__main__":
    # Driver: collect the weekly sea ice concentration of every month of every
    # year into one (year, week, latitude, longitude) array and write it to a
    # single netCDF file.
    # NOTE(review): start_year, end_year, var_key_retrieve and retriver are not
    # defined in the visible part of this notebook; they have to exist before
    # this cell is run.
    ####################################################################
    ######  Create time namelist matrix for variable extraction  #######
    ####################################################################
    # date and time arrangement
    # namelist of month and days for file manipulation (not used in this cell)
    namelist_month = ['01','02','03','04','05','06','07','08','09','10','11','12']
    # index of years, months and weeks
    period = np.arange(start_year,end_year+1,1)
    index_month = np.arange(1,13,1)
    index_week = np.arange(1,49,1)
    ####################################################################
    ######       Extract invariant and calculate constants       #######
    ####################################################################
    Dim_year = len(period)
    Dim_month = len(index_month)
    Dim_week = len(index_week)
    # grid size is fixed here rather than read from a benchmark file
    Dim_latitude = 67
    Dim_longitude = 480
    # 48 weeks spread over 12 months -> 4 weekly fields per month
    weeks_per_month = Dim_week // Dim_month
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool
    pool_sic = np.zeros((Dim_year,Dim_week,Dim_latitude,Dim_longitude),dtype = float)
    # loop for calculation
    # the position inside `period` is the year index, so the pool stays
    # aligned with `period` whatever start_year is (previously: i-1979)
    for year_index, year in enumerate(period):
        for month in index_month:
            var_key = var_key_retrieve(datapath,year,month)
            # get the key of each variable
            latitude = var_key.variables['latitude'][:]
            longitude = var_key.variables['longitude'][:]
            sic_weekly = retriver(var_key,latitude,longitude)
            # months are 1-based: month m fills the weeks (m-1)*4 .. m*4-1
            week_start = (month - 1) * weeks_per_month
            pool_sic[year_index,week_start:week_start+weeks_per_month,:,:] = sic_weekly
    ####################################################################
    ######                 Data Wrapping (NetCDF)                #######
    ####################################################################
    # latitude/longitude are the axes read from the last file of the loop
    create_netcdf_point(pool_sic, period, index_week,
                        latitude, longitude, output_path)
    print ('Packing 2D fields of ERA-Interim on surface level is complete!!!')
    print ('The output is in sleep, safe and sound!!!')