# Copyright Netherlands eScience Center <br>
**Function**     : Packing netCDF for the surface wind fields (uv10m) from JRA55 <br>
**Author**       : Yang Liu <br>
**First Built**  : 2019.10.10 <br>
**Last Update**  : 2019.10.12 <br>
Description     : This notebook aims to pack the surface wind fields from JRA55.<br>
Return Values   : netCDF4 <br>
Caveat          : 

In [6]:
import numpy as np
from netCDF4 import Dataset
import pygrib
import os

Initialization - Start with location of input and extraction of variables
Time span of each product:
- **ERA-Interim** 1979 - 2016
- **MERRA2**      1980 - 2016
- **JRA55**       1979 - 2015
- **ORAS4**       1958 - 2017
- **GLORYS2V3**   1993 - 2014
- **SODA3**       1980 - 2015

In [7]:
################################   Input zone  ######################################
# first and last year to process (the driver cell builds an inclusive range from them)
start_year = 1979
end_year = 2017
# directory holding the input GRIB files
# JRA55 monthly surface analysis files, named 'anl_surf.<YYYY><MM>'
datapath = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/JRA55/regression/surface'
# directory in which the packed netCDF file is written
output_path = '/home/ESLT0068/WorkFlow/Core_Database_AMET_OMET_reanalysis/JRA55/regression'
####################################################################################

In [8]:
# benchmark data and basic dimensions extraction
# A single monthly file is opened only to list its messages and to read the
# grid coordinates that are later used to size and label the output arrays.
benchmark_grbs = pygrib.open(os.path.join(datapath,'anl_surf.199102'))
try:
    print ('Number of messages',benchmark_grbs.messages)
    for messenger in benchmark_grbs:
        print (messenger)
    # message(1) seeks by message number, so it works after the listing loop
    benchmark_key = benchmark_grbs.message(1)
    lats, lons = benchmark_key.latlons()
finally:
    # release the file handle even if listing or reading fails
    benchmark_grbs.close()
# 1-D coordinate axes taken from the first column / first row of the 2-D grids
latitude = lats[:,0]
# NOTE(review): the scale factor below is an empirical correction for the
# longitudes returned for this grid; its origin is not documented here.
# TODO confirm the resulting axis against the expected 0..360 degree_east grid.
longitude = lons[0,:]* (-639.569395018) # the coordinate is not right
longitude[0] = 0
#print (latitude)
#print (longitude)

Number of messages 7
1:Surface pressure:Pa (avgua):reduced_gg:surface:level 0:fcst time 0 hrs (avgua):from 199102010000
2:Potential temperature:K (avgua):reduced_gg:surface:level 0:fcst time 0 hrs (avgua):from 199102010000
3:2 metre temperature:K (avgua):reduced_gg:heightAboveGround:level 2:fcst time 0 hrs (avgua):from 199102010000
4:2 metre specific humidity:kg kg**-1 (avgua):reduced_gg:heightAboveGround:level 2:fcst time 0 hrs (avgua):from 199102010000
5:2 metre relative humidity:% (avgua):reduced_gg:heightAboveGround:level 2:fcst time 0 hrs (avgua):from 199102010000
6:10 metre U wind component:m s**-1 (avgua):reduced_gg:heightAboveGround:level 10:fcst time 0 hrs (avgua):from 199102010000
7:10 metre V wind component:m s**-1 (avgua):reduced_gg:heightAboveGround:level 10:fcst time 0 hrs (avgua):from 199102010000


In [12]:
def data_extract(datapath, namelist_month, latitude, longitude):
    """Read the monthly 10 m wind components from the GRIB files into arrays.

    For every year in the module-level ``period`` and every month number in
    the module-level ``month``, the file ``anl_surf.<year><MM>`` under
    ``datapath`` is opened and GRIB messages 6 and 7 are read (listed by the
    benchmark file as the 10 metre U and V wind components).

    Parameters
    ----------
    datapath : str
        Directory containing the ``anl_surf.*`` GRIB files.
    namelist_month : sequence of str
        Two-digit month strings; entry ``m-1`` is used for month number ``m``.
    latitude, longitude : sequence
        Coordinate axes; only their lengths are used, to size the output.

    Returns
    -------
    (pool_u10, pool_v10) : tuple of numpy.ndarray
        Float arrays of shape
        ``(len(period), len(month), len(latitude), len(longitude))``.

    Notes
    -----
    Values are stored by position in ``period`` / ``month`` rather than by a
    fixed offset from 1979, so any starting year fills the arrays correctly.
    """
    print ('*******************************************************************')
    print ('*********************** extract variables *************************')
    print ('*******************************************************************')
    #############################################
    #####   Create space for storing data   #####
    #############################################
    # data pool
    pool_shape = (len(period), len(month), len(latitude), len(longitude))
    pool_u10 = np.zeros(pool_shape, dtype=float)
    pool_v10 = np.zeros(pool_shape, dtype=float)

    for year_index, year in enumerate(period):
        for month_index, month_number in enumerate(month):
            filename = 'anl_surf.{0}{1}'.format(year, namelist_month[month_number-1])
            datapath_grbs = pygrib.open(os.path.join(datapath, filename))
            try:
                # read messages: 6 -> u10, 7 -> v10
                var_u10 = datapath_grbs.message(6).values
                var_v10 = datapath_grbs.message(7).values
            finally:
                # close the file even when a message cannot be read
                datapath_grbs.close()
            pool_u10[year_index, month_index, :, :] = var_u10
            pool_v10[year_index, month_index, :, :] = var_v10

        print("Post-process data of {}".format(year))

    return pool_u10, pool_v10

In [10]:
def pack_netcdf_point (pool_u10, pool_v10, output_path):
    """Write the 10 m wind fields to a compressed NETCDF4 file.

    The file is created in ``output_path`` and named
    ``surface_JRA55_monthly_model_regress_<first>_<last>_uv10m.nc``, where
    ``<first>`` and ``<last>`` are the first and last entries of the
    module-level ``period``. The coordinate axes are taken from the
    module-level ``period``, ``month``, ``latitude`` and ``longitude``.

    Parameters
    ----------
    pool_u10, pool_v10 : array-like
        Zonal and meridional 10 m wind, laid out as
        (year, month, latitude, longitude).
    output_path : str
        Directory in which the netCDF file is created.

    Returns
    -------
    None
    """
    print ('*******************************************************************')
    print ('*********************** create netcdf file*************************')
    print ('*******************************************************************')
    # derive the year span in the file name from the data actually packed
    filename = 'surface_JRA55_monthly_model_regress_{0}_{1}_uv10m.nc'.format(period[0], period[-1])
    # wrap the datasets into netcdf file
    # 'NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', and 'NETCDF4'
    # the context manager closes the file even if a write fails
    with Dataset(os.path.join(output_path, filename),'w',format = 'NETCDF4') as data_wrap:
        # create dimensions for netcdf data
        data_wrap.createDimension('year',len(period))
        data_wrap.createDimension('month',len(month))
        data_wrap.createDimension('latitude',len(latitude))
        data_wrap.createDimension('longitude',len(longitude))
        # create coordinate variables for 1-dimensions
        year_wrap_var = data_wrap.createVariable('year',np.int32,('year',))
        month_wrap_var = data_wrap.createVariable('month',np.int32,('month',))
        lat_wrap_var = data_wrap.createVariable('latitude',np.float32,('latitude',))
        lon_wrap_var = data_wrap.createVariable('longitude',np.float32,('longitude',))
        # create the actual 4d variable
        field_dims = ('year','month','latitude','longitude')
        u10_wrap_var = data_wrap.createVariable('u10',np.float64,field_dims,zlib=True)
        v10_wrap_var = data_wrap.createVariable('v10',np.float64,field_dims,zlib=True)
        # global attributes (the packed product is JRA55, not ERA-Interim)
        data_wrap.description = 'Monthly mean 2D fields of JRA55 on surface level'
        # variable attributes
        lat_wrap_var.units = 'degree_north'
        lon_wrap_var.units = 'degree_east'

        u10_wrap_var.units = 'm/s'
        v10_wrap_var.units = 'm/s'

        u10_wrap_var.long_name = '10m zonal wind'
        v10_wrap_var.long_name = '10m meridional wind'

        # writing data
        year_wrap_var[:] = period
        lat_wrap_var[:] = latitude
        lon_wrap_var[:] = longitude
        month_wrap_var[:] = month

        u10_wrap_var[:] = pool_u10
        v10_wrap_var[:] = pool_v10

    print ("The generation of netcdf files for fields on surface is complete!!")

In [13]:
if __name__=="__main__":
    # Years and calendar months to process. data_extract and pack_netcdf_point
    # read `period` and `month` as module-level names, so they are set here
    # before either function is called.
    period = np.arange(start_year, end_year+1)
    month = np.arange(1, 13)
    # two-digit month strings used to build the GRIB file names
    namelist_month = ['{0:02d}'.format(m) for m in range(1, 13)]
    # read the wind fields, then write them to netCDF
    pool_u10, pool_v10 = data_extract(datapath, namelist_month, latitude, longitude)
    pack_netcdf_point(pool_u10, pool_v10, output_path)
    print ('Packing netcdf files complete!')

print ("Create netcdf file successfully")

*******************************************************************
*********************** extract variables *************************
*******************************************************************
Post-process data of 1979
Post-process data of 1980
Post-process data of 1981
Post-process data of 1982
Post-process data of 1983
Post-process data of 1984
Post-process data of 1985
Post-process data of 1986
Post-process data of 1987
Post-process data of 1988
Post-process data of 1989
Post-process data of 1990
Post-process data of 1991
Post-process data of 1992
Post-process data of 1993
Post-process data of 1994
Post-process data of 1995
Post-process data of 1996
Post-process data of 1997
Post-process data of 1998
Post-process data of 1999
Post-process data of 2000
Post-process data of 2001
Post-process data of 2002
Post-process data of 2003
Post-process data of 2004
Post-process data of 2005
Post-process data of 2006
Post-process data of 2007
Post-process data of 2008
Post-process dat

NameError: name 'pack_netcdf_point' is not defined