From 72f246912b4e322772173f58ee3ee4564e1316ad Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Fri, 19 Jan 2024 15:52:31 +0000 Subject: [PATCH 01/19] #2772 Added quit_msg --- scripts/python/met/logger.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/python/met/logger.py b/scripts/python/met/logger.py index a7296124a6..418a39b0d1 100644 --- a/scripts/python/met/logger.py +++ b/scripts/python/met/logger.py @@ -33,7 +33,11 @@ def log_msg(msg): print(f'{logger.PROMPT} {msg}') @staticmethod - def quit(msg): - logger.error_msg([msg, "Quit..."]) - sys.exit(1) + def quit(msg, do_quit=True): + logger.quit_msg(msg) + if do_quit: + sys.exit(1) + @staticmethod + def quit_msg(msg): + logger.error_msg([msg, "Quit..."]) From d5b4550420d34083fd199afe959efa85bec688bf Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Fri, 19 Jan 2024 19:35:17 +0000 Subject: [PATCH 02/19] #2772 Use JSON for attrubutes and numpy serialization dor 2D data instead of NetCDF --- scripts/python/met/dataplane.py | 153 ++++++++++++-------------------- 1 file changed, 59 insertions(+), 94 deletions(-) diff --git a/scripts/python/met/dataplane.py b/scripts/python/met/dataplane.py index 57c9ac367b..1a8b834c73 100644 --- a/scripts/python/met/dataplane.py +++ b/scripts/python/met/dataplane.py @@ -1,7 +1,8 @@ import os import sys +import json +import re import numpy as np -import netCDF4 as nc import xarray as xr from importlib import util as import_util @@ -19,17 +20,16 @@ class dataplane(logger): @staticmethod def call_python(argv): + # argv[0] is the python wrapper script (caller) logger.log_msg(f"Module:\t{repr(argv[0])}") if 1 == len(argv): - logger.quit(f"User command is missing") + logger.quit_msg(f"User python command is missing") + sys.exit(1) + + logger.log_msg(f"User python command:\t{repr(' '.join(argv[1:]))}") - logger.log_msg("User Command:\t" + repr(' '.join(argv[1:]))) - # argv[0] is the python wrapper script (caller) # argv[1] contains the user defined python 
script pyembed_module_name = argv[1] - sys.argv = argv[1:] - logger.log_msg(f" sys.argv:\t{sys.argv}") - # append user script dir to system path pyembed_dir, pyembed_name = os.path.split(pyembed_module_name) if pyembed_dir: @@ -40,11 +40,18 @@ def call_python(argv): user_base = pyembed_name.replace('.py','') + argv_org = sys.argv # save sys.argv + sys.argv = argv[1:] spec = import_util.spec_from_file_location(user_base, pyembed_module_name) met_in = import_util.module_from_spec(spec) spec.loader.exec_module(met_in) + sys.argv = argv_org # restore sys.argv return met_in + @staticmethod + def get_tmp_numpy_filename(tmp_filename): + return re.sub(".json$", ".npy", tmp_filename) if tmp_filename.endswith(".json") else f'{tmp_filename}.npy' + @staticmethod def is_integer(a_data): return isinstance(a_data, int) @@ -100,76 +107,33 @@ def read_2d_text_input(input_file): return met_data @staticmethod - def read_dataplane(netcdf_filename): - # read NetCDF file - ds = nc.Dataset(netcdf_filename, 'r') - - dp = ds['met_data'] - met_data = dp[:] - attr_name = dataplane.ATTR_USER_FILL_VALUE - user_fill_value = dp.getncattr(attr_name) if hasattr(dp, attr_name) else None - - met_attrs = {} - - # grid is defined as a dictionary or string - grid = {} - for attr, attr_val in ds.__dict__.items(): - if 'grid.' 
in attr: - grid_attr = attr.split('.')[1] - grid[grid_attr] = attr_val - else: - met_attrs[attr] = attr_val - - if grid: - met_attrs['grid'] = grid - - met_attrs['name'] = met_attrs['name_str'] - del met_attrs['name_str'] - + def read_dataplane(tmp_filename): met_info = {} - met_info['met_data'] = met_data - if user_fill_value is not None: - met_attrs['fill_value'] = user_fill_value - met_info['attrs'] = met_attrs - + with open(tmp_filename) as json_h: + met_info['attrs'] = json.load(json_h) + # read 2D numeric data + numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename) + met_dp_data = np.load(numpy_dump_name) + met_info['met_data'] = met_dp_data return met_info @staticmethod - def write_dataplane(met_in, netcdf_filename): - met_info = {'met_data': met_in.met_data} + def write_dataplane(met_in, tmp_filename): if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs: attrs = met_in.met_data.attrs else: attrs = met_in.attrs - met_info['attrs'] = attrs + with open(tmp_filename,'w') as json_h: + json.dump(attrs, json_h) - # write NetCDF file - ds = nc.Dataset(netcdf_filename, 'w') - - # create dimensions and variable - nx, ny = met_in.met_data.shape - ds.createDimension('x', nx) - ds.createDimension('y', ny) - dp = ds.createVariable('met_data', met_in.met_data.dtype, ('x', 'y'), - fill_value=dataplane.MET_FILL_VALUE) - dp[:] = met_in.met_data - - # append attributes - for attr, attr_val in met_info['attrs'].items(): - if attr_val is None: - continue - - if attr == 'name': - setattr(ds, 'name_str', attr_val) - elif attr == 'fill_value': - setattr(dp, dataplane.ATTR_USER_FILL_VALUE, attr_val) - elif type(attr_val) == dict: - for key in attr_val: - setattr(ds, attr + '.' 
+ key, attr_val[key]) - else: - setattr(ds, attr, attr_val) - - ds.close() + if isinstance(met_in.met_data, (np.ma.MaskedArray, np.ma.core.MaskedArray)): + met_dp_data = np.ma.getdata(met_in.met_data, subok=False) + elif isinstance(met_in.met_data, np.ndarray): + met_dp_data = met_in.met_data + else: + met_dp_data = np.array(met_in.met_data) + numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename) + np.save(numpy_dump_name, met_dp_data) @staticmethod def validate_met_data(met_data, fill_value=None): @@ -180,33 +144,34 @@ def validate_met_data(met_data, fill_value=None): from_ndarray = False if met_data is None: logger.quit(f"{method_name} The met_data is None") + sys.exit(1) + + nx, ny = met_data.shape + + met_fill_value = dataplane.MET_FILL_VALUE + if dataplane.is_xarray_dataarray(met_data): + from_xarray = True + attrs = met_data.attrs + met_data = met_data.data + modified_met_data = True + if isinstance(met_data, np.ndarray): + from_ndarray = True + met_data = np.ma.array(met_data) + + if isinstance(met_data, np.ma.MaskedArray): + is_int_data = dataplane.is_integer(met_data[0,0]) or dataplane.is_integer(met_data[int(nx/2),int(ny/2)]) + met_data = np.ma.masked_equal(met_data, float('nan')) + met_data = np.ma.masked_equal(met_data, float('inf')) + if fill_value is not None: + met_data = np.ma.masked_equal(met_data, fill_value) + met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value) + else: + logger.log_msg(f"{method_name} unknown datatype {type(met_data)}") + + if dataplane.KEEP_XARRAY: + return xr.DataArray(met_data,attrs=attrs) if from_xarray else met_data else: - nx, ny = met_data.shape - - met_fill_value = dataplane.MET_FILL_VALUE - if dataplane.is_xarray_dataarray(met_data): - from_xarray = True - attrs = met_data.attrs - met_data = met_data.data - modified_met_data = True - if isinstance(met_data, np.ndarray): - from_ndarray = True - met_data = np.ma.array(met_data) - - if isinstance(met_data, np.ma.MaskedArray): - 
is_int_data = dataplane.is_integer(met_data[0,0]) or dataplane.is_integer(met_data[int(nx/2),int(ny/2)]) - met_data = np.ma.masked_equal(met_data, float('nan')) - met_data = np.ma.masked_equal(met_data, float('inf')) - if fill_value is not None: - met_data = np.ma.masked_equal(met_data, fill_value) - met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value) - else: - logger.log_msg(f"{method_name} unknown datatype {type(met_data)}") - - if dataplane.KEEP_XARRAY: - return xr.DataArray(met_data,attrs=attrs) if from_xarray else met_data - else: - return met_data + return met_data def main(argv): From 6ca151131ab743c6efac51a6645bb464473b594b Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:03:33 +0000 Subject: [PATCH 03/19] #2772 Initial release, Separated from point.py --- scripts/python/met/point_nc.py | 293 +++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 scripts/python/met/point_nc.py diff --git a/scripts/python/met/point_nc.py b/scripts/python/met/point_nc.py new file mode 100644 index 0000000000..37063bdb0d --- /dev/null +++ b/scripts/python/met/point_nc.py @@ -0,0 +1,293 @@ +''' +Created on Jan 10, 2024 + +@author: hsoh + +- This is a derived class to support a NetCDF format data. Separated from point.py +- The potential risk with the netCDF python package is the NetCDF library conflicts beteen MET and python3. + +''' + +import os + +import numpy as np +import netCDF4 as nc + +from met.point import met_point_obs, met_point_tools + +DO_PRINT_DATA = False +ARG_PRINT_DATA = "print_data" + + +def get_nc_point_obs(): + return nc_point_obs() + + +# Note: caller should import netCDF4 +# The argements nc_group(dataset) and nc_var should not be None +class met_point_nc_tools(met_point_tools): + + #met_missing = -99999999. 
+ + @staticmethod + def get_num_array(nc_group, var_name): + nc_var = nc_group.variables.get(var_name, None) + return [] if nc_var is None else nc_var[:] + + @staticmethod + def get_nc_point_obs(): + return nc_point_obs() + + @staticmethod + def get_ncbyte_array_to_str(nc_var): + nc_str_data = nc_var[:] + if nc_var.datatype.name == 'bytes8': + nc_str_data = [ str(s.compressed(),"utf-8") for s in nc_var[:] ] + return nc_str_data + + @staticmethod + def get_string_array(nc_group, var_name): + nc_var = nc_group.variables.get(var_name, None) + return [] if nc_var is None else met_point_nc_tools.get_ncbyte_array_to_str(nc_var) + + +class nc_point_obs(met_point_obs): + + # args should be string, list, or dictionary + def get_nc_filename(self, args): + nc_filename = None + if isinstance(args, dict): + nc_filename = args.get('nc_name',None) + elif isinstance(args, list): + nc_filename = args[0] + elif args != ARG_PRINT_DATA: + nc_filename = args + + return nc_filename + + def read_data(self, nc_filename): + method_name = f"{self.__class__.__name__}.read_data()" + if nc_filename is None: + self.log_error_msg(f"{method_name} The input NetCDF filename is missing") + elif not os.path.exists(nc_filename): + self.log_error_msg(f"{method_name} input NetCDF file ({nc_filename}) does not exist") + else: + dataset = nc.Dataset(nc_filename, 'r') + + attr_name = 'use_var_id' + use_var_id_str = dataset.getncattr(attr_name) if attr_name in dataset.ncattrs() else "false" + self.use_var_id = use_var_id_str.lower() == 'true' + + # Header + self.hdr_typ = dataset['hdr_typ'][:] + self.hdr_sid = dataset['hdr_sid'][:] + self.hdr_vld = dataset['hdr_vld'][:] + self.hdr_lat = dataset['hdr_lat'][:] + self.hdr_lon = dataset['hdr_lon'][:] + self.hdr_elv = dataset['hdr_elv'][:] + self.hdr_typ_table = met_point_nc_tools.get_string_array(dataset, 'hdr_typ_table') + self.hdr_sid_table = met_point_nc_tools.get_string_array(dataset, 'hdr_sid_table') + self.hdr_vld_table = 
met_point_nc_tools.get_string_array(dataset, 'hdr_vld_table') + + nc_var = dataset.variables.get('obs_unit', None) + if nc_var: + self.obs_var_unit = nc_var[:] + nc_var = dataset.variables.get('obs_desc', None) + if nc_var: + self.obs_var_desc = nc_var[:] + + nc_var = dataset.variables.get('hdr_prpt_typ', None) + if nc_var: + self.hdr_prpt_typ = nc_var[:] + nc_var = dataset.variables.get('hdr_irpt_typ', None) + if nc_var: + self.hdr_irpt_typ = nc_var[:] + nc_var = dataset.variables.get('hdr_inst_typ', None) + if nc_var: + self.hdr_inst_typ =nc_var[:] + + #Observation data + self.hdr_sid = dataset['hdr_sid'][:] + self.obs_qty = np.array(dataset['obs_qty'][:]) + self.obs_hid = np.array(dataset['obs_hid'][:]) + self.obs_lvl = np.array(dataset['obs_lvl'][:]) + self.obs_hgt = np.array(dataset['obs_hgt'][:]) + self.obs_val = np.array(dataset['obs_val'][:]) + nc_var = dataset.variables.get('obs_vid', None) + if nc_var is None: + self.use_var_id = False + nc_var = dataset.variables.get('obs_gc', None) + else: + self.obs_var_table = met_point_nc_tools.get_string_array(dataset, 'obs_var') + if nc_var: + self.obs_vid = np.array(nc_var[:]) + + self.obs_qty_table = met_point_nc_tools.get_string_array(dataset, 'obs_qty_table') + + def save_ncfile(self, nc_filename): + met_data = self.get_point_data() + with nc.Dataset(nc_filename, 'w') as nc_dataset: + self.set_nc_data(nc_dataset) + return met_data + + def set_nc_data(self, nc_dataset): + return nc_point_obs.write_nc_data(nc_dataset, self) + + @staticmethod + def write_nc_file(nc_filename, point_obs): + with nc.Dataset(nc_filename, 'w') as nc_dataset: + nc_point_obs.write_nc_data(nc_dataset, point_obs) + + @staticmethod + def write_nc_data(nc_dataset, point_obs): + method_name = "point_nc.write_nc_data()" + try: + do_nothing = False + if 0 == point_obs.nhdr: + do_nothing = True + met_point_obs.info_message(f"{method_name} the header is empty") + if 0 == point_obs.nobs: + do_nothing = True + 
met_point_obs.info_message(f"{method_name} the observation data is empty") + if do_nothing: + print() + return + + # Set global attributes + nc_dataset.MET_Obs_version = "1.02" ; + nc_dataset.use_var_id = "true" if point_obs.use_var_id else "false" + + # Create dimensions + nc_dataset.createDimension('mxstr', 16) + nc_dataset.createDimension('mxstr2', 40) + nc_dataset.createDimension('mxstr3', 80) + nc_dataset.createDimension('nhdr', point_obs.nhdr) + nc_dataset.createDimension('nobs', point_obs.nobs) + #npbhdr = len(point_obs.hdr_prpt_typ) + if 0 < point_obs.npbhdr: + nc_dataset.createDimension('npbhdr', point_obs.npbhdr) + nc_dataset.createDimension('nhdr_typ', point_obs.nhdr_typ) + nc_dataset.createDimension('nhdr_sid', point_obs.nhdr_sid) + nc_dataset.createDimension('nhdr_vld', point_obs.nhdr_vld) + nc_dataset.createDimension('nobs_qty', point_obs.nobs_qty) + nc_dataset.createDimension('obs_var_num', point_obs.nobs_var) + + type_for_string = 'S1' # np.byte + dims_hdr = ('nhdr',) + dims_obs = ('nobs',) + + # Create header and observation variables + var_hdr_typ = nc_dataset.createVariable('hdr_typ', np.int32, dims_hdr, fill_value=-9999) + var_hdr_sid = nc_dataset.createVariable('hdr_sid', np.int32, dims_hdr, fill_value=-9999) + var_hdr_vld = nc_dataset.createVariable('hdr_vld', np.int32, dims_hdr, fill_value=-9999) + var_hdr_lat = nc_dataset.createVariable('hdr_lat', np.float32, dims_hdr, fill_value=-9999.) + var_hdr_lon = nc_dataset.createVariable('hdr_lon', np.float32, dims_hdr, fill_value=-9999.) + var_hdr_elv = nc_dataset.createVariable('hdr_elv', np.float32, dims_hdr, fill_value=-9999.) + + var_obs_qty = nc_dataset.createVariable('obs_qty', np.int32, dims_obs, fill_value=-9999) + var_obs_hid = nc_dataset.createVariable('obs_hid', np.int32, dims_obs, fill_value=-9999) + var_obs_vid = nc_dataset.createVariable('obs_vid', np.int32, dims_obs, fill_value=-9999) + var_obs_lvl = nc_dataset.createVariable('obs_lvl', np.float32, dims_obs, fill_value=-9999.) 
+ var_obs_hgt = nc_dataset.createVariable('obs_hgt', np.float32, dims_obs, fill_value=-9999.) + var_obs_val = nc_dataset.createVariable('obs_val', np.float32, dims_obs, fill_value=-9999.) + + if 0 == point_obs.npbhdr: + var_hdr_prpt_typ = None + var_hdr_irpt_typ = None + var_hdr_inst_typ = None + else: + dims_npbhdr = ('npbhdr',) + var_hdr_prpt_typ = nc_dataset.createVariable('hdr_prpt_typ', np.int32, dims_npbhdr, fill_value=-9999.) + var_hdr_irpt_typ = nc_dataset.createVariable('hdr_irpt_typ', np.int32, dims_npbhdr, fill_value=-9999.) + var_hdr_inst_typ = nc_dataset.createVariable('hdr_inst_typ', np.int32, dims_npbhdr, fill_value=-9999.) + + var_hdr_typ_table = nc_dataset.createVariable('hdr_typ_table', type_for_string, ('nhdr_typ','mxstr2')) + var_hdr_sid_table = nc_dataset.createVariable('hdr_sid_table', type_for_string, ('nhdr_sid','mxstr2')) + var_hdr_vld_table = nc_dataset.createVariable('hdr_vld_table', type_for_string, ('nhdr_vld','mxstr')) + var_obs_qty_table = nc_dataset.createVariable('obs_qty_table', type_for_string, ('nobs_qty','mxstr')) + var_obs_var_table = nc_dataset.createVariable('obs_var', type_for_string, ('obs_var_num','mxstr2')) + var_obs_var_unit = nc_dataset.createVariable('obs_unit', type_for_string, ('obs_var_num','mxstr2')) + var_obs_var_desc = nc_dataset.createVariable('obs_desc', type_for_string, ('obs_var_num','mxstr3')) + + # Set variables + var_hdr_typ[:] = point_obs.hdr_typ[:] + var_hdr_sid[:] = point_obs.hdr_sid[:] + var_hdr_vld[:] = point_obs.hdr_vld[:] + var_hdr_lat[:] = point_obs.hdr_lat[:] + var_hdr_lon[:] = point_obs.hdr_lon[:] + var_hdr_elv[:] = point_obs.hdr_elv[:] + for i in range(0, point_obs.nhdr_typ): + for j in range(0, len(point_obs.hdr_typ_table[i])): + var_hdr_typ_table[i,j] = point_obs.hdr_typ_table[i][j] + for i in range(0, point_obs.nhdr_sid): + for j in range(0, len(point_obs.hdr_sid_table[i])): + var_hdr_sid_table[i,j] = point_obs.hdr_sid_table[i][j] + for i in range(0, point_obs.nhdr_vld): + for j in range(0, 
len(point_obs.hdr_vld_table[i])): + var_hdr_vld_table[i,j] = point_obs.hdr_vld_table[i][j] + if 0 < point_obs.npbhdr: + var_hdr_prpt_typ[:] = point_obs.hdr_prpt_typ[:] + var_hdr_irpt_typ[:] = point_obs.hdr_irpt_typ[:] + var_hdr_inst_typ[:] = point_obs.hdr_inst_typ[:] + + var_obs_qty[:] = point_obs.obs_qty[:] + var_obs_hid[:] = point_obs.obs_hid[:] + var_obs_vid[:] = point_obs.obs_vid[:] + var_obs_lvl[:] = point_obs.obs_lvl[:] + var_obs_hgt[:] = point_obs.obs_hgt[:] + var_obs_val[:] = point_obs.obs_val[:] + for i in range(0, point_obs.nobs_var): + for j in range(0, len(point_obs.obs_var_table[i])): + var_obs_var_table[i,j] = point_obs.obs_var_table[i][j] + var_obs_var_unit[i] = "" if i >= len(point_obs.obs_var_unit) else point_obs.obs_var_unit[i] + var_obs_var_desc[i] = "" if i >= len(point_obs.obs_var_desc) else point_obs.obs_var_desc[i] + for i in range(0, point_obs.nobs_qty): + for j in range(0, len(point_obs.obs_qty_table[i])): + var_obs_qty_table[i,j] = point_obs.obs_qty_table[i][j] + + # Set variable attributes + var_hdr_typ.long_name = "index of message type" + var_hdr_sid.long_name = "index of station identification" + var_hdr_vld.long_name = "index of valid time" + var_hdr_lat.long_name = "latitude" + var_hdr_lat.units = "degrees_north" + var_hdr_lon.long_name = "longitude" + var_hdr_lon.units = "degrees_east" + var_hdr_elv.long_name = "elevation" + var_hdr_elv.units = "meters above sea level (msl)" + + var_obs_qty.long_name = "index of quality flag" + var_obs_hid.long_name = "index of matching header data" + var_obs_vid.long_name = "index of BUFR variable corresponding to the observation type" + var_obs_lvl.long_name = "pressure level (hPa) or accumulation interval (sec)" + var_obs_hgt.long_name = "height in meters above sea level (msl)" + var_obs_val.long_name = "observation value" + var_hdr_typ_table.long_name = "message type" + var_hdr_sid_table.long_name = "station identification" + var_hdr_vld_table.long_name = "valid time" + var_hdr_vld_table.units = 
"YYYYMMDD_HHMMSS UTC" + var_obs_qty_table.long_name = "quality flag" + var_obs_var_table.long_name = "variable names" + var_obs_var_unit.long_name = "variable units" + var_obs_var_desc.long_name = "variable descriptions" + except: + print(f' === ERROR at {method_name} type(nc_dataset)={type(nc_dataset)} type(point_obs)={type(point_obs)}') + raise + + +def main(argv): + if len(argv) != 1 and argv[1] != ARG_PRINT_DATA: + netcdf_filename = argv[1] + tmp_nc_name = 'tmp_met_point.nc' + point_obs_data = nc_point_obs() + point_obs_data.read_data(point_obs_data.get_nc_filename(netcdf_filename)) + met_point_data = point_obs_data.save_ncfile(tmp_nc_name) + print(f'{met_point_tools.get_prompt()} saved met_point_data to {tmp_nc_name}') + met_point_data['met_point_data'] = point_obs_data + + if DO_PRINT_DATA or ARG_PRINT_DATA == argv[-1]: + point_obs_data.print_point_data(met_point_data) + +if __name__ == '__main__': + main() + print('Done python script') From 355b38477634ef614d4440239ee0a2744c45b405 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:04:10 +0000 Subject: [PATCH 04/19] #2772 Added point_nc.py --- scripts/python/met/Makefile.am | 1 + scripts/python/met/Makefile.in | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/python/met/Makefile.am b/scripts/python/met/Makefile.am index fd802449dd..c24b47247b 100644 --- a/scripts/python/met/Makefile.am +++ b/scripts/python/met/Makefile.am @@ -28,6 +28,7 @@ pythonmetscripts_DATA = \ logger.py \ dataplane.py \ mprbase.py \ + point_nc.py \ point.py EXTRA_DIST = ${pythonmetscripts_DATA} diff --git a/scripts/python/met/Makefile.in b/scripts/python/met/Makefile.in index fea84eace6..6a7570efa2 100644 --- a/scripts/python/met/Makefile.in +++ b/scripts/python/met/Makefile.in @@ -314,6 +314,7 @@ pythonmetscripts_DATA = \ logger.py \ dataplane.py \ mprbase.py \ + point_nc.py \ point.py EXTRA_DIST = ${pythonmetscripts_DATA} From 118a2becfca344d2a04466fb668cb8cd6b510435 Mon Sep 17 00:00:00 2001 From: Howard Soh 
Date: Mon, 29 Jan 2024 22:26:07 +0000 Subject: [PATCH 05/19] #2772 Changed write_tmp_nc and read_tmp_nc to write_tmp_py and read_tmp_py --- .../vx_data2d_python/python_dataplane.cc | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/libcode/vx_data2d_python/python_dataplane.cc b/src/libcode/vx_data2d_python/python_dataplane.cc index 118f2b45aa..1c5c0a1e31 100644 --- a/src/libcode/vx_data2d_python/python_dataplane.cc +++ b/src/libcode/vx_data2d_python/python_dataplane.cc @@ -31,9 +31,9 @@ extern GlobalPython GP; // this needs external linkage static const char * user_ppath = nullptr; -static const char write_tmp_nc [] = "MET_BASE/python/pyembed/write_tmp_dataplane.py"; +static const char write_tmp_py [] = "MET_BASE/python/pyembed/write_tmp_dataplane.py"; -static const char read_tmp_nc [] = "MET_BASE/python/pyembed/read_tmp_dataplane.py"; +static const char read_tmp_py [] = "MET_BASE/python/pyembed/read_tmp_dataplane.py"; static const char tmp_nc_var_name [] = "met_info"; @@ -48,10 +48,10 @@ static bool straight_python_dataplane(const char * script_name, Grid & met_grid_out, VarInfoPython &vinfo); -static bool tmp_nc_dataplane(const char * script_name, - int script_argc, char ** script_argv, - const bool use_xarray, DataPlane & met_dp_out, - Grid & met_grid_out, VarInfoPython &vinfo); +static bool tmp_dataplane(const char * script_name, + int script_argc, char ** script_argv, + const bool use_xarray, DataPlane & met_dp_out, + Grid & met_grid_out, VarInfoPython &vinfo); //////////////////////////////////////////////////////////////////////// @@ -83,10 +83,10 @@ bool status = false; if ( (user_ppath = getenv(user_python_path_env)) != 0 ) { // do_tmp_nc = true; - status = tmp_nc_dataplane(user_script_name, - user_script_argc, user_script_argv, - use_xarray, met_dp_out, - met_grid_out, vinfo); + status = tmp_dataplane(user_script_name, + user_script_argc, user_script_argv, + use_xarray, met_dp_out, + met_grid_out, vinfo); } else { 
@@ -309,10 +309,10 @@ return ( true ); //////////////////////////////////////////////////////////////////////// -bool tmp_nc_dataplane(const char * user_script_name, - int user_script_argc, char ** user_script_argv, - const bool use_xarray, DataPlane & met_dp_out, - Grid & met_grid_out, VarInfoPython &vinfo) +bool tmp_dataplane(const char * user_script_name, + int user_script_argc, char ** user_script_argv, + const bool use_xarray, DataPlane & met_dp_out, + Grid & met_grid_out, VarInfoPython &vinfo) { @@ -334,13 +334,13 @@ if ( ! tmp_dir ) tmp_dir = default_tmp_dir; path << cs_erase << tmp_dir << '/' - << tmp_nc_base_name; + << tmp_py_base_name; tmp_nc_path = make_temp_file_name(path.text(), 0); command << cs_erase << user_ppath << ' ' // user's path to python - << replace_path(write_tmp_nc) << ' ' // write_tmp_nc.py + << replace_path(write_tmp_py) << ' ' // write_tmp_nc.py << tmp_nc_path << ' ' // tmp_nc output filename << user_script_name; // user's script name @@ -397,7 +397,7 @@ StringArray a; a.add(validate_dataplane); -a.add(replace_path(read_tmp_nc)); +a.add(replace_path(read_tmp_py)); a.add(tmp_nc_path); @@ -412,7 +412,7 @@ mlog << Debug(4) << "Reading temporary Python dataplane file: " // import the python wrapper script as a module // -//path = get_short_name(read_tmp_nc); +//path = get_short_name(read_tmp_py); path = get_short_name(validate_dataplane); PyObject * module_obj = PyImport_ImportModule (path.text()); From 1cba5ec3e1cc09cc95fba40ce1193c1bcbfa56ff Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:29:06 +0000 Subject: [PATCH 06/19] #2772 Removed python_key_point_data & python_key_point_data_list and replaced them to tmp_point_var_name and tmp_point_data --- .../vx_pointdata_python/python_pointdata.cc | 14 +++++++------- src/libcode/vx_pointdata_python/python_pointdata.h | 3 --- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/libcode/vx_pointdata_python/python_pointdata.cc 
b/src/libcode/vx_pointdata_python/python_pointdata.cc index 1b34915bf8..acf6ce030c 100644 --- a/src/libcode/vx_pointdata_python/python_pointdata.cc +++ b/src/libcode/vx_pointdata_python/python_pointdata.cc @@ -543,18 +543,18 @@ if ( ! module_obj ) { } bool result = false; -PyObject *met_point_data = get_python_object(module_obj, python_key_point_data); +PyObject *met_point_data = get_python_object(module_obj, tmp_point_var_name); if ( met_point_data && met_point_data != &_Py_NoneStruct) { result = process_point_data(met_point_data, met_pd_out); } else { - PyObject *point_data = get_python_object(module_obj, python_key_point_data_list); + PyObject *point_data = get_python_object(module_obj, tmp_point_data); if ( point_data && point_data != &_Py_NoneStruct) result = process_point_data_list(point_data, met_pd_out, filters); else { mlog << Warning << "\n" << method_name - << "no \"" << python_key_point_data << "\" and \"" - << python_key_point_data_list << "\" from " + << "no \"" << tmp_point_var_name << "\" and \"" + << tmp_point_data << "\" from " << script_name << "\"\n\n"; } } @@ -626,7 +626,7 @@ if ( ! tmp_dir ) tmp_dir = default_tmp_dir; path << cs_erase << tmp_dir << '/' - << tmp_nc_base_name; + << tmp_py_base_name; tmp_nc_path = make_temp_file_name(path.text(), 0); @@ -722,12 +722,12 @@ if ( ! 
module_obj ) { // -PyObject *met_point_data = get_python_object(module_obj, python_key_point_data); +PyObject *met_point_data = get_python_object(module_obj, tmp_point_var_name); if ( met_point_data ) { process_point_data(met_point_data, met_pd_out); } else { - PyObject *point_data = get_python_object(module_obj, python_key_point_data_list); + PyObject *point_data = get_python_object(module_obj, tmp_point_data); process_point_data_list(point_data, met_pd_out, filters); } diff --git a/src/libcode/vx_pointdata_python/python_pointdata.h b/src/libcode/vx_pointdata_python/python_pointdata.h index 284421b17c..409d2ad6c0 100644 --- a/src/libcode/vx_pointdata_python/python_pointdata.h +++ b/src/libcode/vx_pointdata_python/python_pointdata.h @@ -29,9 +29,6 @@ extern "C" { //////////////////////////////////////////////////////////////////////// -static const char python_key_point_data [] = "met_point_data"; -static const char python_key_point_data_list[] = "point_data"; - static const char python_key_nhdr [] = "nhdr"; //static const char python_key_npbhdr [] = "npbhdr"; static const char python_use_var_id [] = "use_var_id"; From d64d12766ac771fae712c92afab790a8ce5acff8 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:30:33 +0000 Subject: [PATCH 07/19] #2772 Renamed tmp_nc_base_name, tmp_nc_file_var_name & tmp_nc_point_var_name to tmp_py_base_name, tmp_file_var_name, & tmp_point_var_name --- src/libcode/vx_python3_utils/python3_util.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/libcode/vx_python3_utils/python3_util.h b/src/libcode/vx_python3_utils/python3_util.h index 33a5ed63b3..9924f9f89f 100644 --- a/src/libcode/vx_python3_utils/python3_util.h +++ b/src/libcode/vx_python3_utils/python3_util.h @@ -34,11 +34,13 @@ static const char wrappers_dir [] = "MET_BASE/wrappers"; static const char python_dir [] = "MET_BASE/python"; -static const char tmp_nc_base_name [] = "tmp_met_nc"; +static const char tmp_py_base_name [] = 
"tmp_met_data"; -static const char tmp_nc_file_var_name [] = "tmp_nc_filename"; +static const char tmp_file_var_name [] = "tmp_py_filename"; -static const char tmp_nc_point_var_name[] = "met_point_data"; +static const char tmp_point_var_name [] = "met_point_data"; + +static const char tmp_point_data [] = "point_data"; //////////////////////////////////////////////////////////////////////// From de19d84dac3b66a0bc7357b961810f32f974c000 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:57:18 +0000 Subject: [PATCH 08/19] #2772 More log messages for error --- scripts/python/examples/read_ascii_numpy_grid.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/python/examples/read_ascii_numpy_grid.py b/scripts/python/examples/read_ascii_numpy_grid.py index 79e6829052..7ca2b3b6b6 100644 --- a/scripts/python/examples/read_ascii_numpy_grid.py +++ b/scripts/python/examples/read_ascii_numpy_grid.py @@ -27,8 +27,11 @@ met_data = dataplane.read_2d_text_input(input_file) print("Data Shape:\t" + repr(met_data.shape)) print("Data Type:\t" + repr(met_data.dtype)) -except NameError: - print("Can't find the input file") +except NameError as ex: + print(" === ERROR from read_ascii_numpy_grid.py") + print(f" Exception: {type(ex)} {ex}") + print(f" sys.argv: {sys.argv}") + print(" Can't find the input file") # attrs is a dictionary which contains attributes describing the dataplane. 
# attrs should have 9 items, each of data type string: From df63d2ad7fe0367557012f4c589a59553dff6d18 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:58:08 +0000 Subject: [PATCH 09/19] #2772 Changed API (log_msg to log_message) --- scripts/python/examples/read_ascii_numpy.py | 4 ++-- scripts/python/examples/read_ascii_xarray.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/python/examples/read_ascii_numpy.py b/scripts/python/examples/read_ascii_numpy.py index 3c6310cec2..5692472b10 100644 --- a/scripts/python/examples/read_ascii_numpy.py +++ b/scripts/python/examples/read_ascii_numpy.py @@ -5,7 +5,7 @@ ########################################### def log(msg): - dataplane.log_msg(msg) + dataplane.log_message(msg) def set_dataplane_attrs(): # attrs is a dictionary which contains attributes describing the dataplane. @@ -95,5 +95,5 @@ def set_dataplane_attrs(): attrs = set_dataplane_attrs() log("Attributes:\t" + repr(attrs)) -# Sets fill_value if it exists +# Sets fill_value if it exists at the dataplane #attrs['fill_value'] = 255 # for letter.txt diff --git a/scripts/python/examples/read_ascii_xarray.py b/scripts/python/examples/read_ascii_xarray.py index e4ba1f9a28..f897982f07 100644 --- a/scripts/python/examples/read_ascii_xarray.py +++ b/scripts/python/examples/read_ascii_xarray.py @@ -6,7 +6,7 @@ ########################################### def log(msg): - dataplane.log_msg(msg) + dataplane.log_message(msg) log("Python Script:\t" + repr(sys.argv[0])) From be02853fc1d827e0099bbb2ad759d4c90fd2fe09 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 22:59:24 +0000 Subject: [PATCH 10/19] #2772 Use met_point_nc_tools instead of met_point_tools --- scripts/python/examples/read_met_point_obs.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/python/examples/read_met_point_obs.py b/scripts/python/examples/read_met_point_obs.py index e16ccf2d86..b8c8cb3db3 100644 --- 
a/scripts/python/examples/read_met_point_obs.py +++ b/scripts/python/examples/read_met_point_obs.py @@ -21,6 +21,7 @@ from datetime import datetime from met.point import met_point_tools +from met.point_nc import met_point_nc_tools from pyembed.python_embedding import pyembed_tools ARG_PRINT_DATA = 'show_data' @@ -44,14 +45,15 @@ netcdf_filename = os.path.expandvars(input_name) args = [ netcdf_filename ] #args = { 'nc_name': netcdf_filename } - point_obs_data = met_point_tools.get_nc_point_obs() + point_obs_data = met_point_nc_tools.get_nc_point_obs() point_obs_data.read_data(point_obs_data.get_nc_filename(args)) if point_obs_data is not None: met_point_data = point_obs_data.get_point_data() met_point_data['met_point_data'] = point_obs_data - print("met_point_data: ", met_point_data) - print(met_point_data) + if os.getenv("MET_PYTHON_DEBUG", "") != "": + print("met_point_data: ", met_point_data) + print(met_point_data) if DO_PRINT_DATA: point_obs_data.dump() From 549ff3dd2aebacb56551375006df10ecf3b929d5 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 23:00:09 +0000 Subject: [PATCH 11/19] #2772 Changed APIs --- scripts/python/pyembed/read_tmp_point_nc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/python/pyembed/read_tmp_point_nc.py b/scripts/python/pyembed/read_tmp_point_nc.py index 622405c520..d3e0f13cb7 100644 --- a/scripts/python/pyembed/read_tmp_point_nc.py +++ b/scripts/python/pyembed/read_tmp_point_nc.py @@ -8,7 +8,7 @@ import sys -from met.point import met_point_tools +from met.point import get_empty_point_obs, met_point_tools try: from python_embedding import pyembed_tools except: @@ -19,8 +19,8 @@ # read NetCDF file print('{p} reading {f}'.format(p=met_point_tools.get_prompt(), f=input_filename)) try: - point_obs_data = met_point_tools.get_nc_point_obs() - point_obs_data.read_data(input_filename) + point_obs_data = get_empty_point_obs() + point_obs_data.read_point_data(input_filename) met_point_data = 
point_obs_data.get_point_data() met_point_data['met_point_data'] = point_obs_data From 858a484a9be2afb7d5a3f12e82777494671357fd Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 23:01:43 +0000 Subject: [PATCH 12/19] #2772 Changed API --- scripts/python/pyembed/write_tmp_point_nc.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/python/pyembed/write_tmp_point_nc.py b/scripts/python/pyembed/write_tmp_point_nc.py index 41f380de77..fc828e3237 100644 --- a/scripts/python/pyembed/write_tmp_point_nc.py +++ b/scripts/python/pyembed/write_tmp_point_nc.py @@ -17,20 +17,24 @@ except: from pyembed.python_embedding import pyembed_tools -from met.point import met_point_tools +try: + from point import get_empty_point_obs +except: + from met.point import get_empty_point_obs + if __name__ == '__main__': tmp_filename = sys.argv[1] met_in = pyembed_tools.call_python(sys.argv) - if hasattr(met_in, 'point_data'): + if hasattr(met_in, 'point_obs_data'): + met_in.point_obs_data.write_point_data(tmp_filename) + elif hasattr(met_in, 'point_data'): pyembed_tools.write_tmp_ascii(tmp_filename, met_in.point_data) - elif hasattr(met_in, 'point_obs_data'): - met_in.point_obs_data.save_ncfile(tmp_filename) else: if hasattr(met_in.met_point_data, 'point_obs_data'): - met_in.met_point_data['point_obs_data'].save_ncfile(tmp_filename) + met_in.met_point_data['point_obs_data'].write_point_data(tmp_filename) else: - tmp_point_obs = met_point_tools.get_nc_point_obs() + tmp_point_obs = get_empty_point_obs() tmp_point_obs.put_data(met_in.met_point_data) tmp_point_obs.save_ncfile(tmp_filename) From 6a54b1bbd66113f60e4f62953cbdb789affd3a94 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 23:02:58 +0000 Subject: [PATCH 13/19] #2772 Changed default temp output format to JSON and numpy serialization --- scripts/python/met/dataplane.py | 168 ++++++-- scripts/python/met/logger.py | 152 ++++++- scripts/python/met/point.py | 703
++++++++++++-------------------- 3 files changed, 541 insertions(+), 482 deletions(-) diff --git a/scripts/python/met/dataplane.py b/scripts/python/met/dataplane.py index 1a8b834c73..8f3d1ac37c 100644 --- a/scripts/python/met/dataplane.py +++ b/scripts/python/met/dataplane.py @@ -1,32 +1,30 @@ import os import sys import json -import re import numpy as np import xarray as xr from importlib import util as import_util -from met.logger import logger +from met.logger import met_base, met_base_tools ########################################### -class dataplane(logger): +class dataplane(met_base): KEEP_XARRAY = True class_name = "dataplane" - MET_FILL_VALUE = -9999. ATTR_USER_FILL_VALUE = 'user_fill_value' @staticmethod def call_python(argv): # argv[0] is the python wrapper script (caller) - logger.log_msg(f"Module:\t{repr(argv[0])}") + met_base.log_message(f"Module:\t{repr(argv[0])}") if 1 == len(argv): - logger.quit_msg(f"User python command is missing") + met_base.quit_msg(f"User python command is missing") sys.exit(1) - logger.log_msg(f"User python command:\t{repr(' '.join(argv[1:]))}") + met_base.log_message(f"User python command:\t{repr(' '.join(argv[1:]))}") # argv[1] contains the user defined python script pyembed_module_name = argv[1] @@ -48,9 +46,10 @@ def call_python(argv): sys.argv = argv_org # restore sys.argv return met_in - @staticmethod - def get_tmp_numpy_filename(tmp_filename): - return re.sub(".json$", ".npy", tmp_filename) if tmp_filename.endswith(".json") else f'{tmp_filename}.npy' + #@staticmethod + #def get_numpy_filename(tmp_filename): + # return met_base_tools.replace_extension(tmp_filename, "json", "npy") if tmp_filename.endswith(".json") else \ + # met_base_tools.replace_extension(tmp_filename, "nc", "npy") if tmp_filename.endswith(".nc") else f'{tmp_filename}.npy' @staticmethod def is_integer(a_data): @@ -108,47 +107,78 @@ def read_2d_text_input(input_file): @staticmethod def read_dataplane(tmp_filename): + # Default is JSON for attributes and 
NUMPY serialization for 2D array + return dataplane.read_dataplane_nc(tmp_filename) if met_base_tools.use_netcdf_format() \ + else dataplane.read_dataplane_json_numpy(tmp_filename) + + @staticmethod + def read_dataplane_json_numpy(tmp_filename): + if met_base_tools.is_debug_enabled("dataplane"): + met_base.log_message(f"Read from a temporary JSON file and a temporary numpy output (dataplane)") + met_info = {} - with open(tmp_filename) as json_h: - met_info['attrs'] = json.load(json_h) + with open(tmp_filename) as json_fh: + met_info['attrs'] = json.load(json_fh) # read 2D numeric data - numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename) + numpy_dump_name = met_base_tools.get_numpy_filename(tmp_filename) met_dp_data = np.load(numpy_dump_name) met_info['met_data'] = met_dp_data return met_info @staticmethod - def write_dataplane(met_in, tmp_filename): - if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs: - attrs = met_in.met_data.attrs - else: - attrs = met_in.attrs - with open(tmp_filename,'w') as json_h: - json.dump(attrs, json_h) + def read_dataplane_nc(netcdf_filename): + import netCDF4 as nc - if isinstance(met_in.met_data, (np.ma.MaskedArray, np.ma.core.MaskedArray)): - met_dp_data = np.ma.getdata(met_in.met_data, subok=False) - elif isinstance(met_in.met_data, np.ndarray): - met_dp_data = met_in.met_data - else: - met_dp_data = np.array(met_in.met_data) - numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename) - np.save(numpy_dump_name, met_dp_data) + if met_base_tools.is_debug_enabled("dataplane"): + met_base.log_message(f"Read from a temporary NetCDF file (dataplane)") + + # read NetCDF file + ds = nc.Dataset(netcdf_filename, 'r') + + dp = ds['met_data'] + met_data = dp[:] + attr_name = dataplane.ATTR_USER_FILL_VALUE + user_fill_value = dp.getncattr(attr_name) if hasattr(dp, attr_name) else None + + met_attrs = {} + + # grid is defined as a dictionary or string + grid = {} + for attr, attr_val in ds.__dict__.items(): + if 
'grid.' in attr: + grid_attr = attr.split('.')[1] + grid[grid_attr] = attr_val + else: + met_attrs[attr] = attr_val + + if grid: + met_attrs['grid'] = grid + + met_attrs['name'] = met_attrs['name_str'] + del met_attrs['name_str'] + + met_info = {} + met_info['met_data'] = met_data + if user_fill_value is not None: + met_attrs['fill_value'] = user_fill_value + met_info['attrs'] = met_attrs + + return met_info @staticmethod def validate_met_data(met_data, fill_value=None): method_name = f"{dataplane.class_name}.validate()" - #logger.log_msg(f"{method_name} type(met_data)= {type(met_data)}") + #met_base.log_msg(f"{method_name} type(met_data)= {type(met_data)}") attrs = None from_xarray = False from_ndarray = False if met_data is None: - logger.quit(f"{method_name} The met_data is None") + met_base.quit(f"{method_name} The met_data is None") sys.exit(1) nx, ny = met_data.shape - met_fill_value = dataplane.MET_FILL_VALUE + met_fill_value = met_base.MET_FILL_VALUE if dataplane.is_xarray_dataarray(met_data): from_xarray = True attrs = met_data.attrs @@ -166,13 +196,79 @@ def validate_met_data(met_data, fill_value=None): met_data = np.ma.masked_equal(met_data, fill_value) met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value) else: - logger.log_msg(f"{method_name} unknown datatype {type(met_data)}") + met_base.log_message(f"{method_name} unknown datatype {type(met_data)}") if dataplane.KEEP_XARRAY: return xr.DataArray(met_data,attrs=attrs) if from_xarray else met_data else: return met_data + @staticmethod + def write_dataplane(met_in, tmp_filename): + # Default is JSON for attributes and NUMPY serialization for 2D array + if met_base_tools.use_netcdf_format(): + dataplane.write_dataplane_nc(met_in, tmp_filename) + else: + dataplane.write_dataplane_json_numpy(met_in, tmp_filename) + + @staticmethod + def write_dataplane_json_numpy(met_in, tmp_filename): + if met_base_tools.is_debug_enabled("dataplane"): + met_base.log_message(f"Save to a 
temporary JSON file and a temporary numpy output (dataplane)") + if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs: + attrs = met_in.met_data.attrs + else: + attrs = met_in.attrs + with open(tmp_filename,'w') as json_fh: + json.dump(attrs, json_fh) + + met_dp_data = met_base_tools.convert_to_ndarray(met_in.met_data) + numpy_dump_name = met_base_tools.get_numpy_filename(tmp_filename) + np.save(numpy_dump_name, met_dp_data) + + @staticmethod + def write_dataplane_nc(met_in, netcdf_filename): + import netCDF4 as nc + + if met_base_tools.is_debug_enabled("dataplane"): + met_base.log_message(f"Save to a temporary NetCDF file (dataplane)") + + met_info = {'met_data': met_in.met_data} + if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs: + attrs = met_in.met_data.attrs + else: + attrs = met_in.attrs + met_info['attrs'] = attrs + + # write NetCDF file + ds = nc.Dataset(netcdf_filename, 'w') + + # create dimensions and variable + nx, ny = met_in.met_data.shape + ds.createDimension('x', nx) + ds.createDimension('y', ny) + dp = ds.createVariable('met_data', met_in.met_data.dtype, ('x', 'y'), + fill_value=dataplane.MET_FILL_VALUE) + dp[:] = met_in.met_data + + # append attributes + for attr, attr_val in met_info['attrs'].items(): + if attr_val is None: + continue + + if attr == 'name': + setattr(ds, 'name_str', attr_val) + elif attr == 'fill_value': + setattr(dp, dataplane.ATTR_USER_FILL_VALUE, attr_val) + elif type(attr_val) == dict: + for key in attr_val: + setattr(ds, attr + '.' 
+ key, attr_val[key]) + else: + setattr(ds, attr, attr_val) + + ds.close() + + def main(argv): global attrs, met_data, met_info @@ -196,14 +292,14 @@ def main(argv): fill_value = met_in.user_fill_value fill_value = attrs.get('fill_value', None) - dataplane.log_msg('validating the dataplane array...') + met_base.log_message('validating the dataplane array...') met_data = dataplane.validate_met_data(init_met_data, fill_value) met_info['met_data'] = met_data if os.environ.get('MET_PYTHON_DEBUG', None) is not None: - dataplane.log_msg('--- met_data after validating ---') - dataplane.log_msg(met_data) + met_base.log_message('--- met_data after validating ---') + met_base.log_message(met_data) if __name__ == '__main__' or __name__ == sys.argv[0]: main(sys.argv) - dataplane.log_msg(f'{__name__} complete') + met_base.log_message(f'{__name__} complete') diff --git a/scripts/python/met/logger.py b/scripts/python/met/logger.py index 418a39b0d1..db58286e07 100644 --- a/scripts/python/met/logger.py +++ b/scripts/python/met/logger.py @@ -1,35 +1,40 @@ ########################################### +import os import sys +import re -class logger(): +import numpy as np - PROMPT= " PYTHON:" - ERROR_PROMPT= "ERROR" +class logger(): - ## - ## create the metadata dictionary - ## + PROMPT = " PYTHON:" + ERROR_P = " ==PYTHON_ERROR==" + INFO_P = " ==PYTHON_INFO==" @staticmethod def append_error_prompt(msg): - return f'{logger.ERROR_PROMPT}: {msg}' + return f'{logger.ERROR_P}: {msg}' @staticmethod - def error_msg(msg): + def error_messageg(msg): msgs = msg if isinstance(msg, list) else [msg] msgs.insert(0, '') msgs.append('') for a_msg in msgs: - logger.log_msg(logger.append_error_prompt(a_msg)) + logger.log_message(logger.append_error_prompt(a_msg)) #@staticmethod #def get_met_fill_value(): # return logger.MET_FILL_VALUE @staticmethod - def log_msg(msg): + def info_message(msg): + print(f'{logger.PROMPT} {logger.INFO_P} {msg}') + + @staticmethod + def log_message(msg): 
print(f'{logger.PROMPT} {msg}') @staticmethod @@ -40,4 +45,129 @@ def quit(msg, do_quit=True): @staticmethod def quit_msg(msg): - logger.error_msg([msg, "Quit..."]) + logger.error_message([msg, "Quit..."]) + + +class met_base(logger): + + MET_FILL_VALUE = -9999. + + def convert_to_array(self, ndarray_data): + return met_base_tools.convert_to_array(ndarray_data) + + def convert_to_ndarray(self, array_data): + return met_base_tools.convert_to_ndarray(array_data) + + def get_met_fill_value(self): + return met_base.MET_FILL_VALUE + + def error_msg(self, msg): + logger.error_messageg(msg) + + def get_prompt(self): + return met_base_tools.get_prompt() + + def info_msg(self, msg): + logger.info_message(msg) + + def is_numpy_array(self, var_data): + return isinstance(var_data, np.ndarray) + + def log_msg(self, msg): + logger.log_message(msg) + + @staticmethod + def get_numpy_filename(tmp_filename): + return logger.replace_extension(tmp_filename, "json", "npy") if tmp_filename.endswith(".json") else \ + logger.replace_extension(tmp_filename, "nc", "npy") if tmp_filename.endswith(".nc") else f'{tmp_filename}.npy' + + def is_debug_enabled(self, component_name=""): + return met_base_tools.is_debug_enabled(component_name) + + def replace_extension(self, file_name, from_ext, to_ext): + return met_base_tools.replace_extension(file_name, from_ext, to_ext) + + def remove_file(self, file_name): + os.remove(file_name) + + def use_netcdf_format(self): + return met_base_tools.use_netcdf_format() + +class met_base_tools(object): + + ENV_MET_KEEP_TEMP_FILE = "MET_KEEP_TEMP_FILE" + ENV_MET_PYTHON_DEBUG = "MET_PYTHON_DEBUG" + ENV_MET_PYTHON_TMP_FORMAT = "MET_PYTHON_TMP_FORMAT" + + @staticmethod + def convert_to_array(ndarray_data): + is_byte_type = False + if 0 < len(ndarray_data): + is_byte_type = isinstance(ndarray_data[0], (bytes, np.bytes_)) + if isinstance(ndarray_data[0], np.ndarray): + if 0 < len(ndarray_data[0]): + is_byte_type = isinstance(ndarray_data[0][0], (bytes, np.bytes_)) + 
if is_byte_type: + array_data = [] + if isinstance(ndarray_data[0], (np.ma.MaskedArray, np.ma.core.MaskedArray)): + for byte_data in ndarray_data: + array_data.append(byte_data.tobytes(fill_value=' ').decode('utf-8').rstrip()) + else: + for byte_data in ndarray_data: + array_data.append(byte_data.decode("utf-8").rstrip()) + elif isinstance(ndarray_data, (np.ma.MaskedArray, np.ma.core.MaskedArray)): + array_data = np.ma.getdata(ndarray_data, subok=False).tolist() + elif isinstance(ndarray_data, np.ndarray): + array_data = ndarray_data.tolist() + else: + array_data = ndarray_data + return array_data + + @staticmethod + def convert_to_ndarray(array_data): + if isinstance(array_data, (np.ma.MaskedArray, np.ma.core.MaskedArray)): + ndarray_data = np.ma.getdata(array_data, subok=False) + elif isinstance(array_data, np.ndarray): + ndarray_data = array_data + else: + ndarray_data = np.array(array_data) + return ndarray_data + + @staticmethod + def get_numpy_filename(tmp_filename): + return logger.replace_extension(tmp_filename, "json", "npy") if tmp_filename.endswith(".json") else \ + logger.replace_extension(tmp_filename, "txt", "npy") if tmp_filename.endswith(".txt") else \ + logger.replace_extension(tmp_filename, "nc", "npy") if tmp_filename.endswith(".nc") else f'{tmp_filename}.npy' + + @staticmethod + def get_prompt(): + return logger.PROMPT + + @staticmethod + def is_debug_enabled(component_name=""): + env_value = os.getenv(met_base_tools.ENV_MET_PYTHON_DEBUG, "").lower() + return env_value == "all" or env_value == component_name.lower() + + @staticmethod + def keep_temp_file(): + env_value = os.getenv(met_base_tools.ENV_MET_KEEP_TEMP_FILE, "") + return env_value.lower() == "true" or env_value.lower() == "yes" + + @staticmethod + def replace_extension(file_name, from_ext, to_ext): + return re.sub(f".{from_ext}$", f".{to_ext}", file_name) + + @staticmethod + def remove_file(file_name): + if os.path.exists(file_name): + os.remove(file_name) + + @staticmethod + def 
remove_temp_file(file_name): + if not met_base_tools.keep_temp_file(): + met_base_tools.remove_file(file_name) + + @staticmethod + def use_netcdf_format(): + env_value = os.getenv(met_base_tools.ENV_MET_PYTHON_TMP_FORMAT, "") + return env_value.lower() == "netcdf" diff --git a/scripts/python/met/point.py b/scripts/python/met/point.py index fbfb112f51..a2d1619ec1 100644 --- a/scripts/python/met/point.py +++ b/scripts/python/met/point.py @@ -46,24 +46,24 @@ def read_data(data_filename): ''' import os +import json from abc import ABC, abstractmethod import numpy as np -import netCDF4 as nc import pandas as pd +from met.logger import met_base, met_base_tools + + COUNT_SHOW = 30 -class base_met_point_obs(object): + +class met_base_point(met_base): ''' classdocs ''' - ERROR_P = " ==PYTHON_ERROR==" - INFO_P = " ==PYTHON_INFO==" - python_prefix = 'PYTHON_POINT_USER' - - FILL_VALUE = -9999. + COMPONENT_NAME = "met_point" def __init__(self, use_var_id=True): ''' @@ -73,7 +73,7 @@ def __init__(self, use_var_id=True): self.input_name = None self.ignore_input_file = False self.use_var_id = use_var_id # True if variable index, False if GRIB code - self.error_msg = "" + self.error_msgs = "" self.has_error = False # Header @@ -114,11 +114,11 @@ def __init__(self, use_var_id=True): def add_error_msg(self, error_msg): self.has_error = True - self.log_error_msg(error_msg) - if 0 == len(self.error_msg): - self.error_msg = error_msg + self.error_msg(error_msg) + if 0 == len(self.error_msgs): + self.error_msgs = error_msg else: - self.error_msg = "{m1}\n{m2}".format(m1=self.error_msg, m2=error_msg) + self.error_msgs = "{m1}\n{m2}".format(m1=self.error_msgs, m2=error_msg) def add_error_msgs(self, error_msgs): self.has_error = True @@ -126,43 +126,40 @@ def add_error_msgs(self, error_msgs): self.add_error_msg(error_msg) def check_data_member_float(self, local_var, var_name): + method_name = f"{self.__class__.__name__}.check_data_member_float()" if 0 == len(local_var): - 
self.add_error_msg("{v} is empty (float)".format(v=var_name)) + self.add_error_msg(f"{method_name} {var_name} is empty") elif isinstance(local_var, list): if isinstance(local_var[0], str) and not self.is_number(local_var[0]): - self.add_error_msg("Not supported data type: {n}[0]={v}, string type, not a number (int or float only)".format( - n=var_name, v=local_var[0])) + self.add_error_msg(f"{method_name} Not supported data type: {type(local_var[0])} for {var_name}[0], string type, not a number (int or float only)") elif 0 > str(type(local_var[0])).find('numpy') and not isinstance(local_var[0], (int, float)): - self.add_error_msg("Not supported data type ({t}) for {v}[0] (int or float only)".format( - v=var_name, t=type(local_var[0]))) + self.add_error_msg(f"{method_name} Not supported data type: {type(local_var[0])} for {var_name}[0] (int or float only)") elif not self.is_numpy_array(local_var): - self.add_error_msg("Not supported data type ({t}) for {v} (list and numpy.ndarray)".format( - v=var_name, t=type(local_var))) + self.add_error_msg(f"{method_name} Not supported data type ({type(local_var)}) for {var_name} (list and numpy.ndarray)") def check_data_member_int(self, local_var, var_name): + method_name = f"{self.__class__.__name__}.check_data_member_int()" if 0 == len(local_var): - self.add_error_msg("{v} is empty (int)".format(v=var_name)) + self.add_error_msg(f"{method_name} {var_name} is empty") elif isinstance(local_var, list): if isinstance(local_var[0], str) and not self.is_number(local_var[0]): - self.add_error_msg("Not supported data type: {n}[0]={v}, string type, not a number (int only)".format( - n=var_name, v=local_var[0])) + self.add_error_msg(f"{method_name} Not supported data type: {type(local_var[0])} for {var_name}[0], string type, not a number (int only)") elif 0 > str(type(local_var[0])).find('numpy') and not isinstance(local_var[0], int): - self.add_error_msg("Not supported data type ({t}) for {v}[0] (int only)".format( - v=var_name, 
t=type(local_var[0]))) + self.add_error_msg(f"{method_name} Not supported data type: {type(local_var[0])} for {var_name}[0] (int only)") elif not self.is_numpy_array(local_var): - self.add_error_msg("Not supported data type ({t}) for {v} (list and numpy.ndarray)".format( - v=var_name, t=type(local_var))) + self.add_error_msg(f"{method_name} Not supported data type ({type(local_var)}) for {var_name} (list and numpy.ndarray)") def check_data_member_string(self, local_var, var_name): + method_name = f"{self.__class__.__name__}.check_data_member_string()" if 0 == len(local_var): - self.add_error_msg("{v} is empty (string)".format(v=var_name)) + self.add_error_msg(f"{method_name} {var_name} is empty") elif not isinstance(local_var, (list)): - self.add_error_msg("Not supported data type ({t}) for {v} (list)".format( - v=var_name, t=type(local_var))) + self.add_error_msg(f"{method_name} Not supported data type ({type(local_var)}) for {var_name} (list)") def check_point_data(self): + method_name = f"{self.__class__.__name__}.check_point_data()" if not self.ignore_input_file and self.input_name is not None and not os.path.exists(self.input_name): - self.add_error_msg('The netcdf input {f} does not exist'.format(f=self.input_name)) + self.add_error_msg(f'{method_name} The input {self.input_name} does not exist') else: self.check_data_member_int(self.hdr_typ,'hdr_typ') self.check_data_member_int(self.hdr_sid,'hdr_sid') @@ -184,11 +181,11 @@ def check_point_data(self): if self.use_var_id: self.check_data_member_string(self.obs_var_table,'obs_var_table') - def convert_to_numpy(self, value_list): - return np.array(value_list) + #def convert_to_numpy(self, value_list): + # return met_point_tools.convert_to_ndarray(value_list) def dump(self): - base_met_point_obs.print_point_data(self.get_point_data()) + met_base_point.print_point_data(self.get_point_data()) def get_count_string(self): return f' nobs={self.nobs} nhdr={self.nhdr} ntyp={self.nhdr_typ} nsid={self.nhdr_sid} 
nvld={self.nhdr_vld} nqty={self.nobs_qty} nvar={self.nobs_var}' @@ -213,53 +210,41 @@ def get_point_data(self): self.check_point_data() if not self.is_numpy_array(self.hdr_typ): - self.hdr_typ = self.convert_to_numpy(self.hdr_typ) + self.hdr_typ = self.convert_to_ndarray(self.hdr_typ) if not self.is_numpy_array(self.hdr_sid): - self.hdr_sid = self.convert_to_numpy(self.hdr_sid) + self.hdr_sid = self.convert_to_ndarray(self.hdr_sid) if not self.is_numpy_array(self.hdr_vld): - self.hdr_vld = self.convert_to_numpy(self.hdr_vld) + self.hdr_vld = self.convert_to_ndarray(self.hdr_vld) if not self.is_numpy_array(self.hdr_lat): - self.hdr_lat = self.convert_to_numpy(self.hdr_lat) + self.hdr_lat = self.convert_to_ndarray(self.hdr_lat) if not self.is_numpy_array(self.hdr_lon): - self.hdr_lon = self.convert_to_numpy(self.hdr_lon) + self.hdr_lon = self.convert_to_ndarray(self.hdr_lon) if not self.is_numpy_array(self.hdr_elv): - self.hdr_elv = self.convert_to_numpy(self.hdr_elv) + self.hdr_elv = self.convert_to_ndarray(self.hdr_elv) if not self.is_numpy_array(self.obs_qty): - self.obs_qty = self.convert_to_numpy(self.obs_qty) + self.obs_qty = self.convert_to_ndarray(self.obs_qty) if not self.is_numpy_array(self.obs_hid): - self.obs_hid = self.convert_to_numpy(self.obs_hid) + self.obs_hid = self.convert_to_ndarray(self.obs_hid) if not self.is_numpy_array(self.obs_vid): - self.obs_vid = self.convert_to_numpy(self.obs_vid) + self.obs_vid = self.convert_to_ndarray(self.obs_vid) if not self.is_numpy_array(self.obs_lvl): - self.obs_lvl = self.convert_to_numpy(self.obs_lvl) + self.obs_lvl = self.convert_to_ndarray(self.obs_lvl) if not self.is_numpy_array(self.obs_hgt): - self.obs_hgt = self.convert_to_numpy(self.obs_hgt) + self.obs_hgt = self.convert_to_ndarray(self.obs_hgt) if not self.is_numpy_array(self.obs_val): - self.obs_val = self.convert_to_numpy(self.obs_val) + self.obs_val = self.convert_to_ndarray(self.obs_val) self.count_info = self.get_count_string() self.met_point_data = 
self return self.__dict__ +# def get_prompt(self): +# return met_point_tools.get_prompt() + def is_number(self, num_str): return num_str.replace('-','1').replace('+','2').replace('.','3').isdigit() - def is_numpy_array(self, var): - return isinstance(var, np.ndarray) - - def log_error_msg(self, err_msg): - base_met_point_obs.error_msg(err_msg) - - def log_error(self, err_msgs): - print(self.ERROR_P) - for err_line in err_msgs.split('\n'): - self.log_error_msg(err_line) - print(self.ERROR_P) - - def log_info(self, info_msg): - base_met_point_obs.info_msg(info_msg) - def put_data(self, point_obs_dict): self.use_var_id = point_obs_dict['use_var_id'] self.hdr_typ = point_obs_dict['hdr_typ'] @@ -298,92 +283,122 @@ def put_data(self, point_obs_dict): if po_array is not None: self.hdr_inst_typ = po_array - @staticmethod - def get_prompt(): - return " python:" - - @staticmethod - def error_msg(msg): - print(f'{base_met_point_obs.get_prompt()} {base_met_point_obs.ERROR_P} {msg}') - - @staticmethod - def info_msg(msg): - print(f'{base_met_point_obs.get_prompt()} {base_met_point_obs.INFO_P} {msg}') - - @staticmethod - def get_python_script(arg_value): - return arg_value[len(met_point_obs.python_prefix)+1:] - - @staticmethod - def is_python_script(arg_value): - return arg_value.startswith(met_point_obs.python_prefix) - - @staticmethod - def print_data(key, data_array, show_count=COUNT_SHOW): - if isinstance(data_array, list): - data_len = len(data_array) - if show_count >= data_len: - print(" {k:10s}: {v}".format(k=key, v= data_array)) - else: - end_offset = int(show_count/2) - print(" {k:10s}: count={v}".format(k=key, v=data_len)) - print(" {k:10s}[0:{o}] {v}".format(k=key, v=data_array[:end_offset], o=end_offset)) - print(" {k:10s}[{s}:{e}]: {v}".format(k=key, v='...', s=end_offset+1, e=data_len-end_offset-1)) - print(" {k:10s}[{s}:{e}]: {v}".format(k=key, v= data_array[-end_offset:], s=(data_len-end_offset), e=(data_len-1))) - else: - print(" {k:10s}: {v}".format(k=key, v= 
data_array)) + def read_point_data(self, tmp_filename): + method_name = f"{self.__class__.__name__}.read_point_data()" + if met_base_tools.use_netcdf_format(): + from met.point_nc import nc_point_obs - @staticmethod - def print_point_data(met_point_data, print_subset=True): - print(' === MET point data by python embedding ===') - if print_subset: - met_point_obs.print_data('nhdr',met_point_data['nhdr']) - met_point_obs.print_data('nobs',met_point_data['nobs']) - met_point_obs.print_data('use_var_id',met_point_data['use_var_id']) - met_point_obs.print_data('hdr_typ',met_point_data['hdr_typ']) - met_point_obs.print_data('hdr_typ_table',met_point_data['hdr_typ_table']) - met_point_obs.print_data('hdr_sid',met_point_data['hdr_sid']) - met_point_obs.print_data('hdr_sid_table',met_point_data['hdr_sid_table']) - met_point_obs.print_data('hdr_vld',met_point_data['hdr_vld']) - met_point_obs.print_data('hdr_vld_table',met_point_data['hdr_vld_table']) - met_point_obs.print_data('hdr_lat',met_point_data['hdr_lat']) - met_point_obs.print_data('hdr_lon',met_point_data['hdr_lon']) - met_point_obs.print_data('hdr_elv',met_point_data['hdr_elv']) - met_point_obs.print_data('obs_hid',met_point_data['obs_hid']) - met_point_obs.print_data('obs_vid',met_point_data['obs_vid']) - met_point_obs.print_data('obs_var_table',met_point_data['obs_var_table']) - met_point_obs.print_data('obs_qty',met_point_data['obs_qty']) - met_point_obs.print_data('obs_qty_table',met_point_data['obs_qty_table']) - met_point_obs.print_data('obs_lvl',met_point_data['obs_lvl']) - met_point_obs.print_data('obs_hgt',met_point_data['obs_hgt']) - met_point_obs.print_data('obs_val',met_point_data['obs_val']) + met_point_data = nc_point_obs() + met_point_data.read_data(tmp_filename) + self.put_data(met_point_data.get_point_data()) + if met_base_tools.is_debug_enabled("point"): + met_base.log_message(f"{method_name} Read from a temporary NetCDF file (point)") else: - print('All',met_point_data) - print(" nhdr: 
",met_point_data['nhdr']) - print(" nobs: ",met_point_data['nobs']) - print(' use_var_id: ',met_point_data['use_var_id']) - print(' hdr_typ: ',met_point_data['hdr_typ']) - print('hdr_typ_table: ',met_point_data['hdr_typ_table']) - print(' hdr_sid: ',met_point_data['hdr_sid']) - print('hdr_sid_table: ',met_point_data['hdr_sid_table']) - print(' hdr_vld: ',met_point_data['hdr_vld']) - print('hdr_vld_table: ',met_point_data['hdr_vld_table']) - print(' hdr_lat: ',met_point_data['hdr_lat']) - print(' hdr_lon: ',met_point_data['hdr_lon']) - print(' hdr_elv: ',met_point_data['hdr_elv']) - print(' obs_hid: ',met_point_data['obs_hid']) - print(' obs_vid: ',met_point_data['obs_vid']) - print('obs_var_table: ',met_point_data['obs_var_table']) - print(' obs_qty: ',met_point_data['obs_qty']) - print('obs_qty_table: ',met_point_data['obs_qty_table']) - print(' obs_lvl: ',met_point_data['obs_lvl']) - print(' obs_hgt: ',met_point_data['obs_hgt']) - print(' obs_val: ',met_point_data['obs_val']) - - print(' === MET point data by python embedding ===') + self.read_point_data_json_numpy(tmp_filename) + + def read_point_data_json_numpy(self, tmp_filename): + method_name = f"{self.__class__.__name__}.read_point_data_json_numpy()" + if met_base_tools.is_debug_enabled("point"): + met_base.log_message(f"{method_name} Read from a temporary JSON file and a temporary numpy output (point)") + + with open(tmp_filename) as json_fh: + json_dict = json.load(json_fh) + + self.use_var_id = json_dict['use_var_id'] + self.nhdr = json_dict['nhdr'] + self.nobs = json_dict['nobs'] + self.hdr_typ_table = json_dict['hdr_typ_table'] + self.hdr_sid_table = json_dict['hdr_sid_table'] + self.hdr_vld_table = json_dict['hdr_vld_table'] + self.obs_var_table = json_dict['obs_var_table'] + self.obs_qty_table = json_dict['obs_qty_table'] + self.obs_var_unit = json_dict['obs_var_unit'] + self.obs_var_desc = json_dict['obs_var_desc'] + self.hdr_prpt_typ = json_dict['hdr_prpt_typ'] + self.hdr_irpt_typ = 
json_dict['hdr_irpt_typ'] + self.hdr_inst_typ = json_dict['hdr_inst_typ'] + + # read 2D numeric data + numpy_dump_name = met_base_tools.get_numpy_filename(tmp_filename) + point_array_list = np.load(numpy_dump_name) + + # Header data + self.hdr_typ = point_array_list[0,:self.nhdr] + self.hdr_sid = point_array_list[1,:self.nhdr] + self.hdr_vld = point_array_list[2,:self.nhdr] + self.hdr_lat = point_array_list[3,:self.nhdr] + self.hdr_lon = point_array_list[4,:self.nhdr] + self.hdr_elv = point_array_list[5,:self.nhdr] + # Observation data + self.obs_hid = point_array_list[6] + self.obs_lvl = point_array_list[7] + self.obs_hgt = point_array_list[8] + self.obs_val = point_array_list[9] + self.obs_vid = point_array_list[10] + self.obs_qty = point_array_list[11] + + if numpy_dump_name != tmp_filename: + os.remove(numpy_dump_name) + def write_point_data(self, tmp_filename): + if met_base_tools.use_netcdf_format(): + from met.point_nc import nc_point_obs -class csv_point_obs(base_met_point_obs): + nc_point_obs.write_nc_file(tmp_filename, self) + if met_base_tools.is_debug_enabled("point"): + met_base.log_message(f"Save to a temporary NetCDF file (point)") + else: + self.write_point_data_json_numpy(tmp_filename) + + def write_point_data_json_numpy(self, tmp_filename): + method_name = f"{self.__class__.__name__}.write_point_data_json_numpy()" + if met_base_tools.is_debug_enabled("dataplane"): + met_base.log_message(f"{method_name} Save to a temporary JSON file and a temporary numpy output (point)") + + self.nhdr = len(self.hdr_sid) + self.nobs = len(self.obs_hid) + + json_dict = {} + json_dict['use_var_id'] = self.use_var_id + json_dict['nhdr'] = self.nhdr + json_dict['nobs'] = self.nobs + json_dict['hdr_typ_table'] = self.convert_to_array(self.hdr_typ_table) + json_dict['hdr_sid_table'] = self.convert_to_array(self.hdr_sid_table) + json_dict['hdr_vld_table'] = self.convert_to_array(self.hdr_vld_table) + json_dict['obs_var_table'] = self.convert_to_array(self.obs_var_table) + 
json_dict['obs_qty_table'] = self.convert_to_array(self.obs_qty_table) + json_dict['obs_var_unit'] = self.convert_to_array(self.obs_var_unit) + json_dict['obs_var_desc'] = self.convert_to_array(self.obs_var_desc) + json_dict['hdr_prpt_typ'] = self.convert_to_array(self.hdr_prpt_typ) + json_dict['hdr_irpt_typ'] = self.convert_to_array(self.hdr_irpt_typ) + json_dict['hdr_inst_typ'] = self.convert_to_array(self.hdr_inst_typ) + + point_array_list = np.empty([12, self.nobs]) + + # Header data + point_array_list[0,:self.nhdr] = self.convert_to_ndarray(self.hdr_typ) + point_array_list[1,:self.nhdr] = self.convert_to_ndarray(self.hdr_sid) + point_array_list[2,:self.nhdr] = self.convert_to_ndarray(self.hdr_vld) + point_array_list[3,:self.nhdr] = self.convert_to_ndarray(self.hdr_lat) + point_array_list[4,:self.nhdr] = self.convert_to_ndarray(self.hdr_lon) + point_array_list[5,:self.nhdr] = self.convert_to_ndarray(self.hdr_elv) + # Observation data + point_array_list[6] = self.convert_to_ndarray(self.obs_hid) + point_array_list[7] = self.convert_to_ndarray(self.obs_lvl) + point_array_list[8] = self.convert_to_ndarray(self.obs_hgt) + point_array_list[9] = self.convert_to_ndarray(self.obs_val) + point_array_list[10] = self.convert_to_ndarray(self.obs_vid) + point_array_list[11] = self.convert_to_ndarray(self.obs_qty) + + with open(tmp_filename,'w') as json_fh: + json.dump(json_dict, json_fh) + + numpy_dump_name = met_base_tools.get_numpy_filename(tmp_filename) + #np.save(numpy_dump_name, self.convert_to_ndarray(point_array_list)) + np.save(numpy_dump_name, point_array_list) + + +class csv_point_obs(met_base_point): def __init__(self, point_data): self.point_data = point_data @@ -400,50 +415,50 @@ def __init__(self, point_data): self.convert_point_data() def check_csv_record(self, csv_point_data, index): + method_name = f"{self.__class__.__name__}.check_csv_record()" error_msgs = [] # names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'] # 
dtype={'typ':'str', 'sid':'str', 'vld':'str', 'var':'str', 'qc':'str'} if 11 > len(csv_point_data): - error_msgs.append("{i}-th data: missing columns. should be 11 columns, not {c} columns".format( - i=index, c=len(csv_point_data))) + error_msgs.append(f"{method_name} {index}-th data: missing columns. should be 11 columns, not {len(csv_point_data)} columns") elif 11 < len(csv_point_data): - print("{i}-th data: ignore after 11-th columns out of {c} columns".format( - i=index, c=len(csv_point_data))) + print("{i}-th data: ignore after 11-th columns out of {len(csv_point_data)} columns") if not isinstance(csv_point_data[0], str): - error_msgs.append("{i}-th data: message_type is not string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: message_type is not string") if not isinstance(csv_point_data[1], str): - error_msgs.append("{i}-th data: station_id is not string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: station_id is not string") if not isinstance(csv_point_data[2], str): - error_msgs.append("{i}-th data: valid_time is not string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: valid_time is not string") if isinstance(csv_point_data[3], str): - error_msgs.append("{i}-th data: latitude can not be a string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: latitude can not be a string") elif csv_point_data[3] < -90.0 or csv_point_data[3] > 90.0: - error_msgs.append("{i}-th data: latitude ({l}) is out of range".format(i=index, l=csv_point_data[3])) + error_msgs.append(f"{method_name} {index}-th data: latitude ({csv_point_data[3]}) is out of range") if isinstance(csv_point_data[4], str): - error_msgs.append("{i}-th data: longitude can not be a string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: longitude can not be a string") elif csv_point_data[4] < -180.0 or csv_point_data[4] > 360.0: - error_msgs.append("{i}-th data: longitude ({l}) is out of 
range".format(i=index, l=csv_point_data[4])) + error_msgs.append(f"{method_name} {index}-th data: longitude ({csv_point_data[4]}) is out of range") if not isinstance(csv_point_data[6], str): - error_msgs.append("{i}-th data: grib_code/var_name is not string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: grib_code/var_name is not string") if not isinstance(csv_point_data[9], str): - error_msgs.append("{i}-th data: quality_mark is not string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: quality_mark is not string") is_string, is_num = self.is_num_string(csv_point_data[5]) if is_string and not is_num: - error_msgs.append("{i}-th data: elevation: only NA is accepted as string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: elevation: only NA is accepted as string") is_string, is_num = self.is_num_string(csv_point_data[7]) if is_string and not is_num: - error_msgs.append("{i}-th data: obs_level: only NA is accepted as string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: obs_level: only NA is accepted as string") is_string, is_num = self.is_num_string(csv_point_data[8]) if is_string and not is_num: - error_msgs.append("{i}-th data: obs_height: only NA is accepted as string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: obs_height: only NA is accepted as string") is_string, is_num = self.is_num_string(csv_point_data[10]) if is_string and not is_num: - error_msgs.append("{i}-th data: obs_value: only NA is accepted as string".format(i=index)) + error_msgs.append(f"{method_name} {index}-th data: obs_value: only NA is accepted as string") return error_msgs def check_csv_point_data(self, all_records=False): + method_name = f"{self.__class__.__name__}.check_csv_point_data()" if 0 == len(self.point_data): - self.add_error_msg("No data!") + self.add_error_msg(f"{method_name} No data!") elif all_records: data_idx = 0 for csv_point_data in self.point_data: @@ 
-573,6 +588,7 @@ def convert_point_data(self): self.obs_var_table[idx] = key def get_num_value(self, column_value): + method_name = f"{self.__class__.__name__}.get_num_value()" num_value = column_value if isinstance(column_value, str): if self.is_number(column_value): @@ -580,7 +596,7 @@ def get_num_value(self, column_value): else: num_value = self.FILL_VALUE if column_value.lower() != 'na' and column_value.lower() != 'n/a': - self.log_info(f'{column_value} is not a number, converted to the missing value') + self.info_msg(f'{method_name} {column_value} is not a number, converted to the missing value') return num_value def is_grib_code(self): @@ -602,9 +618,7 @@ def is_num_string(self, column_value): return is_string, is_num -class met_point_obs(ABC, base_met_point_obs): - - MET_ENV_RUN = 'MET_FORCE_TO_RUN' +class met_point_obs(ABC, met_base_point): @abstractmethod def read_data(self, args): @@ -625,7 +639,15 @@ def read_data(self, args): pass -class met_point_tools(): +class dummy_point_obs(met_point_obs): + + def read_data(self, args): + pass + + +class met_point_tools(met_base_tools): + + python_prefix = 'PYTHON_POINT_USER' @staticmethod def convert_point_data(point_data, check_all_records=False, input_type='csv'): @@ -635,24 +657,85 @@ def convert_point_data(point_data, check_all_records=False, input_type='csv'): csv_point_data.check_csv_point_data(check_all_records) tmp_point_data = csv_point_data.get_point_data() else: - base_met_point_obs.error_msg('Not supported input type: {input_type}') + met_base_point.error_message(f'met_point_tools.convert_point_data() Not supported input type: {input_type}') return tmp_point_data @staticmethod - def get_prompt(): - return " python:" + def get_sample_point_obs(): + return sample_met_point_obs() @staticmethod - def get_nc_point_obs(): - return nc_point_obs() + def get_python_script(arg_value): + return arg_value[len(met_point_tools.python_prefix)+1:] @staticmethod - def get_sample_point_obs(): - return 
sample_met_point_obs() + def is_python_prefix(user_cmd): + return user_cmd.startswith(met_point_tools.python_prefix) @staticmethod - def is_python_prefix(user_cmd): - return user_cmd.startswith(base_met_point_obs.python_prefix) + def print_data(key, data_array, show_count=COUNT_SHOW): + if isinstance(data_array, list): + data_len = len(data_array) + if show_count >= data_len: + print(" {k:10s}: {v}".format(k=key, v= data_array)) + else: + end_offset = int(show_count/2) + print(" {k:10s}: count={v}".format(k=key, v=data_len)) + print(" {k:10s}[0:{o}] {v}".format(k=key, v=data_array[:end_offset], o=end_offset)) + print(" {k:10s}[{s}:{e}]: {v}".format(k=key, v='...', s=end_offset+1, e=data_len-end_offset-1)) + print(" {k:10s}[{s}:{e}]: {v}".format(k=key, v= data_array[-end_offset:], s=(data_len-end_offset), e=(data_len-1))) + else: + print(" {k:10s}: {v}".format(k=key, v= data_array)) + + @staticmethod + def print_point_data(met_point_data, print_subset=True): + method_name = f"met_point_tools.print_point_data()" + print(' === MET point data by python embedding ===') + if print_subset: + met_point_tools.print_data('nhdr',met_point_data['nhdr']) + met_point_tools.print_data('nobs',met_point_data['nobs']) + met_point_tools.print_data('use_var_id',met_point_data['use_var_id']) + met_point_tools.print_data('hdr_typ',met_point_data['hdr_typ']) + met_point_tools.print_data('hdr_typ_table',met_point_data['hdr_typ_table']) + met_point_tools.print_data('hdr_sid',met_point_data['hdr_sid']) + met_point_tools.print_data('hdr_sid_table',met_point_data['hdr_sid_table']) + met_point_tools.print_data('hdr_vld',met_point_data['hdr_vld']) + met_point_tools.print_data('hdr_vld_table',met_point_data['hdr_vld_table']) + met_point_tools.print_data('hdr_lat',met_point_data['hdr_lat']) + met_point_tools.print_data('hdr_lon',met_point_data['hdr_lon']) + met_point_tools.print_data('hdr_elv',met_point_data['hdr_elv']) + met_point_tools.print_data('obs_hid',met_point_data['obs_hid']) + 
met_point_tools.print_data('obs_vid',met_point_data['obs_vid']) + met_point_tools.print_data('obs_var_table',met_point_data['obs_var_table']) + met_point_tools.print_data('obs_qty',met_point_data['obs_qty']) + met_point_tools.print_data('obs_qty_table',met_point_data['obs_qty_table']) + met_point_tools.print_data('obs_lvl',met_point_data['obs_lvl']) + met_point_tools.print_data('obs_hgt',met_point_data['obs_hgt']) + met_point_tools.print_data('obs_val',met_point_data['obs_val']) + else: + print(f'{method_name} All',met_point_data) + print(f" nhdr: met_point_data['nhdr']") + print(f" nobs: met_point_data['nobs']") + print(f" use_var_id: met_point_data['use_var_id']") + print(f" hdr_typ: met_point_data['hdr_typ']") + print(f"hdr_typ_table: met_point_data['hdr_typ_table']") + print(f" hdr_sid: met_point_data['hdr_sid']") + print(f"hdr_sid_table: met_point_data['hdr_sid_table']") + print(f" hdr_vld: met_point_data['hdr_vld']") + print(f"hdr_vld_table: met_point_data['hdr_vld_table']") + print(f" hdr_lat: met_point_data['hdr_lat']") + print(f" hdr_lon: met_point_data['hdr_lon']") + print(f" hdr_elv: met_point_data['hdr_elv']") + print(f" obs_hid: met_point_data['obs_hid']") + print(f" obs_vid: met_point_data['obs_vid']") + print(f"obs_var_table: met_point_data['obs_var_table']") + print(f" obs_qty: met_point_data['obs_qty']") + print(f"obs_qty_table: met_point_data['obs_qty_table']") + print(f" obs_lvl: met_point_data['obs_lvl']") + print(f" obs_hgt: met_point_data['obs_hgt']") + print(f" obs_val: met_point_data['obs_val']") + + print(' === MET point data by python embedding ===') @staticmethod # Read the input file which is 11 column text file as the first argument @@ -670,254 +753,14 @@ def read_text_point_obs(input_file, header=None, # (9) numeric: Height(msl or agl) # (10) string: QC_String # (11) numeric: Observation_Value - ascii_point_data = pd.read_csv(input_file, header=header, - delim_whitespace=delim_whitespace, - keep_default_na=keep_default_na, - 
names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'], - dtype={'typ':'string', 'sid':'string', 'vld':'string', 'var':'string', 'qc':'string'}).values.tolist() + ascii_point_data = pd.read_csv( + input_file, header=header, + delim_whitespace=delim_whitespace, + keep_default_na=keep_default_na, + names=['typ', 'sid', 'vld', 'lat', 'lon', 'elv', 'var', 'lvl', 'hgt', 'qc', 'obs'], + dtype={'typ':'string', 'sid':'string', 'vld':'string', 'var':'string', 'qc':'string'}).values.tolist() return ascii_point_data -# Note: caller should import netCDF4 -# The argements nc_group(dataset) and nc_var should not be None -class nc_tools(): - - met_missing = -99999999. - - @staticmethod - def get_num_array(nc_group, var_name): - nc_var = nc_group.variables.get(var_name, None) - return [] if nc_var is None else nc_var[:] - - @staticmethod - def get_ncbyte_array_to_str(nc_var): - nc_str_data = nc_var[:] - if nc_var.datatype.name == 'bytes8': - nc_str_data = [ str(s.compressed(),"utf-8") for s in nc_var[:] ] - return nc_str_data - - @staticmethod - def get_string_array(nc_group, var_name): - nc_var = nc_group.variables.get(var_name, None) - return [] if nc_var is None else nc_tools.get_ncbyte_array_to_str(nc_var) - - -class nc_point_obs(met_point_obs): - - # args should be string, list, or dictionary - def get_nc_filename(self, args): - nc_filename = None - if isinstance(args, dict): - nc_filename = args.get('nc_name',None) - elif isinstance(args, list): - nc_filename = args[0] - elif args != ARG_PRINT_DATA: - nc_filename = args - - return nc_filename - - def read_data(self, nc_filename): - if nc_filename is None: - self.log_error_msg("The input NetCDF filename is missing") - elif not os.path.exists(nc_filename): - self.log_error_msg(f"input NetCDF file ({nc_filename}) does not exist") - else: - dataset = nc.Dataset(nc_filename, 'r') - - attr_name = 'use_var_id' - use_var_id_str = dataset.getncattr(attr_name) if attr_name in dataset.ncattrs() else "false" 
- self.use_var_id = use_var_id_str.lower() == 'true' - - # Header - self.hdr_typ = dataset['hdr_typ'][:] - self.hdr_sid = dataset['hdr_sid'][:] - self.hdr_vld = dataset['hdr_vld'][:] - self.hdr_lat = dataset['hdr_lat'][:] - self.hdr_lon = dataset['hdr_lon'][:] - self.hdr_elv = dataset['hdr_elv'][:] - self.hdr_typ_table = nc_tools.get_string_array(dataset, 'hdr_typ_table') - self.hdr_sid_table = nc_tools.get_string_array(dataset, 'hdr_sid_table') - self.hdr_vld_table = nc_tools.get_string_array(dataset, 'hdr_vld_table') - - nc_var = dataset.variables.get('obs_unit', None) - if nc_var: - self.obs_var_unit = nc_var[:] - nc_var = dataset.variables.get('obs_desc', None) - if nc_var: - self.obs_var_desc = nc_var[:] - - nc_var = dataset.variables.get('hdr_prpt_typ', None) - if nc_var: - self.hdr_prpt_typ = nc_var[:] - nc_var = dataset.variables.get('hdr_irpt_typ', None) - if nc_var: - self.hdr_irpt_typ = nc_var[:] - nc_var = dataset.variables.get('hdr_inst_typ', None) - if nc_var: - self.hdr_inst_typ =nc_var[:] - - #Observation data - self.hdr_sid = dataset['hdr_sid'][:] - self.obs_qty = np.array(dataset['obs_qty'][:]) - self.obs_hid = np.array(dataset['obs_hid'][:]) - self.obs_lvl = np.array(dataset['obs_lvl'][:]) - self.obs_hgt = np.array(dataset['obs_hgt'][:]) - self.obs_val = np.array(dataset['obs_val'][:]) - nc_var = dataset.variables.get('obs_vid', None) - if nc_var is None: - self.use_var_id = False - nc_var = dataset.variables.get('obs_gc', None) - else: - self.obs_var_table = nc_tools.get_string_array(dataset, 'obs_var') - if nc_var: - self.obs_vid = np.array(nc_var[:]) - - self.obs_qty_table = nc_tools.get_string_array(dataset, 'obs_qty_table') - - def save_ncfile(self, nc_filename): - met_data = self.get_point_data() - with nc.Dataset(nc_filename, 'w') as nc_dataset: - self.set_nc_data(nc_dataset) - return met_data - - def set_nc_data(self, nc_dataset): - return nc_point_obs.write_nc_data(nc_dataset, self) - - @staticmethod - def write_nc_file(nc_filename, 
point_obs): - with nc.Dataset(nc_filename, 'w') as nc_dataset: - nc_point_obs.set_nc_data(nc_dataset, point_obs) - - @staticmethod - def write_nc_data(nc_dataset, point_obs): - do_nothing = False - if 0 == point_obs.nhdr: - do_nothing = True - base_met_point_obs.info_msg("the header is empty") - if 0 == point_obs.nobs: - do_nothing = True - base_met_point_obs.info_msg("the observation data is empty") - if do_nothing: - print() - return - - # Set global attributes - nc_dataset.MET_Obs_version = "1.02" ; - nc_dataset.use_var_id = "true" if point_obs.use_var_id else "false" - - # Create dimensions - nc_dataset.createDimension('mxstr', 16) - nc_dataset.createDimension('mxstr2', 40) - nc_dataset.createDimension('mxstr3', 80) - nc_dataset.createDimension('nhdr', point_obs.nhdr) - nc_dataset.createDimension('nobs', point_obs.nobs) - #npbhdr = len(point_obs.hdr_prpt_typ) - if 0 < point_obs.npbhdr: - nc_dataset.createDimension('npbhdr', point_obs.npbhdr) - nc_dataset.createDimension('nhdr_typ', point_obs.nhdr_typ) - nc_dataset.createDimension('nhdr_sid', point_obs.nhdr_sid) - nc_dataset.createDimension('nhdr_vld', point_obs.nhdr_vld) - nc_dataset.createDimension('nobs_qty', point_obs.nobs_qty) - nc_dataset.createDimension('obs_var_num', point_obs.nobs_var) - - type_for_string = 'S1' # np.byte - dims_hdr = ('nhdr',) - dims_obs = ('nobs',) - - # Create header and observation variables - var_hdr_typ = nc_dataset.createVariable('hdr_typ', np.int32, dims_hdr, fill_value=-9999) - var_hdr_sid = nc_dataset.createVariable('hdr_sid', np.int32, dims_hdr, fill_value=-9999) - var_hdr_vld = nc_dataset.createVariable('hdr_vld', np.int32, dims_hdr, fill_value=-9999) - var_hdr_lat = nc_dataset.createVariable('hdr_lat', np.float32, dims_hdr, fill_value=-9999.) - var_hdr_lon = nc_dataset.createVariable('hdr_lon', np.float32, dims_hdr, fill_value=-9999.) - var_hdr_elv = nc_dataset.createVariable('hdr_elv', np.float32, dims_hdr, fill_value=-9999.) 
- - var_obs_qty = nc_dataset.createVariable('obs_qty', np.int32, dims_obs, fill_value=-9999) - var_obs_hid = nc_dataset.createVariable('obs_hid', np.int32, dims_obs, fill_value=-9999) - var_obs_vid = nc_dataset.createVariable('obs_vid', np.int32, dims_obs, fill_value=-9999) - var_obs_lvl = nc_dataset.createVariable('obs_lvl', np.float32, dims_obs, fill_value=-9999.) - var_obs_hgt = nc_dataset.createVariable('obs_hgt', np.float32, dims_obs, fill_value=-9999.) - var_obs_val = nc_dataset.createVariable('obs_val', np.float32, dims_obs, fill_value=-9999.) - - if 0 == point_obs.npbhdr: - var_hdr_prpt_typ = None - var_hdr_irpt_typ = None - var_hdr_inst_typ = None - else: - dims_npbhdr = ('npbhdr',) - var_hdr_prpt_typ = nc_dataset.createVariable('hdr_prpt_typ', np.int32, dims_npbhdr, fill_value=-9999.) - var_hdr_irpt_typ = nc_dataset.createVariable('hdr_irpt_typ', np.int32, dims_npbhdr, fill_value=-9999.) - var_hdr_inst_typ = nc_dataset.createVariable('hdr_inst_typ', np.int32, dims_npbhdr, fill_value=-9999.) 
- - var_hdr_typ_table = nc_dataset.createVariable('hdr_typ_table', type_for_string, ('nhdr_typ','mxstr2')) - var_hdr_sid_table = nc_dataset.createVariable('hdr_sid_table', type_for_string, ('nhdr_sid','mxstr2')) - var_hdr_vld_table = nc_dataset.createVariable('hdr_vld_table', type_for_string, ('nhdr_vld','mxstr')) - var_obs_qty_table = nc_dataset.createVariable('obs_qty_table', type_for_string, ('nobs_qty','mxstr')) - var_obs_var_table = nc_dataset.createVariable('obs_var', type_for_string, ('obs_var_num','mxstr2')) - var_obs_var_unit = nc_dataset.createVariable('obs_unit', type_for_string, ('obs_var_num','mxstr2')) - var_obs_var_desc = nc_dataset.createVariable('obs_desc', type_for_string, ('obs_var_num','mxstr3')) - - # Set variables - var_hdr_typ[:] = point_obs.hdr_typ[:] - var_hdr_sid[:] = point_obs.hdr_sid[:] - var_hdr_vld[:] = point_obs.hdr_vld[:] - var_hdr_lat[:] = point_obs.hdr_lat[:] - var_hdr_lon[:] = point_obs.hdr_lon[:] - var_hdr_elv[:] = point_obs.hdr_elv[:] - for i in range(0, point_obs.nhdr_typ): - for j in range(0, len(point_obs.hdr_typ_table[i])): - var_hdr_typ_table[i,j] = point_obs.hdr_typ_table[i][j] - for i in range(0, point_obs.nhdr_sid): - for j in range(0, len(point_obs.hdr_sid_table[i])): - var_hdr_sid_table[i,j] = point_obs.hdr_sid_table[i][j] - for i in range(0, point_obs.nhdr_vld): - for j in range(0, len(point_obs.hdr_vld_table[i])): - var_hdr_vld_table[i,j] = point_obs.hdr_vld_table[i][j] - if 0 < point_obs.npbhdr: - var_hdr_prpt_typ[:] = point_obs.hdr_prpt_typ[:] - var_hdr_irpt_typ[:] = point_obs.hdr_irpt_typ[:] - var_hdr_inst_typ[:] = point_obs.hdr_inst_typ[:] - - var_obs_qty[:] = point_obs.obs_qty[:] - var_obs_hid[:] = point_obs.obs_hid[:] - var_obs_vid[:] = point_obs.obs_vid[:] - var_obs_lvl[:] = point_obs.obs_lvl[:] - var_obs_hgt[:] = point_obs.obs_hgt[:] - var_obs_val[:] = point_obs.obs_val[:] - for i in range(0, point_obs.nobs_var): - for j in range(0, len(point_obs.obs_var_table[i])): - var_obs_var_table[i,j] = 
point_obs.obs_var_table[i][j] - var_obs_var_unit[i] = "" if i >= len(point_obs.obs_var_unit) else point_obs.obs_var_unit[i] - var_obs_var_desc[i] = "" if i >= len(point_obs.obs_var_desc) else point_obs.obs_var_desc[i] - for i in range(0, point_obs.nobs_qty): - for j in range(0, len(point_obs.obs_qty_table[i])): - var_obs_qty_table[i,j] = point_obs.obs_qty_table[i][j] - - # Set variable attributes - var_hdr_typ.long_name = "index of message type" - var_hdr_sid.long_name = "index of station identification" - var_hdr_vld.long_name = "index of valid time" - var_hdr_lat.long_name = "latitude" - var_hdr_lat.units = "degrees_north" - var_hdr_lon.long_name = "longitude" - var_hdr_lon.units = "degrees_east" - var_hdr_elv.long_name = "elevation" - var_hdr_elv.units = "meters above sea level (msl)" - - var_obs_qty.long_name = "index of quality flag" - var_obs_hid.long_name = "index of matching header data" - var_obs_vid.long_name = "index of BUFR variable corresponding to the observation type" - var_obs_lvl.long_name = "pressure level (hPa) or accumulation interval (sec)" - var_obs_hgt.long_name = "height in meters above sea level (msl)" - var_obs_val.long_name = "observation value" - var_hdr_typ_table.long_name = "message type" - var_hdr_sid_table.long_name = "station identification" - var_hdr_vld_table.long_name = "valid time" - var_hdr_vld_table.units = "YYYYMMDD_HHMMSS UTC" - var_obs_qty_table.long_name = "quality flag" - var_obs_var_table.long_name = "variable names" - var_obs_var_unit.long_name = "variable units" - var_obs_var_desc.long_name = "variable descriptions" - # This is a sample drived class class sample_met_point_obs(met_point_obs): @@ -946,6 +789,7 @@ def read_data(self, arg_map={}): self.obs_var_table = [ "TMP", "RH" ] self.obs_qty_table = [ "NA" ] + def convert_point_data(point_data, check_all_records=False, input_type='csv'): tmp_point_data = {} if 'csv' == input_type: @@ -953,9 +797,12 @@ def convert_point_data(point_data, check_all_records=False, 
input_type='csv'): csv_point_data.check_csv_point_data(check_all_records) tmp_point_data = csv_point_data.get_point_data() else: - base_met_point_obs.error_msg('Not supported input type: {input_type}') + met_base.error_messageg(f'convert_point_data(() Not supported input type: {input_type}') return tmp_point_data +def get_empty_point_obs(): + return dummy_point_obs() + def main(): args = {} # or args = [] point_obs_data = sample_met_point_obs() @@ -964,20 +811,6 @@ def main(): point_obs_data.print_point_data(met_point_data, print_subset=False) -def main_nc(argv): - if len(argv) != 1 and argv[1] != ARG_PRINT_DATA: - netcdf_filename = argv[1] - tmp_nc_name = 'tmp_met_point.nc' - point_obs_data = nc_point_obs() - point_obs_data.read_data(point_obs_data.get_nc_filename(netcdf_filename)) - met_point_data = point_obs_data.save_ncfile(tmp_nc_name) - print(f'{base_met_point_obs.get_prompt()} saved met_point_data to {tmp_nc_name}') - met_point_data['met_point_data'] = point_obs_data - - if DO_PRINT_DATA or ARG_PRINT_DATA == argv[-1]: - met_point_obs.print_point_data(met_point_data) - - if __name__ == '__main__': main() print('Done python script') From 1600963e0d2e9bcd14269092e2598e046b0ce148 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 23:04:10 +0000 Subject: [PATCH 14/19] #2772 Allow to keep the temporary files by using the environment variable --- src/basic/vx_config/temp_file.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/basic/vx_config/temp_file.cc b/src/basic/vx_config/temp_file.cc index 98683a1c5b..f1ed751190 100644 --- a/src/basic/vx_config/temp_file.cc +++ b/src/basic/vx_config/temp_file.cc @@ -70,6 +70,12 @@ void remove_temp_file(const ConcatString file_name) { // // Attempt to remove the file and print out any error message // + const char *keep_temp = getenv("MET_KEEP_TEMP_FILE"); + if (nullptr != keep_temp + && (0 == strcmp(keep_temp, "true") || 0 == strcmp(keep_temp, "yes"))) { + return; + } + if((errno = 
remove(file_name.c_str())) != 0) { mlog << Error << "\nremove_temp_file() -> " << "can't delete temporary file: \"" From 6264c6dc119f13ba5af8fc4146c97eb0041f1a49 Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Mon, 29 Jan 2024 23:23:44 +0000 Subject: [PATCH 15/19] Added log message if the temporary file was not deleted --- src/basic/vx_config/temp_file.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/basic/vx_config/temp_file.cc b/src/basic/vx_config/temp_file.cc index f1ed751190..85d5528a49 100644 --- a/src/basic/vx_config/temp_file.cc +++ b/src/basic/vx_config/temp_file.cc @@ -73,6 +73,8 @@ void remove_temp_file(const ConcatString file_name) { const char *keep_temp = getenv("MET_KEEP_TEMP_FILE"); if (nullptr != keep_temp && (0 == strcmp(keep_temp, "true") || 0 == strcmp(keep_temp, "yes"))) { + mlog << Debug(2) << "The temporary file (" << file_name << ") was not deleted. Please remove it manually\n\n"; return; } From c1532ddc223bf3f684a5a9ccfda2008d1f8adc5a Mon Sep 17 00:00:00 2001 From: Julie Prestopnik Date: Wed, 31 Jan 2024 15:11:18 -0700 Subject: [PATCH 16/19] Per #2772, added MET_PYTHON_EXE to various test cases and removed what would have been a duplicate after adding MET_PYTHON_EXE --- internal/test_unit/xml/unit_python.xml | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/internal/test_unit/xml/unit_python.xml b/internal/test_unit/xml/unit_python.xml index 051f709a62..f6e011dc0d 100644 --- a/internal/test_unit/xml/unit_python.xml +++ b/internal/test_unit/xml/unit_python.xml @@ -206,6 +206,7 @@ &MET_BIN;/point_stat + MET_PYTHON_EXE &MET_PYTHON_EXE; FCST_COMMAND &MET_BASE;/python/examples/read_ascii_numpy.py &DATA_DIR_PYTHON;/fcst.txt FCST OBS_COMMAND &MET_BASE;/python/examples/read_ascii_numpy.py &DATA_DIR_PYTHON;/obs.txt OBS @@ -480,21 +481,6 @@ - - &MET_BIN;/point2grid - \ - 'PYTHON_NUMPY=&MET_BASE;/python/examples/read_met_point_obs.py &OUTPUT_DIR;/pb2nc/ndas.20120409.t12z.prepbufr.tm00.nc' \ - 
G212 \ - &OUTPUT_DIR;/python/pb2nc_TMP.nc \ - -field 'name="TMP"; level="*"; valid_time="20120409_120000"; censor_thresh=[ <0 ]; censor_val=[0];' \ - -name TEMP \ - -v 1 - - - &OUTPUT_DIR;/python/pb2nc_TMP.nc - - - &MET_BIN;/point2grid @@ -535,6 +521,7 @@ &MET_BIN;/plot_point_obs + MET_PYTHON_EXE &MET_PYTHON_EXE; TO_GRID NONE \ @@ -561,6 +548,7 @@ > &OUTPUT_DIR;/python/ensemble_stat/input_file_list; \ &MET_BIN;/ensemble_stat + MET_PYTHON_EXE &MET_PYTHON_EXE; DESC NA OBS_ERROR_FLAG FALSE SKIP_CONST FALSE @@ -587,6 +575,7 @@ &MET_BIN;/point_stat + MET_PYTHON_EXE &MET_PYTHON_EXE; BEG_DS -1800 END_DS 1800 OUTPUT_PREFIX GRIB1_NAM_GDAS_WINDS @@ -605,6 +594,9 @@ + + MET_PYTHON_EXE &MET_PYTHON_EXE; + &MET_BIN;/plot_data_plane \ PYTHON_NUMPY \ @@ -619,6 +611,9 @@ + + MET_PYTHON_EXE &MET_PYTHON_EXE; + &MET_BIN;/pcp_combine \ -add PYTHON_NUMPY \ From 06498d7c7e7f11147195f73a06e547bb5f3114e2 Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Wed, 31 Jan 2024 16:18:04 -0700 Subject: [PATCH 17/19] Per #2772, add documentation about 3 new environment variables. --- docs/Users_Guide/config_options.rst | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst index f2dc2803ba..f59c8f9a87 100644 --- a/docs/Users_Guide/config_options.rst +++ b/docs/Users_Guide/config_options.rst @@ -494,6 +494,48 @@ Where code is running in a production context, it is worth being familiar with the binding / affinitization method on the particular system and building it into any relevant scripting. +.. _met_keep_temp_file: + +MET_KEEP_TEMP_FILE +------------------ + +The MET_KEEP_TEMP_FILE environment variable can be set to control the runtime +behavior of the MET tools. The MET tools write temporary files in several places +in the application and library code. By default, those temporary files are deleted +when they are no longer needed. 
However it can be useful for development, testing, +and debugging to keep them for further inspection. Setting this environment variable +to a value of :code:`yes` or :code:`true` instructs the MET tools to retain temporary +files instead of deleting them. + +Note that doing so may fill up the temporary directory. It is the responsibility of +the user to monitor the temporary directory usage and remove temporary files that +are no longer needed. + +When running with this option, users are advised to refer to section +:numref:`config_tmp_dir` and write temporary files to a personal location rather than +the default shared :code:`/tmp` directory. + +.. _met_python_debug: + +MET_PYTHON_DEBUG +---------------- + +The MET_PYTHON_DEBUG environment variable can be set to turn one debugging log messages +related to Python embedding. By default, these log messages are disabled. It can be set +to a value of :code:`all` for all log messages, :code:`dataplane` for log messages when +reading gridded data, or :code:`point` for log messages when reading point data. + +.. _met_python_tmp_format: + +MET_PYTHON_TMP_FORMAT +--------------------- + +The MET_PYTHON_TMP_FORMAT environment variable defines whether temporary files for +Python embedding should be written as NetCDF files or using JSON and NumPy serialization. +By default, they are written as JSON (for attributes) and NumPy serialization (for data) +to avoid NetCDF library conflicts between MET and Python. Setting this environment +variable to :code:`netcdf` enables the use of temporary NetCDF files instead. + Settings Common to Multiple Tools ================================= From c0c3d69bf7d55eff20a6ef42af73feb1eb66d4ec Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Wed, 31 Jan 2024 16:24:11 -0700 Subject: [PATCH 18/19] Per #2772, tweak the wording. 
--- docs/Users_Guide/config_options.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst index f59c8f9a87..e3bf7ccd6e 100644 --- a/docs/Users_Guide/config_options.rst +++ b/docs/Users_Guide/config_options.rst @@ -520,10 +520,11 @@ the default shared :code:`/tmp` directory. MET_PYTHON_DEBUG ---------------- -The MET_PYTHON_DEBUG environment variable can be set to turn one debugging log messages -related to Python embedding. By default, these log messages are disabled. It can be set -to a value of :code:`all` for all log messages, :code:`dataplane` for log messages when -reading gridded data, or :code:`point` for log messages when reading point data. +The MET_PYTHON_DEBUG environment variable can be set to enable debugging log messages +related to Python embedding. These log messages are disabled by default. The environment +variable cab be set to a value of :code:`all` for all log messages, :code:`dataplane` +for log messages when reading gridded data, or :code:`point` for log messages when +reading point data. .. _met_python_tmp_format: @@ -531,8 +532,8 @@ MET_PYTHON_TMP_FORMAT --------------------- The MET_PYTHON_TMP_FORMAT environment variable defines whether temporary files for -Python embedding should be written as NetCDF files or using JSON and NumPy serialization. -By default, they are written as JSON (for attributes) and NumPy serialization (for data) +Python embedding should be written as NetCDF files or using JSON/NumPy serialization. +By default, they are written using JSON for attributes and NumPy serialization for data to avoid NetCDF library conflicts between MET and Python. Setting this environment variable to :code:`netcdf` enables the use of temporary NetCDF files instead. 
From e3343f77297c71b6930d866aa2802c4cdf0c7817 Mon Sep 17 00:00:00 2001 From: Julie Prestopnik Date: Thu, 1 Feb 2024 09:29:15 -0700 Subject: [PATCH 19/19] Fixing typo --- docs/Users_Guide/config_options.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst index e3bf7ccd6e..5b533a1bb1 100644 --- a/docs/Users_Guide/config_options.rst +++ b/docs/Users_Guide/config_options.rst @@ -522,7 +522,7 @@ MET_PYTHON_DEBUG The MET_PYTHON_DEBUG environment variable can be set to enable debugging log messages related to Python embedding. These log messages are disabled by default. The environment -variable cab be set to a value of :code:`all` for all log messages, :code:`dataplane` +variable can be set to a value of :code:`all` for all log messages, :code:`dataplane` for log messages when reading gridded data, or :code:`point` for log messages when reading point data.