Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature #2772 python embedding json #2803

Merged
merged 20 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions internal/test_unit/xml/unit_python.xml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@
<test name="python_numpy_point_stat">
<exec>&MET_BIN;/point_stat</exec>
<env>
<pair><name>MET_PYTHON_EXE</name> <value>&MET_PYTHON_EXE;</value></pair>
<pair><name>FCST_COMMAND</name> <value>&MET_BASE;/python/examples/read_ascii_numpy.py &DATA_DIR_PYTHON;/fcst.txt FCST</value></pair>
<pair><name>OBS_COMMAND</name> <value>&MET_BASE;/python/examples/read_ascii_numpy.py &DATA_DIR_PYTHON;/obs.txt OBS</value></pair>
</env>
Expand Down Expand Up @@ -480,21 +481,6 @@
</output>
</test>

<test name="python_point2grid_pb2nc_TMP">
<exec>&MET_BIN;/point2grid</exec>
<param> \
'PYTHON_NUMPY=&MET_BASE;/python/examples/read_met_point_obs.py &OUTPUT_DIR;/pb2nc/ndas.20120409.t12z.prepbufr.tm00.nc' \
G212 \
&OUTPUT_DIR;/python/pb2nc_TMP.nc \
-field 'name="TMP"; level="*"; valid_time="20120409_120000"; censor_thresh=[ &lt;0 ]; censor_val=[0];' \
-name TEMP \
-v 1
</param>
<output>
<grid_nc>&OUTPUT_DIR;/python/pb2nc_TMP.nc</grid_nc>
</output>
</test>

<!-- Invokes user-python logic to read a point obs -->
<test name="python_point2grid_pb2nc_TMP_user_python">
<exec>&MET_BIN;/point2grid</exec>
Expand Down Expand Up @@ -535,6 +521,7 @@
<test name="python_plot_point_obs_CONFIG">
<exec>&MET_BIN;/plot_point_obs</exec>
<env>
<pair><name>MET_PYTHON_EXE</name> <value>&MET_PYTHON_EXE;</value></pair>
<pair><name>TO_GRID</name> <value>NONE</value></pair>
</env>
<param> \
Expand All @@ -561,6 +548,7 @@
> &OUTPUT_DIR;/python/ensemble_stat/input_file_list; \
&MET_BIN;/ensemble_stat</exec>
<env>
<pair><name>MET_PYTHON_EXE</name> <value>&MET_PYTHON_EXE;</value></pair>
<pair><name>DESC</name> <value>NA</value></pair>
<pair><name>OBS_ERROR_FLAG</name> <value>FALSE</value></pair>
<pair><name>SKIP_CONST</name> <value>FALSE</value></pair>
Expand All @@ -587,6 +575,7 @@
<test name="python_point_stat_GRIB1_NAM_GDAS_WINDS">
<exec>&MET_BIN;/point_stat</exec>
<env>
<pair><name>MET_PYTHON_EXE</name> <value>&MET_PYTHON_EXE;</value></pair>
<pair><name>BEG_DS</name> <value>-1800</value></pair>
<pair><name>END_DS</name> <value>1800</value></pair>
<pair><name>OUTPUT_PREFIX</name> <value>GRIB1_NAM_GDAS_WINDS</value></pair>
Expand All @@ -605,6 +594,9 @@
</test>

<test name="python_plot_data_plane_SEMILATLON_ZONAL_MEAN">
<env>
<pair><name>MET_PYTHON_EXE</name> <value>&MET_PYTHON_EXE;</value></pair>
</env>
<exec>&MET_BIN;/plot_data_plane</exec>
<param> \
PYTHON_NUMPY \
Expand All @@ -619,6 +611,9 @@
</test>

<test name="python_pcp_combine_SEMILATLON_MERIDIONAL_MEAN">
<env>
<pair><name>MET_PYTHON_EXE</name> <value>&MET_PYTHON_EXE;</value></pair>
</env>
<exec>&MET_BIN;/pcp_combine</exec>
<param> \
-add PYTHON_NUMPY \
Expand Down
4 changes: 2 additions & 2 deletions scripts/python/examples/read_ascii_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
###########################################

def log(msg):
dataplane.log_msg(msg)
dataplane.log_message(msg)

def set_dataplane_attrs():
# attrs is a dictionary which contains attributes describing the dataplane.
Expand Down Expand Up @@ -95,5 +95,5 @@ def set_dataplane_attrs():
attrs = set_dataplane_attrs()
log("Attributes:\t" + repr(attrs))

# Sets fill_value if it exists
# Sets fill_value if the dataplane defines one
#attrs['fill_value'] = 255 # for letter.txt
7 changes: 5 additions & 2 deletions scripts/python/examples/read_ascii_numpy_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@
met_data = dataplane.read_2d_text_input(input_file)
print("Data Shape:\t" + repr(met_data.shape))
print("Data Type:\t" + repr(met_data.dtype))
except NameError:
print("Can't find the input file")
except NameError as ex:
print(" === ERROR from read_ascii_numpy_grid.py")
print(f" Exception: {type(ex)} {ex}")
print(f" sys.argv: {sys.argv}")
print(" Can't find the input file")

# attrs is a dictionary which contains attributes describing the dataplane.
# attrs should have 9 items, each of data type string:
Expand Down
2 changes: 1 addition & 1 deletion scripts/python/examples/read_ascii_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
###########################################

def log(msg):
dataplane.log_msg(msg)
dataplane.log_message(msg)

log("Python Script:\t" + repr(sys.argv[0]))

Expand Down
8 changes: 5 additions & 3 deletions scripts/python/examples/read_met_point_obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from datetime import datetime

from met.point import met_point_tools
from met.point_nc import met_point_nc_tools
from pyembed.python_embedding import pyembed_tools

ARG_PRINT_DATA = 'show_data'
Expand All @@ -44,14 +45,15 @@
netcdf_filename = os.path.expandvars(input_name)
args = [ netcdf_filename ]
#args = { 'nc_name': netcdf_filename }
point_obs_data = met_point_tools.get_nc_point_obs()
point_obs_data = met_point_nc_tools.get_nc_point_obs()
point_obs_data.read_data(point_obs_data.get_nc_filename(args))

if point_obs_data is not None:
met_point_data = point_obs_data.get_point_data()
met_point_data['met_point_data'] = point_obs_data
print("met_point_data: ", met_point_data)
print(met_point_data)
if os.getenv("MET_PYTHON_DEBUG", "") != "":
print("met_point_data: ", met_point_data)
print(met_point_data)

if DO_PRINT_DATA:
point_obs_data.dump()
Expand Down
1 change: 1 addition & 0 deletions scripts/python/met/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pythonmetscripts_DATA = \
logger.py \
dataplane.py \
mprbase.py \
point_nc.py \
point.py

EXTRA_DIST = ${pythonmetscripts_DATA}
Expand Down
1 change: 1 addition & 0 deletions scripts/python/met/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ pythonmetscripts_DATA = \
logger.py \
dataplane.py \
mprbase.py \
point_nc.py \
point.py

EXTRA_DIST = ${pythonmetscripts_DATA}
Expand Down
167 changes: 114 additions & 53 deletions scripts/python/met/dataplane.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,33 @@
import os
import sys
import json
import numpy as np
import netCDF4 as nc
import xarray as xr

from importlib import util as import_util
from met.logger import logger
from met.logger import met_base, met_base_tools

###########################################

class dataplane(logger):
class dataplane(met_base):

KEEP_XARRAY = True
class_name = "dataplane"

MET_FILL_VALUE = -9999.
ATTR_USER_FILL_VALUE = 'user_fill_value'

@staticmethod
def call_python(argv):
logger.log_msg(f"Module:\t{repr(argv[0])}")
# argv[0] is the python wrapper script (caller)
met_base.log_message(f"Module:\t{repr(argv[0])}")
if 1 == len(argv):
logger.quit(f"User command is missing")
met_base.quit_msg(f"User python command is missing")
sys.exit(1)

met_base.log_message(f"User python command:\t{repr(' '.join(argv[1:]))}")

logger.log_msg("User Command:\t" + repr(' '.join(argv[1:])))
# argv[0] is the python wrapper script (caller)
# argv[1] contains the user defined python script
pyembed_module_name = argv[1]
sys.argv = argv[1:]
logger.log_msg(f" sys.argv:\t{sys.argv}")

# append user script dir to system path
pyembed_dir, pyembed_name = os.path.split(pyembed_module_name)
if pyembed_dir:
Expand All @@ -40,11 +38,19 @@ def call_python(argv):

user_base = pyembed_name.replace('.py','')

argv_org = sys.argv # save sys.argv
sys.argv = argv[1:]
spec = import_util.spec_from_file_location(user_base, pyembed_module_name)
met_in = import_util.module_from_spec(spec)
spec.loader.exec_module(met_in)
sys.argv = argv_org # restore sys.argv
return met_in

#@staticmethod
#def get_numpy_filename(tmp_filename):
# return met_base_tools.replace_extension(tmp_filename, "json", "npy") if tmp_filename.endswith(".json") else \
# met_base_tools.replace_extension(tmp_filename, "nc", "npy") if tmp_filename.endswith(".nc") else f'{tmp_filename}.npy'

@staticmethod
def is_integer(a_data):
    """Return True when ``a_data`` is an integer scalar.

    Accepts the built-in ``int`` as well as NumPy integer scalars
    (``np.int32``, ``np.int64``, ...). An element indexed out of a NumPy
    integer array is a NumPy scalar, which is not an instance of ``int``,
    so checking against ``int`` alone never matched array elements.
    """
    return isinstance(a_data, (int, np.integer))
Expand Down Expand Up @@ -100,7 +106,32 @@ def read_2d_text_input(input_file):
return met_data

@staticmethod
def read_dataplane(netcdf_filename):
def read_dataplane(tmp_filename):
    """Read a dataplane back from the temporary file ``tmp_filename``.

    Uses the NetCDF reader when ``met_base_tools.use_netcdf_format()`` is
    true; otherwise uses the JSON + NumPy reader.

    Returns whatever the selected reader returns.
    """
    if met_base_tools.use_netcdf_format():
        return dataplane.read_dataplane_nc(tmp_filename)
    # Default is JSON for attributes and NUMPY serialization for 2D array
    return dataplane.read_dataplane_json_numpy(tmp_filename)

@staticmethod
def read_dataplane_json_numpy(tmp_filename):
    """Load dataplane attributes from JSON and the data array from NumPy.

    ``tmp_filename`` is the JSON file holding the attributes. The name of
    the companion NumPy file is derived from it with
    ``met_base_tools.get_numpy_filename``.

    Returns a dict with two keys: ``'attrs'`` (the parsed JSON) and
    ``'met_data'`` (the array returned by ``np.load``).
    """
    if met_base_tools.is_debug_enabled("dataplane"):
        met_base.log_message("Read from a temporary JSON file and a temporary numpy output (dataplane)")

    with open(tmp_filename) as attrs_file:
        attributes = json.load(attrs_file)

    # The numeric array is stored next to the JSON file.
    array_path = met_base_tools.get_numpy_filename(tmp_filename)
    return {'attrs': attributes, 'met_data': np.load(array_path)}

@staticmethod
def read_dataplane_nc(netcdf_filename):
import netCDF4 as nc

if met_base_tools.is_debug_enabled("dataplane"):
met_base.log_message(f"Read from a temporary NetCDF file (dataplane)")

# read NetCDF file
ds = nc.Dataset(netcdf_filename, 'r')

Expand Down Expand Up @@ -135,7 +166,73 @@ def read_dataplane(netcdf_filename):
return met_info

@staticmethod
def write_dataplane(met_in, netcdf_filename):
def validate_met_data(met_data, fill_value=None):
    """Replace invalid and user-fill values in a 2D dataplane with the MET fill value.

    Parameters:
        met_data: 2D data as an xarray DataArray, a numpy ndarray or a
            numpy masked array. ``None`` is reported through
            ``met_base.quit`` and the process exits with status 1.
        fill_value: optional user fill value; cells equal to it are
            replaced too.

    Returns:
        The data with masked cells replaced by ``met_base.MET_FILL_VALUE``
        (converted to ``int`` when the sampled cells are integers). An
        xarray input is re-wrapped in a DataArray with its original attrs
        when ``dataplane.KEEP_XARRAY`` is set; otherwise the plain array is
        returned. Input of an unknown type is logged and returned unchanged.

    Raises:
        ValueError: from the shape unpacking when ``met_data`` is not 2D.
    """
    method_name = f"{dataplane.class_name}.validate()"
    if met_data is None:
        met_base.quit(f"{method_name} The met_data is None")
        sys.exit(1)

    nx, ny = met_data.shape

    met_fill_value = met_base.MET_FILL_VALUE
    attrs = None
    from_xarray = False
    if dataplane.is_xarray_dataarray(met_data):
        from_xarray = True
        attrs = met_data.attrs
        met_data = met_data.data
    if isinstance(met_data, np.ndarray):
        met_data = np.ma.array(met_data)

    if isinstance(met_data, np.ma.MaskedArray):
        # Sample the first and the middle cell to decide whether the fill
        # value should be written as an integer.
        is_int_data = (dataplane.is_integer(met_data[0, 0])
                       or dataplane.is_integer(met_data[int(nx/2), int(ny/2)]))
        # NaN never compares equal to itself, so an equality-based mask
        # cannot catch it; masked_invalid masks NaN and +/-inf.
        met_data = np.ma.masked_invalid(met_data)
        if fill_value is not None:
            met_data = np.ma.masked_equal(met_data, fill_value)
        met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value)
    else:
        met_base.log_message(f"{method_name} unknown datatype {type(met_data)}")

    if dataplane.KEEP_XARRAY:
        return xr.DataArray(met_data, attrs=attrs) if from_xarray else met_data
    else:
        return met_data

@staticmethod
def write_dataplane(met_in, tmp_filename):
    """Write the dataplane held by ``met_in`` to the temporary file ``tmp_filename``.

    Uses the NetCDF writer when ``met_base_tools.use_netcdf_format()`` is
    true; otherwise uses the JSON + NumPy writer.
    """
    # Default is JSON for attributes and NUMPY serialization for 2D array
    writer = (dataplane.write_dataplane_nc
              if met_base_tools.use_netcdf_format()
              else dataplane.write_dataplane_json_numpy)
    writer(met_in, tmp_filename)

@staticmethod
def write_dataplane_json_numpy(met_in, tmp_filename):
    """Save dataplane attributes as JSON and the data array as a NumPy file.

    The attributes come from ``met_in.met_data.attrs`` when that exists and
    is non-empty, otherwise from ``met_in.attrs``. They are written to
    ``tmp_filename``. The array is written to the companion file named by
    ``met_base_tools.get_numpy_filename``.
    """
    if met_base_tools.is_debug_enabled("dataplane"):
        met_base.log_message("Save to a temporary JSON file and a temporary numpy output (dataplane)")

    # Prefer attributes attached to the data object, else the module-level ones.
    attributes = getattr(met_in.met_data, 'attrs', None) or met_in.attrs
    with open(tmp_filename, 'w') as attrs_file:
        json.dump(attributes, attrs_file)

    plain_array = met_base_tools.convert_to_ndarray(met_in.met_data)
    array_path = met_base_tools.get_numpy_filename(tmp_filename)
    np.save(array_path, plain_array)

@staticmethod
def write_dataplane_nc(met_in, netcdf_filename):
import netCDF4 as nc

if met_base_tools.is_debug_enabled("dataplane"):
met_base.log_message(f"Save to a temporary NetCDF file (dataplane)")

met_info = {'met_data': met_in.met_data}
if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs:
attrs = met_in.met_data.attrs
Expand Down Expand Up @@ -171,42 +268,6 @@ def write_dataplane(met_in, netcdf_filename):

ds.close()

@staticmethod
def validate_met_data(met_data, fill_value=None):
    """Replace invalid and user-fill values in a 2D dataplane with the MET fill value.

    Parameters:
        met_data: 2D data as an xarray DataArray, a numpy ndarray or a
            numpy masked array. ``None`` is reported through ``logger.quit``.
        fill_value: optional user fill value; cells equal to it are
            replaced too.

    Returns:
        The data with masked cells replaced by ``dataplane.MET_FILL_VALUE``
        (converted to ``int`` when the sampled cells are integers). An
        xarray input is re-wrapped in a DataArray with its original attrs
        when ``dataplane.KEEP_XARRAY`` is set; otherwise the plain array is
        returned. Input of an unknown type is logged and returned unchanged.
    """
    method_name = f"{dataplane.class_name}.validate()"
    attrs = None
    from_xarray = False
    if met_data is None:
        logger.quit(f"{method_name} The met_data is None")
    else:
        nx, ny = met_data.shape

    met_fill_value = dataplane.MET_FILL_VALUE
    if dataplane.is_xarray_dataarray(met_data):
        from_xarray = True
        attrs = met_data.attrs
        met_data = met_data.data
    if isinstance(met_data, np.ndarray):
        met_data = np.ma.array(met_data)

    if isinstance(met_data, np.ma.MaskedArray):
        # Sample the first and the middle cell to decide whether the fill
        # value should be written as an integer.
        is_int_data = (dataplane.is_integer(met_data[0, 0])
                       or dataplane.is_integer(met_data[int(nx/2), int(ny/2)]))
        # NaN never compares equal to itself, so an equality-based mask
        # cannot catch it; masked_invalid masks NaN and +/-inf.
        met_data = np.ma.masked_invalid(met_data)
        if fill_value is not None:
            met_data = np.ma.masked_equal(met_data, fill_value)
        met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value)
    else:
        logger.log_msg(f"{method_name} unknown datatype {type(met_data)}")

    if dataplane.KEEP_XARRAY:
        return xr.DataArray(met_data, attrs=attrs) if from_xarray else met_data
    else:
        return met_data


def main(argv):
Expand All @@ -231,14 +292,14 @@ def main(argv):
fill_value = met_in.user_fill_value

fill_value = attrs.get('fill_value', None)
dataplane.log_msg('validating the dataplane array...')
met_base.log_message('validating the dataplane array...')
met_data = dataplane.validate_met_data(init_met_data, fill_value)
met_info['met_data'] = met_data

if os.environ.get('MET_PYTHON_DEBUG', None) is not None:
dataplane.log_msg('--- met_data after validating ---')
dataplane.log_msg(met_data)
met_base.log_message('--- met_data after validating ---')
met_base.log_message(met_data)

if __name__ == '__main__' or __name__ == sys.argv[0]:
main(sys.argv)
dataplane.log_msg(f'{__name__} complete')
met_base.log_message(f'{__name__} complete')
Loading
Loading