Merge pull request NCAR#50 from jmccreight/master

job, scheduler objects
jmccreight · May 2, 2018 · df94727 · df94727
2 parents 28d100e + e07b1b5
commit df94727
Show file tree

Hide file tree

Showing 15 changed files with 3,303 additions and 195 deletions.
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
     packages=find_packages(),
     url='https://github.com/NCAR/wrf_hydro_py',
     license='MIT',
-    install_requires=['pandas','f90nml','deepdiff','pathlib','xarray','datetime','pytest','pytest-datadir-ng'],
+    install_requires=['pandas','f90nml','deepdiff','pathlib','xarray','datetime','pytest','pytest-datadir-ng','boltons'],
     author='Joe Mills',
     author_email='jmills@ucar.edu',
     description='Crude API for the WRF-Hydro model',

diff --git a/wrfhydro/ensemble.py b/wrfhydro/ensemble.py
@@ -0,0 +1,223 @@
+from wrf_hydro_model import *
+from deepdiff import DeepDiff
+from boltons.iterutils import remap
+import copy
+
+
+# ########################
+class DeepDiffEq(DeepDiff):
+    """DeepDiff variant that compares selected types with their own ``__eq__``.
+
+    Objects whose exact type is listed in ``eq_types`` are not traversed
+    attribute by attribute; they are compared with ``==`` and reported under
+    ``'values_changed'`` when unequal. Every other object is handled by the
+    stock DeepDiff object comparison.
+    """
+
+    # The set() defaults mirror the parent signature; they are only forwarded,
+    # never mutated here.
+    def __init__(self,
+                 t1,
+                 t2,
+                 eq_types,
+                 ignore_order=False,
+                 report_repetition=False,
+                 significant_digits=None,
+                 exclude_paths=set(),
+                 exclude_regex_paths=set(),
+                 exclude_types=set(),
+                 include_string_type_changes=False,
+                 verbose_level=1,
+                 view='text',
+                 **kwargs):
+        """Diff ``t1`` against ``t2``.
+
+        Args:
+            t1: First object.
+            t2: Second object.
+            eq_types: Iterable of types to compare via ``==`` instead of
+                recursing into them.
+            ignore_order, report_repetition, significant_digits,
+            exclude_paths, exclude_regex_paths, exclude_types,
+            include_string_type_changes, verbose_level, view, **kwargs:
+                Forwarded unchanged to ``DeepDiff.__init__``.
+        """
+        # DeepDiff performs the comparison during construction, and that
+        # comparison calls the override below, so eq_types must already exist.
+        self.eq_types = set(eq_types)
+
+        # Forward the caller's options; hard-coding the defaults here would
+        # silently discard every option passed to this constructor.
+        super().__init__(t1,
+                         t2,
+                         ignore_order=ignore_order,
+                         report_repetition=report_repetition,
+                         significant_digits=significant_digits,
+                         exclude_paths=exclude_paths,
+                         exclude_regex_paths=exclude_regex_paths,
+                         exclude_types=exclude_types,
+                         include_string_type_changes=include_string_type_changes,
+                         verbose_level=verbose_level,
+                         view=view,
+                         **kwargs)
+
+    # DeepDiff.__diff_obj is name-mangled to _DeepDiff__diff_obj, so the
+    # override has to be spelled with the mangled name.
+    def _DeepDiff__diff_obj(self, level, parents_ids=frozenset({}),
+                            is_namedtuple=False):
+        """Difference of 2 objects using their __eq__ if requested"""
+
+        if type(level.t1) in self.eq_types:
+            if not (level.t1 == level.t2):
+                self._DeepDiff__report_result('values_changed', level)
+            return
+
+        # Writing ``.__diff_obj`` here would be mangled to
+        # ``_DeepDiffEq__diff_obj`` (which does not exist), so call the parent
+        # through its mangled name and pass the received arguments through.
+        super()._DeepDiff__diff_obj(level,
+                                    parents_ids=parents_ids,
+                                    is_namedtuple=is_namedtuple)
+
+
+
+
+
+def copy_member(member,
+                do_copy: bool):
+    """Return a deep copy of ``member`` when ``do_copy`` is truthy, else ``member`` itself."""
+    return copy.deepcopy(member) if do_copy else member
+
+# ########################
+# Classes for constructing and running a wrf_hydro simulation
+class WrfHydroEnsembleSim(object):
+    """Container for an ensemble of simulation objects.
+
+    Members are stored in an internal list (deep-copied by default). Each
+    member carries the ensemble metadata attributes ``number``,
+    ``description``, ``run_dir`` and ``forcing_source_dir``.
+    """
+    def __init__(self,
+                 members: list,
+                 ensemble_dir: str='' ):
+        """Instantiate a WrfHydroEnsembleSim object.
+        Args:
+            members: A simulation object, or a list of them, to add to the
+                ensemble.
+            ensemble_dir: Optional, the directory containing the ensemble
+                member run dirs. Stored on ``ens_dir``.
+        Returns:
+            A WrfHydroEnsembleSim object.
+        """
+        # Both private containers must exist before the members setter runs.
+        self.__members = []
+        self.__members_dict = {}
+        self.members = members
+
+        # May be left empty and filled in at run time.
+        self.ens_dir = ensemble_dir
+
+
+    # Data to store in the ensemble object
+    # 1) list of simulations = the ensemble
+    # 2) N = __len__(), @property
+    # 3) ensemble dir, the directory containing the ensemble member_dir run dirs
+
+
+    def __len__(self):
+        """Number of members in the ensemble."""
+        return len(self.members)
+
+
+    # The "canonical" name for len
+    @property
+    def N(self):
+        """Number of members in the ensemble (same as ``len(self)``)."""
+        return len(self)
+
+
+    # Data to store with the "member" simulations, conceptually this
+    # data belongs to the members:
+    # 1) member number
+    # 2) description
+    # 3) member_dir
+    # 4) forcing_source_dir
+    #
+    # Ensemblize the individual members.
+    # Except for changing the Class definition, why
+    # would I define a child class instead of just adding attributes?
+
+
+    @property
+    def members(self):
+        """list: the member simulation objects."""
+        return self.__members
+
+    @members.setter
+    def members(self,
+                     new_members: list, 
+                     copy_members: bool=True):
+        """Append ``new_members`` to the ensemble (assignment does NOT replace).
+
+        Property assignment can only supply ``new_members``, so
+        ``copy_members`` always takes its default on this path; use
+        ``replicate_member`` to control copying.
+        """
+        self._add_members(new_members, copy_members)
+
+    def _add_members(self, new_members, copy_members=True):
+        """Append members, resetting the ensemble metadata on each new one."""
+        if not isinstance(new_members, list):
+            new_members = [ new_members ]
+
+        for nn in new_members:
+            member = copy_member(nn, copy_members)
+            # Metadata inherited from a previous ensemble is discarded;
+            # every new member starts from the unset state.
+            member.number = -1
+            member.description = ''
+            member.run_dir = ''
+            member.forcing_source_dir = ''
+            self.__members.append(member)
+
+
+    # A quick way to setup a basic ensemble from a single sim.
+    def replicate_member(self,
+                         N: int,
+                         copy_members: bool=True):
+        """Grow a single-member ensemble to ``N`` members.
+
+        Args:
+            N: Total number of members wanted after replication.
+            copy_members: If True, each added member is a deep copy of the
+                existing one; otherwise the same object is appended repeatedly.
+        Raises:
+            ValueError: If the ensemble does not hold exactly one member.
+        """
+        if self.N != 1:
+            raise ValueError(
+                'replicate_member requires an ensemble with exactly one '
+                'member, found ' + str(self.N) + '.')
+        self._add_members([ self.members[0] ] * (N-1), copy_members)
+
+
+    @property        
+    def members_dict(self):
+        """dict: per-member metadata lists, keyed by attribute name.
+
+        Reading this property renumbers the members 0..N-1 and refreshes the
+        ``number``, ``description``, ``run_dir`` and ``forcing_source_dir``
+        entries. Entries added through the setter are left as they are.
+        """
+        m_dict = self.__members_dict
+        for index, member in enumerate(self.members):
+            member.number = index
+        m_dict['number'] = [ mm.number for mm in self.members ]
+        m_dict['description'] = [ mm.description for mm in self.members ]
+        m_dict['run_dir'] = [ mm.run_dir for mm in self.members ]
+        m_dict['forcing_source_dir'] = [ mm.forcing_source_dir for mm in self.members ]
+        return m_dict
+
+    @members_dict.setter
+    def members_dict(self,
+                     att_path_key: str,
+                     values: list=None):
+        """Collect one nested attribute from every member into the dict.
+
+        Usage: ``ens.members_dict = 'hydro_namelist/nudging_nlist/nlastobs'``
+
+        Args:
+            att_path_key: '/'-separated path into each member's ``__dict__``;
+                the last component is the key whose values are collected. The
+                collected list is stored under this full string.
+            values: Unused. Property assignment passes a single value, so
+                this is kept only for signature compatibility.
+        """
+        *att_path, att_key = att_path_key.split('/')
+        att_path = tuple(att_path)
+        collected = []
+
+        def visit(path, key, value):
+            if path == att_path and key == att_key:
+                collected.append(value)
+            # remap expects True (keep the item) or a (key, value) pair.
+            return True
+
+        for mm in self.members:            
+            remap(mm.__dict__, visit=visit)
+
+        self.__members_dict[att_path_key] = collected
+
+
+
+    # Would want a method for detecting differences between ensemble members
+    # instead of just specifying them... 
+
+
+
+    # def get_ens_attributes(self, attribute, the_key):
+
+    #     # Parse up the attribute
+    #     return_list = []
+
+    #     def visit_path_key(path, key, value):
+    #         if key == the_key:
+    #             return_list.append(value) #print(path, key, value)
+    #             return key, value
+    #         return key, value
+
+    #     def remap_path_key(ll):
+    #         return(remap(ll, visit_path_key))
+
+    #     att_list = [remap_path_key(getattr(i, attribute)) for i in self.members ]
+    #     #att_list = [ i.hydro_namelist['nudging_nlist']['nlastobs'] for i in self.members ]
+    #     return(return_list)
+
+
+#Ens:
+#Run method checks run dir name differences
+#Run dir names
+#Print differences across all fields, incl namelists
+#Job array submission
+#Operations on data.
+#Bulk edit of name lists: Run start and end times, etc.
+#Forcing source and run dirs (preprocess the run forcings for the run period)
+
diff --git a/wrfhydro/one_off_scripts/example_1.py b/wrfhydro/one_off_scripts/example_1.py
@@ -0,0 +1,121 @@
+WRF_HYDRO_NWM_PATH=/Users/${USER}/WRF_Hydro/wrf_hydro_nwm_myFork
+WRF_HYDRO_PY_PATH=/Users/${USER}/WRF_Hydro/wrf_hydro_py
+
+docker create --name croton wrfhydro/domains:croton_NY
+## The complement when you're done with it:
+## docker rm -v croton
+
+docker run -it \
+    -v ${WRF_HYDRO_NWM_PATH}:/wrf_hydro_nwm \
+    -v ${WRF_HYDRO_PY_PATH}:/home/docker/wrf_hydro_py \
+    --volumes-from croton \
+    wrfhydro/dev:conda
+
+#######################################################
+cp -r /wrf_hydro_nwm /home/docker/wrf_hydro_nwm
+python
+
+#######################################################
+import sys
+from pprint import pprint
+sys.path.insert(0, '/home/docker/wrf_hydro_py/wrfhydro')
+from wrf_hydro_model import *
+from utilities import *
+
+# ######################################################
+# Model Section
+# Build the model object from the NDHMS source directory.
+# Open questions: there are implied options here. What exactly is the
+# argument, and are there more arguments?
+theModel = WrfHydroModel('/home/docker/wrf_hydro_nwm/trunk/NDHMS')
+
+# The attributes of the model object.
+# Note: menus of both compile options and namelists (run-time options) are now in the
+# repository. These menus of namelists come in with the creation of the model
+# object. Note that while the compile-time options can be used in the compile method
+# on the model object, the namelists are used only in simulation objects (where
+# the model is actually run on a domain).
+
+# Compile options are not yet versioned/configured in the json namelist
+pprint(theModel.compile_options)
+
+pprint(theModel.hrldas_namelists)
+
+pprint(theModel.hydro_namelists)
+
+pprint(theModel.source_dir)
+pprint(theModel.version)
+
+# The only method on the model (it is independent of domain).
+# Should be able to pass version/configuration to the compile. Currently not args.
+# What are other arguments here? Might just show help.
+theModel.compile('gfort')
+# The compilation results in the following new attributes/slots
+## {'__slotnames__', 'compile_dir', 'compiler', 'wrf_hydro_exe', 'table_files', 'configure_log', 'object_id', 'compile_log'}
+pprint(theModel.compiler)
+pprint(theModel.compile_dir)
+# Resulting binary
+pprint(theModel.wrf_hydro_exe)
+# The parameter table files which result from compiling.
+pprint(theModel.table_files)
+# Logs of config and compile (stdout/stderr are bytes, hence the decode)
+print(theModel.configure_log.stdout.decode('utf-8'))
+print(theModel.configure_log.stderr.decode('utf-8'))
+print(theModel.compile_log.stdout.decode('utf-8'))
+print(theModel.compile_log.stderr.decode('utf-8'))
+# An object that needs some description......
+pprint(theModel.object_id)
+
+# ######################################################
+# Domain Section
+# Build the domain object for a given model version and configuration.
+theDomain = WrfHydroDomain(domain_top_dir='/home/docker/domain/croton_NY',
+                           model_version='v1.2.1',
+                           domain_config='NWM')
+
+# Note: The domain has no methods!
+# Examine the attributes, skip the attributes set in the call.
+
+# Each domain has 2 kinds of files which are actually independent of
+# version+configuration: Forcing and nudging files.
+pprint(theDomain.forcing_dir)
+pprint(theDomain.nudging_files)
+
+# The choice of domain+version+configuration specifies certain "patches" to the
+# base namelists that were in the model object. Note that none of these are physics
+# options, they are only domain-specific files, domain-specific times, and restart
+# output frequencies.
+# The patches are held in/with the individual domains. The patch file is
+# specified here
+pprint(theDomain.namelist_patch_file)
+# The patches are contained here
+pprint(theDomain.namelist_patches)
+
+# The specific hydro and lsm files found in the patches are listed in the following fields.
+# These are patch fields which are files and can be opened with xarray.
+pprint(theDomain.hydro_files)
+
+# WrfHydroStatic objects can be opened via xarray?
+pprint(theDomain.lsm_files)
+
+
+# ######################################################
+# Simulation Section
+# simulation object = model object + domain object
+# Note that CHANGING THE MODEL OR DOMAIN OBJECTS WILL CHANGE THE SIMULATION
+# OBJECT ONLY BEFORE IT IS RUN.
+theSim = WrfHydroSim(theModel, theDomain)
+
+pprint(theSim.hydro_namelist)
+pprint(theSim.namelist_hrldas)
+
+# Record the object_id of the simulation's model before recompiling theModel.
+id1=theSim.model.object_id
+# '3451646a-2cae-4b1f-9c38-bd8725e1c55f'
+
+# Dress up the example to show the object is copied. A small point.
+# Recompile theModel with a compile option, then display both object_ids
+# for comparison (in the interactive session started above).
+theModel.compile('gfort', compile_options={'WRF_HYDRO_NUDGING':1})
+theModel.object_id
+theSim.model.object_id
+
+## Run the simulation in a run directory (overwriting any existing one) and
+## open the chanobs output of the resulting run object.
+theRun = theSim.run('/home/docker/testRun1', overwrite=True)
+theRun.chanobs.open()