1) Improve debug logging, 2) Update input/output database overwrite feature, 3) Include Zenodo json (addresses #7)
Mark Robertson committed Oct 1, 2019
1 parent 67ff234 commit f321849
Showing 19 changed files with 377 additions and 157 deletions.
22 changes: 22 additions & 0 deletions .zenodo.json
@@ -0,0 +1,22 @@
{
  "license": "other-open",
  "title": "Snow and Water Model Analysis and Visualization (SNOWAV)",
  "version": "v0.10.3",
  "upload_type": "software",
  "keywords": [
    "snow modeling",
    "water resources",
    "energy balance",
    "water supply forecasting",
    "automated water supply model"
  ],
  "creators": [
    {
      "affiliation": "USDA Agricultural Research Service",
      "name": "Mark Robertson",
      "orcid": "0000-0003-4621-0161"
    }
  ],
  "access_right": "open"
}
50 changes: 47 additions & 3 deletions README.md
@@ -8,7 +8,7 @@ SNOWAV was developed at the USDA Agricultural Research Service in Boise, Idaho.

## Requirements
Currently snowav requires:
- awsm model results in awsm_daily format, including output files in the paths ```.../runs/runYYYYMMDD/snow.nc``` and ```.../runs/runYYYYMMDD/em.nc```
- awsm model results in awsm_daily format, including output files in the paths ```.../runs/runYYYYMMDD/snow.nc``` and ```.../runs/runYYYYMMDD/em.nc```, and input files in the paths ```.../data/dataYYYYMMDD/smrfOutputs/```
- topo.nc files that have been created by the basin_setup package
- correct date information in all snow.nc files (a quick check is sketched below)
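
If the dates in a snow.nc file are suspect, a quick check with netCDF4 might look like the sketch below; the path is a placeholder, and a 'time' variable with a units attribute is assumed to follow the usual AWSM convention.

```
import netCDF4 as nc

# Placeholder path; point this at one of the run directories
snowfile = '/home/ops/wy2019/kings/runs/run20190301/snow.nc'

ds = nc.Dataset(snowfile)
time = ds.variables['time']

# num2date converts the raw time values using the units attribute,
# e.g. 'hours since ...'
dates = nc.num2date(time[:], time.units)
print(dates[0], dates[-1])
ds.close()
```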

@@ -77,11 +77,55 @@ database: mysql+mysqlconnector://<user>:<pwd>@172.17.0.2/snowav
Config field [snowav] *masks* can be left blank, and will default to the long_name fields in the topo.nc file. To subset the number of basins processed and plotted, use *masks* with a list: <br/> *masks: San Joaquin River Basin, Main, South Fork* <br/>
To replace the plot labels, use the *plotlabels* field in combination with *masks*: <br/> *masks: San Joaquin River Basin, Main, South Fork* <br/> *plotlabels: San Joaquin, Mammoth, South Fork*
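
For example, a [snowav] section that subsets and relabels basins might look like this sketch (the basin names are the illustrative ones above):

```
[snowav]
masks: San Joaquin River Basin, Main, South Fork
plotlabels: San Joaquin, Mammoth, South Fork
```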

Setting config option [inflow] *inflow: True* triggers reading in operator-generated inflow and the inflow figure. Currently only configured to work with the Tuolumne, and 'FORM11' excel sheets.
- Setting config option [inflow] *inflow: True* triggers reading in operator-generated inflow and the inflow figure. Currently this is only configured to work with the Tuolumne basin and 'FORM11' Excel sheets.

## SNOWAV processing utility
- Config option [diagnostics] *inputs_table: True* and associated fields will trigger summary processing of smrf input data (see CoreConfig.ini for more information). Use [plots] *inputs: True* to make figures for inputs data (see the sketch below).
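
A corresponding setup might look like this sketch (field names are from CoreConfig.ini; the values are illustrative):

```
[diagnostics]
inputs_table: True

[plots]
inputs: True
```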

## SNOWAV Processing Utility
The snowav processing utility snowav.utils.utilities.calculate can be used to make simple calculations on snow.nc and em.nc files. See scripts.sample_process.py for an example.
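
For orientation, the kind of simple calculation this utility might perform can be hand-rolled with netCDF4 and numpy. The sketch below is not the calculate API (see scripts.sample_process.py for that); the path and the 'specific_mass' SWE variable name are assumptions.

```
import numpy as np
import netCDF4 as nc

# Placeholder path and assumed variable name for the SWE field
ds = nc.Dataset('/home/ops/wy2019/kings/runs/run20190301/snow.nc')
swe = ds.variables['specific_mass'][:]
print('mean SWE: {:.1f}'.format(np.nanmean(swe)))
ds.close()
```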

## DataFrame from Existing Database Records
This short sample script pulls a DataFrame of existing database records. Use it in combination with a snowav database query to find and pull the records you want.

```
from datetime import datetime

from snowav.database.database import connect, collect
from snowav.utils.utilities import masks

# These settings will change depending on the basin, time frame, run,
# and value you are interested in
dempath = '/home/ops/wy2019/kings/topo/topo.nc'
start_date = datetime(2019, 3, 1)
end_date = datetime(2019, 7, 30)
value = 'swe_vol'
run_name = 'kings_wy2019_ops'

# These are logins for the snowav database, and shouldn't need to change
sql = 'snowav'
user = ''
password = ''
host = '172.17.0.2'
port = '3306'

value_options = ['swe_vol', 'swe_z', 'swi_vol', 'swi_z', 'precip_vol',
                 'precip_z', 'density', 'coldcont', 'depth', 'evap_z']

if value not in value_options:
    raise Exception("'value' must be one of {}".format(value_options))

# Get the list of basins we want by reading the topo.nc file
out = masks(dempath, False)
basin_list = out['plotorder']

# Establish the snowav database connection and get the 'basins'
# dictionary we need for pulling results
basins, cnx, out = connect(None, sql, basin_list, user, password, host,
                           port, True)

# Get snowav database results in a DataFrame
df = collect(cnx, basin_list, basins, start_date, end_date, value,
             run_name, 'total', 'daily')
```
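
With the 'total' and 'daily' arguments used here, df should come back date-indexed with one column per basin; a quick df.head() is an easy sanity check.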

## Figures from Existing Database Records
If results have already been processed and stored in a database, figures can be created outside of a snowav processing run (see also scripts/sample_figure.py). See snowav.framework.figures for figure-creation templates. Also, if a standard snowav run is processed with [plots] *print_args_dict: True*, the full input dictionary for each figure will be printed to the screen.
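
As a minimal sketch (not a repository template), the DataFrame pulled in the example above can be plotted directly with pandas and matplotlib before committing to a full snowav figure:

```
import matplotlib.pyplot as plt

# 'df' is assumed to be the date-indexed swe_vol DataFrame from the
# example above, with one column per basin
ax = df.plot(title='swe_vol, kings_wy2019_ops')
ax.set_xlabel('date')
ax.set_ylabel('swe_vol')
plt.savefig('swe_vol_kings.png')
```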

24 changes: 10 additions & 14 deletions snowav/config/CoreConfig.ini
@@ -50,19 +50,14 @@ run_name: type = string,
match with individual model runs and should generally match
what the user intends to process in the run directory field.

log_level: default = debug,
log_level: default = info,
options = [debug info error],
description = Logging.

log_to_file: default = True,
type = bool,
description = Logging.

print_db_connection: default = False,
type = bool,
description = Print basins dictionary and database connection
string. Intended for debugging.

[database]
user: type = string,
default = None,
@@ -103,6 +98,11 @@ add_basins: type = bool,
in via the topo.nc file do not already exist in the database
they will be added as a new basin.

overwrite: type = bool,
default = False,
description = Overwrite existing records on the database. This
applies to outputs as well as inputs.

[run]
directory: default = None,
type = CriticalDirectory,
@@ -252,18 +252,18 @@ inputs_variables: type = string list,
inputs_methods: type = string list,
options = [std min max mean median percentile nanstd nanmin
nanmax nanmean nanmedian nanpercentile],
default = [nanstd nanmin nanmax nanmean nanpercentile],
default = [nanmean nanpercentile],
description = Inputs to summarize.

inputs_percentiles: type = int list,
default = [5 95],
default = [25 75],
description = Percentiles to use if inputs_methods percentile
or nanpercentile is used.

inputs_basins: type = string list,
default = None,
description = List of basins to use in inputs plot. If
left blank the default is all basins. If basins are supplied
left blank the default is the full basin. If basins are supplied
they must match the masks field in the snowav section.

[plots]
@@ -347,10 +347,6 @@ basin_total: default = True,
type = bool,
description = Boolean for making figure.

pixel_swe: default = False,
type = bool,
description = Boolean for making figure.

stn_validate: default = False,
type = bool,
description = Boolean for making figure. Must also supply
@@ -429,7 +425,7 @@ update_numbers: default = None,
in update_file will be used. Dates of flights are also applied
as vertical lines in basin_total plot.

write_properties: default = None,
write_properties: default = [swe_vol swi_vol],
type = string,
options = [swe_vol swe_avail swe_unavail swe_z swi_vol swi_z
precip_vol precip_z depth density rain_z evap_z
23 changes: 14 additions & 9 deletions snowav/database/database.py
@@ -14,12 +14,12 @@
from sys import exit
import warnings


# Fix these two by pulling smrf and awsm versions from netcdf
try:
import smrf
smrf_version = smrf.__version__
except:
print('Could not import smrf')
print('Could not import smrf, database smrf version will be "unknown"')
smrf_version = 'unknown'

try:
@@ -29,7 +29,7 @@
awsm_version = awsm.__version__

except:
print('Could not import awsm')
print('Could not import awsm, database awsm version will be "unknown"')
awsm_version = 'unknown'

def make_session(connector):
@@ -176,7 +176,7 @@ def collect(connector, plotorder, basins, start_date, end_date, value,
df = df.rename(columns={'value':bid})

else:
df.loc[e.index,bid] = e['value'].values
df[bid] = e['value']

df.sort_index(inplace=True)

@@ -190,7 +190,11 @@ def collect(connector, plotorder, basins, start_date, end_date, value,
'elev={}, {} to {}'. format(run_name, elev,
start_date, end_date))
else:
df.loc[elev,bid] = e['value'].values
if e['value'].values[0] is None:
df.loc[elev,bid] = np.nan
else:
df.loc[elev,bid] = e['value'].values


if method == 'difference':
for elev in edges:
@@ -217,7 +221,7 @@ def collect(connector, plotorder, basins, start_date, end_date, value,
'elev={}, {} to {}'. format(run_name, elev,
start_date, end_date))
else:
df.loc[elev,bid] = e['value'].sum()
df.loc[elev,bid] = e['value'].sum(skipna=False)

return df

@@ -304,7 +308,7 @@ def delete(connector, basins, start_date, end_date, bid, run_name):
basin_id = int(basins[bid]['basin_id'])
session = make_session(connector)

logger.append(' Deleting existing database records for {}, {}, {} '.format(
logger.append(' Deleting existing records for {}, {}, {} '.format(
bid, run_name, start_date.date()))

# Get the run_id
@@ -467,7 +471,7 @@ def run_metadata(self, run_name):
qry = session.query(VariableUnits).filter(VariableUnits.run_id == self.run_id)
session.close()
df = pd.read_sql(qry.statement, qry.session.connection())
self.vid[v] = df['id'].values[0]
self.vid[v] = df[df['variable'] == v]['id'].values[0]

# snow_line
variables = {'run_id':self.run_id,
@@ -504,7 +508,8 @@ def run_metadata(self, run_name):
qry = session.query(VariableUnits).filter(VariableUnits.run_id == self.run_id)
session.close()
df = pd.read_sql(qry.statement, qry.session.connection())
self.vid[v] = df['id'].values[0]

self.vid[v] = df[df['variable'] == v]['id'].values[0]


def connect(sqlite = None, sql = None, plotorder = None, user = None,
20 changes: 11 additions & 9 deletions snowav/framework/figures.py
@@ -83,6 +83,9 @@ def figures(self):

self.flight_diff_fig_names, self.flight_delta_vol_df = flt_image_change(args, self._logger)

if self.flight_diff_fig_names == []:
self.flt_flag = False

if self.swi_flag:
image = np.zeros_like(self.outputs['swi_z'][0])
for n in range(self.ixs,self.ixe):
@@ -214,10 +217,10 @@ def figures(self):
swe_per = collect(connector, args['plotorder'], args['basins'],
args['start_date'], args['end_date'], 'swe_z',
args['run_name'], args['edges'], 'daily')
density = collect(connector, args['plotorder'], args['basins'],
rho = collect(connector, args['plotorder'], args['basins'],
wy_start, args['end_date'], 'density',
args['run_name'], args['edges'], 'daily')
density_per = collect(connector, args['plotorder'], args['basins'],
rho_per = collect(connector, args['plotorder'], args['basins'],
args['start_date'], args['end_date'], 'density',
args['run_name'], args['edges'], 'daily')
snow_line = collect(connector, args['plotorder'], args['basins'],
@@ -238,10 +241,10 @@ def figures(self):
first_row = swe_per.iloc[[0]].values[0]
swe_per = swe_per.apply(lambda row: row - first_row, axis=1)

density = density.fillna(0)
density_per = density_per.fillna(0)
first_row = density_per.iloc[[0]].values[0]
density_per = density_per.apply(lambda row: row - first_row, axis=1)
rho = rho.fillna(0)
rho_per = rho_per.fillna(0)
first_row = rho_per.iloc[[0]].values[0]
rho_per = rho_per.apply(lambda row: row - first_row, axis=1)

precip = precip.fillna(0)
precip_per = precip_per.fillna(0)
@@ -259,8 +262,8 @@ def figures(self):
args['precip_per'] = precip_per
args['swe'] = swe
args['swe_per'] = swe_per
args['density'] = density
args['density_per'] = density_per
args['density'] = rho
args['density_per'] = rho_per
args['elevlbl'] = self.elevlbl

diagnostics(args, self._logger)
@@ -349,7 +352,6 @@ def figures(self):

if self.mysql is not None:
dbs = 'sql'

else:
dbs = 'sqlite'

9 changes: 6 additions & 3 deletions snowav/framework/framework.py
@@ -8,6 +8,7 @@
from snowav.report.report import report
from snowav.database.database import run_metadata
from snowav.inflow.inflow import excel_to_csv
from datetime import datetime

class snowav(object):

@@ -56,8 +57,8 @@ def __init__(self, config_file = None, external_logger = None, awsm = None):
self.precip_flag, out, pre, rain, density = process(self.pargs)

# gather process() outputs
for log in out:
self._logger.info(log)
# for log in out:
# self._logger.info(log)

self.density = density
self.rain_total = rain
@@ -109,4 +110,6 @@ def __init__(self, config_file = None, external_logger = None, awsm = None):
if self.report_flag:
report(self)

self._logger.info(' Completed snowav processing!')
elapsed = str(datetime.now() - self.proc_time_start)

self._logger.info(' Completed snowav processing, elapsed time: {}'.format(elapsed))
10 changes: 8 additions & 2 deletions snowav/framework/outputs.py
@@ -8,7 +8,7 @@
import netCDF4 as nc

def outputs(run_dirs = None, start_date = None, end_date = None,
filetype = None, wy = None, flight_dates = None):
filetype = None, wy = None, flight_dates = None, loglevel = None):
'''
This uses start_date and end_date to load the snow.nc and em.nc of interest
within a report period to the outputs format that will be used in process().
@@ -47,6 +47,7 @@
'''

log = []
rdict = {}
dirs = copy.deepcopy(run_dirs)
outputs = {'swi_z':[], 'evap_z':[], 'snowmelt':[], 'swe_z':[],'depth':[],
@@ -61,6 +62,9 @@
for path in dirs:
snowfile = os.path.join(path, 'snow.nc')

if loglevel == 'DEBUG':
log.append(' Reading dates in: {}'.format(snowfile))

# Consider making this a warning, with an else: .remove(path)
# to catch other files that are in these directories
if not os.path.isfile(snowfile):
@@ -83,6 +87,8 @@
# Only load the rundirs that we need
if (t.date() >= start.date()) and (t.date() <= end.date()):

log.append(' Loading: {}'.format(snowfile))

st_hr = calculate_wyhr_from_date(start)
en_hr = calculate_wyhr_from_date(end)

@@ -134,4 +140,4 @@
outputs['dates'].append(output.dates[idx])
outputs['time'].append(output.time[idx])

return outputs, dirs, run_dirs, rdict
return outputs, dirs, run_dirs, rdict, log