Merge pull request #184 from scotthavens/gridded_data

Input data overhaul

Scott Havens committed Jul 16, 2020
2 parents b12a27b + bdd04a5 commit 8b65889

Showing 46 changed files with 1,410 additions and 1,889 deletions.
40 changes: 28 additions & 12 deletions docs/api/smrf.data.rst
@@ -4,34 +4,50 @@ smrf.data package
Submodules
----------

-smrf.data.loadData module
--------------------------
+smrf.data.csv module
+--------------------

-.. automodule:: smrf.data.loadData
+.. automodule:: smrf.data.csv
:members:
:undoc-members:
:show-inheritance:

-smrf.data.loadGrid module
--------------------------
+smrf.data.hrrr\_grib module
+---------------------------

-.. automodule:: smrf.data.loadGrid
+.. automodule:: smrf.data.hrrr_grib
:members:
:undoc-members:
:show-inheritance:

-smrf.data.loadTopo module
--------------------------
+smrf.data.load\_data module
+---------------------------

-.. automodule:: smrf.data.loadTopo
+.. automodule:: smrf.data.load_data
:members:
:undoc-members:
:show-inheritance:

-smrf.data.mysql\_data module
-----------------------------
+smrf.data.load\_topo module
+---------------------------

-.. automodule:: smrf.data.mysql_data
+.. automodule:: smrf.data.load_topo
:members:
:undoc-members:
:show-inheritance:

+smrf.data.netcdf module
+-----------------------
+
+.. automodule:: smrf.data.netcdf
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+smrf.data.wrf module
+--------------------
+
+.. automodule:: smrf.data.wrf
+   :members:
+   :undoc-members:
+   :show-inheritance:
2 changes: 1 addition & 1 deletion docs/getting_started/run_smrf.rst
@@ -30,7 +30,7 @@ below is the function :mod:`run_smrf <smrf.framework.model_framework.run_smrf>`.
s.loadTopo()
# initialize the distribution
-    s.initializeDistribution()
+    s.create_distribution()
# initialize the outputs if desired
s.initializeOutput()
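
For orientation, a minimal sketch of the renamed call in context; the ``SMRF`` context-manager usage follows the surrounding docs, and the config path is a placeholder:

    # a sketch, not the full run_smrf() function; 'config.ini' is a placeholder
    from smrf.framework.model_framework import SMRF

    with SMRF('config.ini') as s:
        s.loadTopo()               # load the topo data
        s.create_distribution()    # renamed from initializeDistribution()
        s.initializeOutput()       # initialize the outputs if desired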
112 changes: 10 additions & 102 deletions docs/user_guide/auto_config.rst
@@ -112,115 +112,15 @@ csv
|

-mysql
------
-
-| **air_temp**
-| name of the table column containing station air temperature
-| *Default: air_temp*
-| *Type: string*
-|
-| **cloud_factor**
-| name of the table column containing station cloud factor
-| *Default: cloud_factor*
-| *Type: string*
-|
-| **data_table**
-| name of the database table containing station data
-| *Default: tbl_level2*
-| *Type: string*
-|
-| **database**
-| name of the database containing station data
-| *Default: weather_db*
-| *Type: string*
-|
-| **host**
-| IP address to server.
-| *Default: None*
-| *Type: string*
-|
-| **metadata**
-| name of the database table containing station metadata
-| *Default: tbl_metadata*
-| *Type: string*
-|
-| **password**
-| password used for database login.
-| *Default: None*
-| *Type: password*
-|
-| **port**
-| Port for MySQL database.
-| *Default: 3606*
-| *Type: int*
-|
-| **precip**
-| name of the table column containing station precipitation
-| *Default: precip_accum*
-| *Type: string*
-|
-| **solar**
-| name of the table column containing station solar radiation
-| *Default: solar_radiation*
-| *Type: string*
-|
-| **station_table**
-| name of the database table containing client and source
-| *Default: tbl_stations*
-| *Type: string*
-|
-| **stations**
-| List of station IDs to use for distributing any of the variables
-| *Default: None*
-| *Type: station*
-|
-| **user**
-| username for database login.
-| *Default: None*
-| *Type: string*
-|
-| **vapor_pressure**
-| name of the table column containing station vapor pressure
-| *Default: vapor_pressure*
-| *Type: string*
-|
-| **wind_direction**
-| name of the table column containing station wind direction
-| *Default: wind_direction*
-| *Type: string*
-|
-| **wind_speed**
-| name of the table column containing station wind speed
-| *Default: wind_speed*
-| *Type: string*
-|
-
gridded
-------

| **data_type**
| Type of gridded input data
-| *Default: hrrr_netcdf*
+| *Default: hrrr_grib*
| *Type: string*
| *Options:*
-*wrf hrrr_grib netcdf hrrr_netcdf*
+*wrf hrrr_grib netcdf*
|
| **hrrr_directory**
@@ -235,6 +135,14 @@ gridded
| *Type: bool*
|
+| **hrrr_load_method**
+| Method to load the HRRR data: either load all data first or for each timestep
+| *Default: first*
+| *Type: string*
+| *Options:*
+*first timestep*
+|
| **netcdf_file**
| Path to the netCDF file containing weather data
| *Default: None*
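
Putting the gridded options together, a sketch of a ``[gridded]`` section using the new HRRR loader; the directory path is a placeholder, and the values follow the defaults and options documented above:

    [gridded]
    data_type:        hrrr_grib
    hrrr_directory:   /path/to/hrrr     # placeholder
    hrrr_load_method: timestep          # or 'first' to load all data up front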
40 changes: 2 additions & 38 deletions docs/user_guide/input_data.rst
@@ -129,44 +129,8 @@ Example data files can be found in the ``tests`` directory for RME.
MySQL Database
``````````````

-The MySQL database is more flexible than CSV files but requires more effort to setup. However,
-SMRF will only import the data and stations that were requested without loading in additional
-data that isn't required. See :mod:`smrf.data.mysql_data` for more information.
-
-The data table contains all the measurement data with a single row representing a measurement
-time for a station. The date column (i.e. ``date_time``) must be a ``DATETIME`` data type with
-a unique constraint on the ``date_time`` column and ``primary_id`` column.
-
-================ ========== ==== ==== === =====
-date_time        primary_id var1 var2 ... varN
-================ ========== ==== ==== === =====
-10/01/2008 00:00 ID_1       5.2  13.2 ... -1.3
-10/01/2008 00:00 ID_2       1.1  0    ... -10.3
-10/01/2008 01:00 ID_1       6.3  NAN  ... -2.5
-10/01/2008 01:00 ID_2       0.3  7.1  ... 9.4
-================ ========== ==== ==== === =====
-
-The metadata table is the same format as the CSV files, with a primary_id, X, Y, and elevation
-column. A benefit to using MySQL is that we can use a ``client`` as a way to group multiple
-stations to be used for a given model run. For example, we can have a client named BRB, which
-will have all the station ID's for the stations that would be used to run SMRF. Then we can
-specify the client in the configuration file instead of listing out all the station ID's. To use
-this feature, a table must be created to hold this information. Then only the station ID's
-matching the client will be imported. The following is how the table should be set up. Source is
-used to track where the data is coming from.
-
-========== ====== ========
-station_id client source
-========== ====== ========
-ID_1       BRB    Mesowest
-ID_2       BRB    Mesowest
-ID_3       TUOL   CDEC
-...        ...    ...
-ID_N       BRB    Mesowest
-========== ====== ========
-
-Visit the `Weather Database GitHub page <https://github.com/USDA-ARS-NWRC/weather_database>`_ if you'd
-like to use a MySQL database.
+The MySQL database has been deprecated as of SMRF v0.11.0. If that feature is needed,
+we recommend using v0.9.X or exporting the tables to csv format.
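
For anyone migrating off the database, a rough sketch of exporting one variable from the old MySQL layout into the wide CSV layout SMRF reads; the connection string, table, and column names are assumptions based on the old defaults above:

    # migration sketch, not part of SMRF; credentials and names are placeholders
    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine('mysql+pymysql://user:password@host/weather_db')

    # old defaults: data in 'tbl_level2', one row per station per time step
    df = pd.read_sql(
        'SELECT date_time, primary_id, air_temp FROM tbl_level2', engine)

    # pivot to one column per station, matching the csv section's layout
    wide = df.pivot(index='date_time', columns='primary_id', values='air_temp')
    wide.to_csv('air_temp.csv')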


Weather Research and Forecasting (WRF)
1 change: 0 additions & 1 deletion requirements.txt
@@ -1,7 +1,6 @@
coloredlogs
Cython>=0.28.4
inicheck>=0.9.0,<0.10.0
-mysql-connector-python-rf==2.2.2
netCDF4>=1.2.9
numpy>=1.14.0,<1.19.0
pandas>=0.23.0
4 changes: 2 additions & 2 deletions smrf/__init__.py
@@ -9,6 +9,7 @@
__version__ = get_distribution(__name__).version
except DistributionNotFound:
__version__ = 'unknown'

__core_config__ = os.path.abspath(
os.path.dirname(__file__) + '/framework/CoreConfig.ini')
__recipes__ = os.path.abspath(os.path.dirname(
@@ -21,7 +22,6 @@
"time": "Dates to run model",
"stations": "Stations to use",
"csv": "CSV section configurations",
"mysql": "MySQL database",
"gridded": "Gridded datasets configurations",
"air_temp": "Air temperature distribution",
"vapor_pressure": "Vapor pressure distribution",
@@ -36,7 +36,7 @@
"system": "System variables and Logging"
}

-# from . import data, distribute, envphys, framework, output, spatial, utils # isort:skip
+from . import utils, data, distribute, envphys, framework, output, spatial # isort:skip

__config_header__ = "Config File for SMRF {0}\n" \
"For more SMRF related help see:\n" \
8 changes: 7 additions & 1 deletion smrf/data/__init__.py
@@ -1,3 +1,9 @@
# -*- coding: utf-8 -*-
# flake8: noqa
-from . import loadData, loadGrid, loadTopo, mysql_data
+from .csv import InputCSV
+from .hrrr_grib import InputGribHRRR
+from .load_topo import Topo
+from .netcdf import InputNetcdf
+from .wrf import InputWRF
+
+from .load_data import InputData # isort:skip
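
In downstream code the class imports above replace the old ``loadData``/``loadGrid``-style modules; for example, the new public surface is reachable as:

    from smrf.data import InputCSV, InputData, InputGribHRRR, InputNetcdf, InputWRF, Topo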
81 changes: 81 additions & 0 deletions smrf/data/csv.py
@@ -0,0 +1,81 @@
import logging

import pandas as pd

from smrf.utils.utils import check_station_colocation


class InputCSV():

DATA_TYPE = 'csv'

def __init__(self, start_date, end_date, stations=None, config=None):

self.start_date = start_date
self.end_date = end_date
self.stations = stations
self.config = config
self.time_zone = start_date.tzinfo

self._logger = logging.getLogger(__name__)

if self.stations is not None:
self._logger.debug('Using only stations {0}'.format(
", ".join(self.stations)))

def load(self):
"""
Load the data from a csv file
Fields that are operated on
- metadata -> dictionary, one for each station,
must have at least the following:
primary_id, X, Y, elevation
- csv data files -> dictionary, one for each time step,
must have at least the following columns:
date_time, column names matching metadata.primary_id
"""

self._logger.info('Reading data coming from CSV files')

variable_list = list(self.config.keys())
variable_list.remove('stations')

self._logger.debug('Reading {}...'.format(self.config['metadata']))
metadata = pd.read_csv(
self.config['metadata'],
index_col='primary_id')
# Ensure all stations are all caps.
metadata.index = [s.upper() for s in metadata.index]
self.metadata = metadata
variable_list.remove('metadata')

for variable in variable_list:
filename = self.config[variable]

self._logger.debug('Reading {}...'.format(filename))

df = pd.read_csv(
filename,
index_col='date_time',
parse_dates=[0])
df = df.tz_localize(self.time_zone)
df.columns = [s.upper() for s in df.columns]

if self.stations is not None:
df = df[df.columns[(df.columns).isin(self.stations)]]

# Only get the desired dates
df = df[self.start_date:self.end_date]

if df.empty:
raise Exception("No CSV data found for {0}"
"".format(variable))

setattr(self, variable, df)

def check_colocation(self):
# Check all sections for stations that are colocated
colocated = check_station_colocation(metadata=self.metadata)
if colocated is not None:
self._logger.error(
"Stations are colocated: {}".format(','.join(colocated[0])))
