iss #387 merged latest dev in this branch

brightwind-dev · May 30, 2023 · ed1e872 · ed1e872
2 parents 21d0a1e + 718f436
commit ed1e872
Show file tree

Hide file tree

Showing 11 changed files with 371 additions and 149 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,7 @@ _line_colors_, _legend_ and _figure_size_. (Issue #[349](https://github.com/brig
 18. Fixed bug for `SpeedSort` where the `sector_predict` function was not interpolating data using two fit lines. (Issue #[377](https://github.com/brightwind-dev/brightwind/issues/377)).
 19. Updated `_ColorPalette` to automatically update color_list, color_map, color_map_cyclical and adjusted lightness color variables when main colors (primary, secondary etc.) are changed. (Issue #[381](https://github.com/brightwind-dev/brightwind/issues/381)).
 19. Allow `momm` function to derive a seasonal adjusted mean of monthly mean, if user sets `seasonal_adjustment` to true, and allow to apply a `coverage_threshold` (Issue #[298](https://github.com/brightwind-dev/brightwind/issues/298))
+20. Updated `slice_data`, `offset_timestamps`, `_LoadBWPlatform.get_data` functions to use 'less than' the `date_to` if provided as input. (Issue #[385](https://github.com/brightwind-dev/brightwind/issues/385)).
 
 
 

diff --git a/brightwind/analyse/analyse.py b/brightwind/analyse/analyse.py
@@ -377,7 +377,7 @@ def _mean_of_monthly_means_seasonal_adjusted(var_series):
     return result
 
 
-def momm(data, date_from: str = '', date_to: str = '', seasonal_adjustment=False, coverage_threshold=None):
+def momm(data, date_from=None, date_to=None, seasonal_adjustment=False, coverage_threshold=None):
     """
     Calculates and returns the mean of monthly mean speed. This accepts a DataFrame with timestamps as index column and
     another column with wind speed. You can also specify date_from and date_to to calculate the mean of monthly
@@ -398,10 +398,18 @@ def momm(data, date_from: str = '', date_to: str = '', seasonal_adjustment=False
     is not derived and the function will raise an error.
 
     :param data:                Pandas DataFrame or Series with timestamp as index and a column with wind speed
-    :type data:                 pd.DataFrame or pd.Series
-    :param date_from:           Start date as string in format YYYY-MM-DD
+    :type data:                 pandas.DataFrame or pandas.Series
+    :param date_from:           Start date as string in format YYYY-MM-DD or YYYY-MM-DD hh:mm. Start date is included in
+                                the sliced data. If format of date_from is YYYY-MM-DD, then the first timestamp of the
+                                date is used (e.g. if date_from=2023-01-01 then 2023-01-01 00:00 is the first timestamp
+                                of the sliced data). If date_from is not given then the sliced data are taken from the
+                                first timestamp of the dataset.
     :type:                      str
-    :param date_to:             End date as string in format YYYY-MM-DD
+    :param date_to:             End date as string in format YYYY-MM-DD or YYYY-MM-DD hh:mm. End date is not included in
+                                the sliced data. If format of date_to is YYYY-MM-DD, then the last timestamp of the
+                                previous day is used (e.g. if date_to=2023-02-01 then 2023-01-31 23:50 is the last
+                                timestamp of the sliced data). If date_to is not given then the sliced data are taken up
+                                to the last timestamp of the dataset.
     :type:                      str
     :param seasonal_adjustment: Optional, False by default. If True, returns the mean of monthly mean seasonally
                                 adjusted
@@ -418,7 +426,7 @@ def momm(data, date_from: str = '', date_to: str = '', seasonal_adjustment=False
                                 Default value is None, except when 'seasonal_adjustment'=True when it is 0.8.
     :type coverage_threshold:   int, float or None
     :returns:                   Long term reference speed
-    :rtype:                     panda.Dataframe
+    :rtype:                     pandas.DataFrame
 
     **Example usage**
     ::

diff --git a/brightwind/analyse/plot.py b/brightwind/analyse/plot.py
@@ -471,18 +471,24 @@ def _timeseries_subplot(x, y, x_label=None, y_label=None, x_limits=None, y_limit
     return ax
 
 
-def plot_timeseries(data, date_from='', date_to='', x_label=None, y_label=None, y_limits=None,
+def plot_timeseries(data, date_from=None, date_to=None, x_label=None, y_label=None, y_limits=None,
                     x_tick_label_angle=25, line_colors=None, legend=True, figure_size=(15, 8)):
     """
     Plot a timeseries of data.
 
     :param data:                    Data in the form of a Pandas DataFrame/Series to plot.
     :type data:                     pd.DataFrame, pd.Series
-    :param date_from:               Start date used for plotting, if not specified the first timestamp of data is
-                                    considered. Should be in yyyy-mm-dd format
+    :param date_from:               Start date as string in format YYYY-MM-DD or YYYY-MM-DD hh:mm. Start date is
+                                    included in the sliced data. If format of date_from is YYYY-MM-DD, then the first
+                                    timestamp of the date is used (e.g. if date_from=2023-01-01 then 2023-01-01 00:00
+                                    is the first timestamp of the sliced data). If date_from is not given then the
+                                    sliced data are taken from the first timestamp of the dataset.
     :type date_from:                str
-    :param date_to:                 End date used for plotting, if not specified last timestamp of data is considered.
-                                    Should be in yyyy-mm-dd format
+    :param date_to:                 End date as string in format YYYY-MM-DD or YYYY-MM-DD hh:mm. End date is not
+                                    included in the sliced data. If format of date_to is YYYY-MM-DD, then the last
+                                    timestamp of the previous day is used (e.g. if date_to=2023-02-01 then
+                                    2023-01-31 23:50 is the last timestamp of the sliced data). If date_to is not given
+                                    then the sliced data are taken up to the last timestamp of the dataset.
     :type date_to:                  str
     :param x_label:                 Label for the x-axis. Default is None.
     :type x_label:                  str, None
@@ -1695,6 +1701,49 @@ def plot_TI_by_sector(turbulence, wdir, ti):
 
 
 def plot_shear_by_sector(scale_variable, wind_rose_data, calc_method='power_law'):
+    """
+    Plot shear by directional sectors and wind rose.
+
+    :param scale_variable:  Shear values by directional sectors derived with brightwind.Shear.BySector().
+    :type scale_variable:   pandas.Series
+    :param wind_rose_data:  Wind speed %frequency distribution by sectors, with wind direction sector as row indexes.
+                            This distribution is derived using brightwind.dist_by_dir_sector() function.
+    :type wind_rose_data:   pandas.Series
+    :param calc_method:     Method to use for calculation, either 'power_law' (returns alpha) or 'log_law'
+                            (returns the roughness coefficient).
+    :type calc_method:      str
+    :return:                Plots shear values by directional sectors & distribution of wind speed by directional bins.
+
+    **Example usage**
+        ::
+            import brightwind as bw
+            data = bw.load_csv(bw.demo_datasets.demo_data)
+
+            alpha = pd.Series({'345.0-15.0': 0.216, '15.0-45.0': 0.196, '45.0-75.0': 0.170, '75.0-105.0': 0.182,
+                     '105.0-135.0': 0.148, '135.0-165.0': 0.129, '165.0-195.0': 0.156, '195.0-225.0': 0.159,
+                     '225.0-255.0': 0.160, '255.0-285.0': 0.169, '285.0-315.0': 0.187, '315.0-345.0': 0.188})
+
+            roughness = pd.Series({'345.0-15.0': 0.537, '15.0-45.0': 0.342, '45.0-75.0': 0.156, '75.0-105.0': 0.231,
+                         '105.0-135.0': 0.223, '135.0-165.0': 0.124, '165.0-195.0': 0.135,
+                         '195.0-225.0': 0.145, '225.0-255.0': 0.108, '255.0-285.0': 0.149,
+                         '285.0-315.0': 0.263, '315.0-345.0': 0.275})
+
+            wind_rose_plot, wind_rose_dist = bw.analyse.analyse.dist_by_dir_sector(data.Spd80mS, data.Dir78mS,
+                                                    direction_bin_array=None,
+                                                    sectors=12,
+                                                    direction_bin_labels=None,
+                                                    return_data=True)
+
+            # Plots shear by directional sectors with calculation method as 'power law'.
+            bw.analyse.plot.plot_shear_by_sector(scale_variable=alpha, wind_rose_data=wind_rose_dist,
+            calc_method='power_law')
+
+            # Plots shear by directional sectors with calculation method as 'log law'.
+            bw.analyse.plot.plot_shear_by_sector(scale_variable=roughness, wind_rose_data=wind_rose_dist,
+            calc_method='log_law')
+
+
+    """
     result = wind_rose_data.copy(deep=False)
     radians = np.radians(utils._get_dir_sector_mid_pts(scale_variable.index))
     sectors = len(result)
@@ -1715,7 +1764,7 @@ def plot_shear_by_sector(scale_variable, wind_rose_data, calc_method='power_law'
 
     scale_variable_y = np.append(scale_variable, scale_variable[0])
     plot_x = np.append(radians, radians[0])
-    scale_to_fit = max(scale_variable) / max(result / 100)
+    scale_to_fit = max(scale_variable[np.isfinite(scale_variable)]) / max(result / 100)
     wind_rose_r = (result / 100) * scale_to_fit
     bin_edges = np.array(bin_edges)
     width = pd.Series([], dtype='float64')
@@ -1732,7 +1781,8 @@ def plot_shear_by_sector(scale_variable, wind_rose_data, calc_method='power_law'
            edgecolor=[COLOR_PALETTE.secondary for i in range(len(result))],
            alpha=0.8, label='Wind_Directional_Frequency')
 
-    maxlevel = (max(scale_variable_y)) + max(scale_variable_y) * .1
+    maxlevel = (max(scale_variable_y[np.isfinite(scale_variable_y)])) + max(
+        scale_variable_y[np.isfinite(scale_variable_y)]) * .1
     ax.set_thetagrids(radians * 180 / np.pi)
     ax.plot(plot_x, scale_variable_y, color=COLOR_PALETTE.primary, linewidth=4, label=label)
     ax.set_ylim(0, top=maxlevel)

diff --git a/brightwind/load/load.py b/brightwind/load/load.py
@@ -1606,14 +1606,16 @@ def get_data(measurement_location_uuid, from_date=None, to_date=None):
         """
         Retrieve measurement data from the brightwind platform and return it in a DataFrame with index as Timestamp.
 
-        :param measurement_location_uuid: The measurement location uuid.
-        :type measurement_location_uuid: str or uuid
-        :param from_date: Datetime representing the start of the measurement period you want.
-        :type from_date: datetime or str
-        :param to_date: Datetime representing the end of the measurement period you want.
-        :type to_date: datetime or str
-        :return: DataFrame with index as a timestamp.
-        :rtype: pd.DataFrame
+        :param measurement_location_uuid:   The measurement location uuid.
+        :type measurement_location_uuid:    str or uuid
+        :param from_date:                   Datetime representing the start of the measurement period you want
+                                            (included).
+        :type from_date:                    datetime or str
+        :param to_date:                     Datetime representing the end of the measurement period you want
+                                            (not included).
+        :type to_date:                      datetime or str
+        :return:                            DataFrame with index as a timestamp.
+        :rtype:                             pd.DataFrame
 
         **Example usage**
         ::
@@ -1645,7 +1647,7 @@ def get_data(measurement_location_uuid, from_date=None, to_date=None):
         if isinstance(from_date, str):
             from_date = parse(from_date)
         if isinstance(to_date, str):
-            to_date = parse(to_date)
+            to_date = parse(to_date) - datetime.timedelta(seconds=1)
 
         response = requests.get(_LoadBWPlatform._base_url + '/api/resource-data-measurement-location', params={
             'measurement_location_uuid': measurement_location_uuid,
@@ -1820,20 +1822,22 @@ def load_cleaning_file(filepath, date_from_col_name='Start', date_to_col_name='S
     | Spd80m | 2018-10-23 12:30:00 | 2018-10-25 14:20:00
     | Dir78m | 2018-12-23 02:40:00 |
 
-    :param filepath:  File path of the file which contains the the list of sensor names along with the start and
-           end timestamps of the periods that are flagged.
+    :param filepath:            File path of the file which contains the list of sensor names along with the start
+                                and end timestamps of the periods that are flagged.
     :type filepath: str
-    :param date_from_col_name: The column name of the date_from or the start date of the period to be cleaned.
-    :type date_from_col_name: str, default 'Start'
-    :param date_to_col_name: The column name of the date_to or the end date of the period to be cleaned.
-    :type date_to_col_name: str, default 'Stop'
-    :param dayfirst: If your timestamp starts with the day first e.g. DD/MM/YYYY then set this to true. Pandas defaults
-            to reading 10/11/12 as 2012-10-11 (11-Oct-2012). If True, pandas parses dates with the day
-            first, eg 10/11/12 is parsed as 2012-11-10. More info on pandas.read_csv parameters.
-    :type dayfirst: bool, default False
-    :param kwargs: All the kwargs from pandas.read_csv can be passed to this function.
-    :return: A DataFrame where each row contains the sensor name and the start and end timestamps of the flagged data.
-    :rtype: pandas.DataFrame
+    :param date_from_col_name:  The column name of the date_from or the start date of the period to be cleaned.
+    :type date_from_col_name:   str, default 'Start'
+    :param date_to_col_name:    The column name of the date_to or the end date of the period to be cleaned.
+    :type date_to_col_name:     str, default 'Stop'
+    :param dayfirst:            If your timestamp starts with the day first e.g. DD/MM/YYYY then set this to true.
+                                Pandas defaults to reading 10/11/12 as 2012-10-11 (11-Oct-2012). If True, pandas parses
+                                dates with the day first, eg 10/11/12 is parsed as 2012-11-10. More info on
+                                pandas.read_csv parameters.
+    :type dayfirst:             bool, default False
+    :param kwargs:              All the kwargs from pandas.read_csv can be passed to this function.
+    :return:                    A DataFrame where each row contains the sensor name and the start and end timestamps of
+                                the flagged data.
+    :rtype:                     pandas.DataFrame
 
     **Example usage**
     ::
@@ -1870,32 +1874,33 @@ def apply_cleaning(data, cleaning_file_or_df, inplace=False, sensor_col_name='Se
     | Spd80m | 2018-10-23 12:30:00 | 2018-10-25 14:20:00
     | Dir78m | 2018-12-23 02:40:00 |
 
-    :param data: Data to be cleaned.
-    :type data: pandas.DataFrame
-    :param cleaning_file_or_df: File path of the csv file or a pandas DataFrame which contains the list of sensor
-                                names along with the start and end timestamps of the periods that are flagged.
-    :type cleaning_file_or_df: str, pd.DataFrame
-    :param inplace: If 'inplace' is True, the original data, 'data', will be modified and and replaced with the cleaned
-                    data. If 'inplace' is False, the original data will not be touched and instead a new object
-                    containing the cleaned data is created. To store this cleaned data, please ensure it is assigned
-                    to a new variable.
+    :param data:                    Data to be cleaned.
+    :type data:                     pandas.DataFrame
+    :param cleaning_file_or_df:     File path of the csv file or a pandas DataFrame which contains the list of sensor
+                                    names along with the start and end timestamps of the periods that are flagged.
+    :type cleaning_file_or_df:      str, pd.DataFrame
+    :param inplace:                 If 'inplace' is True, the original data, 'data', will be modified and replaced
+                                    with the cleaned data. If 'inplace' is False, the original data will not be touched
+                                    and instead a new object containing the cleaned data is created. To store this
+                                    cleaned data, please ensure it is assigned to a new variable.
     :type inplace: Boolean
-    :param sensor_col_name: The column name which contains the list of sensor names that have flagged periods.
-    :type sensor_col_name: str, default 'Sensor'
-    :param date_from_col_name: The column name of the date_from or the start date of the period to be cleaned.
-    :type date_from_col_name: str, default 'Start'
-    :param date_to_col_name: The column name of the date_to or the end date of the period to be cleaned.
-    :type date_to_col_name: str, default 'Stop'
-    :param all_sensors_descriptor: A text descriptor that represents ALL sensors in the DataFrame.
-    :type all_sensors_descriptor: str, default 'All'
-    :param replacement_text: Text used to replace the flagged data.
-    :type replacement_text: str, default 'NaN'
-    :param dayfirst: If your timestamp starts with the day first e.g. DD/MM/YYYY then set this to true. Pandas defaults
-            to reading 10/11/12 as 2012-10-11 (11-Oct-2012). If True, pandas parses dates with the day
-            first, eg 10/11/12 is parsed as 2012-11-10. More info on pandas.read_csv parameters.
-    :type dayfirst: bool, default False
-    :return: DataFrame with the flagged data removed.
-    :rtype: pandas.DataFrame
+    :param sensor_col_name:         The column name which contains the list of sensor names that have flagged periods.
+    :type sensor_col_name:          str, default 'Sensor'
+    :param date_from_col_name:      The column name of the date_from or the start date of the period to be cleaned.
+    :type date_from_col_name:       str, default 'Start'
+    :param date_to_col_name:        The column name of the date_to or the end date of the period to be cleaned.
+    :type date_to_col_name:         str, default 'Stop'
+    :param all_sensors_descriptor:  A text descriptor that represents ALL sensors in the DataFrame.
+    :type all_sensors_descriptor:   str, default 'All'
+    :param replacement_text:        Text used to replace the flagged data.
+    :type replacement_text:         str, default 'NaN'
+    :param dayfirst:                If your timestamp starts with the day first e.g. DD/MM/YYYY then set this to true.
+                                    Pandas defaults to reading 10/11/12 as 2012-10-11 (11-Oct-2012). If True, pandas
+                                    parses dates with the day first, eg 10/11/12 is parsed as 2012-11-10.
+                                    More info on pandas.read_csv parameters.
+    :type dayfirst:                 bool, default False
+    :return:                        DataFrame with the flagged data removed.
+    :rtype:                         pandas.DataFrame
 
     **Example usage**
     ::