Merge pull request #436 from brightwind-dev/dev
Pull request for v2.2.0 release
stephenholleran committed May 14, 2024
2 parents 968b20c + 470c15a commit 834be32
Showing 12 changed files with 578 additions and 155 deletions.
CHANGELOG.md (10 additions, 0 deletions)
@@ -9,6 +9,16 @@ Given a version number MAJOR.MINOR.PATCH, increment the:

Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.

+## [2.2.0]
+1. Modify `Correl.OrdinaryLeastSquares()` to optionally force the intercept to pass through the origin (Issue [#412](https://github.com/brightwind-dev/brightwind/issues/412)).
+1. Update `LoadBrightHub.get_data()` to use a new API (Issue [#419](https://github.com/brightwind-dev/brightwind/issues/419)).
+1. Add new function `LoadBrightHub.get_cleaning_log()` to pull the cleaning log for a particular measurement station on BrightHub (Issue [#405](https://github.com/brightwind-dev/brightwind/issues/405)).
+1. Add new function `LoadBrightHub.get_reanalysis()` to pull reanalysis datasets from BrightHub (Issue [#431](https://github.com/brightwind-dev/brightwind/issues/431)).
+1. Modify `load.apply_cleaning()` and `apply_cleaning_windographer()` to clean the columns specified in a cleaning file by matching the sensor name from the beginning of the column name (Issue [#249](https://github.com/brightwind-dev/brightwind/issues/249)).
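As a quick orientation for the first item above, a minimal usage sketch (the `m2_ne` and `data` dataset names are placeholders borrowed from the docstring examples further down this diff):

import brightwind as bw

# Fit a monthly correlation whose regression line is forced through the origin
# (new in v2.2.0); 'm2_ne' and 'data' are placeholder datasets.
ols_cor = bw.Correl.OrdinaryLeastSquares(m2_ne['WS50m_m/s'], data['Spd80mN'],
                                         averaging_prd='1M', coverage_threshold=0.95,
                                         forced_intercept_origin=True)
ols_cor.run()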



## [2.1.0]
1. Update behaviour of `time_continuity_gaps` to find any gap that
is not equal to the derived temporal resolution.
LICENSE.txt (1 addition, 1 deletion)
@@ -1,6 +1,6 @@
MIT License

-Copyright (c) 2023 Stephen Holleran
+Copyright (c) 2024 Stephen Holleran

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
brightwind/__init__.py (1 addition, 1 deletion)
@@ -12,4 +12,4 @@

__all__ = ['analyse', 'transform', 'export', 'load', 'demo_datasets']

-__version__ = '2.1.0'
+__version__ = '2.2.0'
brightwind/analyse/analyse.py (7 additions, 4 deletions)
@@ -1325,10 +1325,13 @@ def time_continuity_gaps(data):
    The gaps are defined by showing the start and end timestamps just before and after the missing data periods.
-    A missing data period is one where data is not available for some consecutive timestamps. This breaks
-    time continuity of the data. The function derives the temporal resolution of the data by
-    finding the most common time difference between consecutive timestamps. Then it searches where the time
-    difference between consecutive timestamps does not match the resolution, this is the missing data period.
+    A missing data period is one where data is not available for some consecutive timestamps. Also, where a
+    timestamp exists for a row of data but all values in that row are NaNs, the row is considered a time
+    continuity gap, as it represents a break in the ordinary functioning of the logging unit.
+    The function derives the temporal resolution of the data by finding the most common time difference between
+    consecutive timestamps. It then searches for places where the time difference between consecutive
+    timestamps does not match the resolution; these are the missing data periods.
    It returns a DataFrame where the first column is the starting timestamp of the missing period (timestamp recorded
    immediately before the gap) and the second column is the end date of the missing period (timestamp recorded
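A minimal sketch of the gap-finding logic this docstring describes (illustrative only, not the library's implementation; assumes `data` is a DataFrame with a DatetimeIndex):

import pandas as pd

def sketch_time_gaps(data):
    # Rows where every value is NaN count as gaps too, per the docstring above.
    data = data.dropna(how='all')
    diffs = data.index.to_series().diff()
    resolution = diffs.mode()[0]  # most common time step = derived temporal resolution
    breaks = diffs[diffs.notna() & (diffs != resolution)]
    return pd.DataFrame({'Date From': breaks.index - breaks.values,  # timestamp just before each gap
                         'Date To': breaks.index})                   # timestamp just after each gap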
brightwind/analyse/correlation.py (49 additions, 15 deletions)
@@ -18,7 +18,8 @@

class CorrelBase:
    def __init__(self, ref_spd, target_spd, averaging_prd, coverage_threshold=None, ref_dir=None, target_dir=None,
-                 sectors=12, direction_bin_array=None, ref_aggregation_method='mean', target_aggregation_method='mean'):
+                 sectors=12, direction_bin_array=None, ref_aggregation_method='mean', target_aggregation_method='mean',
+                 forced_intercept_origin=False):

        self.ref_spd = ref_spd
        self.ref_dir = ref_dir
@@ -28,6 +29,7 @@ def __init__(self, ref_spd, target_spd, averaging_prd, coverage_threshold=None,
        self.coverage_threshold = coverage_threshold
        self.ref_aggregation_method = ref_aggregation_method
        self.target_aggregation_method = target_aggregation_method
+        self.forced_intercept_origin = forced_intercept_origin
        # Get the name of the columns so they can be passed around
        self._ref_spd_col_name = ref_spd.name if ref_spd is not None and isinstance(ref_spd, pd.Series) else None
        self._ref_spd_col_names = ref_spd.columns if ref_spd is not None and isinstance(ref_spd, pd.DataFrame) else None
@@ -142,10 +144,16 @@ def plot(self, figure_size=(10, 10.2)):
line_of_slope_1=True, figure_size=figure_size)

    @staticmethod
-    def _get_r2(target_spd, predict_spd):
+    def _get_r2(target_spd, predict_spd, forced_intercept_origin):
        """Returns the r2 score of the model"""
-        return 1.0 - (sum((target_spd - predict_spd) ** 2) /
-                      (sum((target_spd - target_spd.mean()) ** 2)))
+        if forced_intercept_origin:
+            x = np.nan_to_num(predict_spd.values.flatten()[:, np.newaxis])
+            y = np.nan_to_num(target_spd.values.flatten())
+            p, res = lstsq(x, y)[0:2]
+            return 1 - res / (y.size * y.var())
+        else:
+            return 1.0 - (sum((target_spd - predict_spd) ** 2) /
+                          (sum((target_spd - target_spd.mean()) ** 2)))
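Both branches above compute R2 = 1 - SS_res / SS_tot: in the forced-origin branch, `res` returned by `lstsq` is the sum of squared residuals and `y.size * y.var()` equals SS_tot, since `var()` is the population variance. A quick numeric sanity check (illustrative values only):

import numpy as np

y_true = np.array([2.0, 4.0, 6.0])
y_pred = np.array([2.1, 3.9, 6.2])
ss_res = np.sum((y_true - y_pred) ** 2)         # 0.06
ss_tot = np.sum((y_true - y_true.mean()) ** 2)  # 8.0, same as y_true.size * y_true.var()
print(1 - ss_res / ss_tot)                      # 0.9925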

    @staticmethod
    def _get_logic_dir_sector(ref_dir, sector_min, sector_max):
@@ -345,6 +353,8 @@ class OrdinaryLeastSquares(CorrelBase):
        median, product, summation, standard deviation, variance, maximum and minimum
        respectively.
    :type target_aggregation_method: str
+    :param forced_intercept_origin: Default False; if set to True, forces the regression line to pass through the origin (0, 0).
+    :type forced_intercept_origin: boolean
    :returns: An object representing ordinary least squares fit model
    **Example usage**
@@ -398,35 +408,56 @@ class OrdinaryLeastSquares(CorrelBase):
                                                 averaging_prd='1H', coverage_threshold=0,
                                                 ref_aggregation_method='min', target_aggregation_method='min')
+        # Correlate wind speeds on a monthly basis and force the intercept through the origin.
+        ols_cor = bw.Correl.OrdinaryLeastSquares(m2_ne['WS50m_m/s'], data['Spd80mN'], averaging_prd='1M',
+                                                 coverage_threshold=0.95, forced_intercept_origin=True)
        # Correlate by directional sector, using 36 sectors.
        ols_cor = bw.Correl.OrdinaryLeastSquares(m2_ne['WS50m_m/s'], data['Spd80mN'],
                                                 ref_dir=m2_ne['WD50m_deg'], averaging_prd='1D',
                                                 coverage_threshold=0.9, sectors=36)
+        # Correlate by directional sector forcing the intercept through the origin.
+        ols_cor = bw.Correl.OrdinaryLeastSquares(m2_ne['WS50m_m/s'], data['Spd80mN'],
+                                                 ref_dir=m2_ne['WD50m_deg'], averaging_prd='1H',
+                                                 coverage_threshold=0.9, forced_intercept_origin=True)
    """
    def __init__(self, ref_spd, target_spd, averaging_prd, coverage_threshold=0.9, ref_dir=None, sectors=12,
-                 direction_bin_array=None, ref_aggregation_method='mean', target_aggregation_method='mean'):
+                 direction_bin_array=None, ref_aggregation_method='mean', target_aggregation_method='mean',
+                 forced_intercept_origin=False):
        CorrelBase.__init__(self, ref_spd, target_spd, averaging_prd, coverage_threshold, ref_dir=ref_dir,
                            sectors=sectors, direction_bin_array=direction_bin_array,
                            ref_aggregation_method=ref_aggregation_method,
-                            target_aggregation_method=target_aggregation_method)
+                            target_aggregation_method=target_aggregation_method,
+                            forced_intercept_origin=forced_intercept_origin)

    def __repr__(self):
        return 'Ordinary Least Squares Model ' + str(self.params)

    @staticmethod
-    def _leastsquare(ref_spd, target_spd):
-        p, res = lstsq(np.nan_to_num(ref_spd.values.flatten()[:, np.newaxis] ** [1, 0]),
-                       np.nan_to_num(target_spd.values.flatten()))[0:2]
-        return p[0], p[1]
+    def _leastsquare(ref_spd, target_spd, forced_intercept_origin=False):
+        if forced_intercept_origin:
+            x = np.nan_to_num(ref_spd.values.flatten()[:, np.newaxis])
+            y = np.nan_to_num(target_spd.values.flatten())
+            p, res = lstsq(x, y)[0:2]
+            return p[0], 0
+        else:
+            p, res = lstsq(np.nan_to_num(ref_spd.values.flatten()[:, np.newaxis] ** [1, 0]),
+                           np.nan_to_num(target_spd.values.flatten()))[0:2]
+            return p[0], p[1]
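For the through-origin branch, the one-column design matrix reduces least squares to the closed form slope = sum(x*y) / sum(x**2); a small standalone check of that equivalence (illustrative data, using `scipy.linalg.lstsq`, which matches the call signature used here):

import numpy as np
from scipy.linalg import lstsq

x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([2.1, 3.9, 6.2, 8.0])
slope_lstsq = lstsq(x[:, np.newaxis], y)[0][0]  # one-column design matrix, no intercept term
slope_closed = (x * y).sum() / (x ** 2).sum()   # closed-form through-origin slope
print(np.isclose(slope_lstsq, slope_closed))    # True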

    def run(self, show_params=True):
        if self.ref_dir is None:

            slope, offset = self._leastsquare(ref_spd=self.data[self._ref_spd_col_name],
-                                              target_spd=self.data[self._tar_spd_col_name])
+                                              target_spd=self.data[self._tar_spd_col_name],
+                                              forced_intercept_origin=self.forced_intercept_origin)

            self.params = dict([('slope', slope), ('offset', offset)])
            self.params['r2'] = self._get_r2(target_spd=self.data[self._tar_spd_col_name],
-                                             predict_spd=self._predict(ref_spd=self.data[self._ref_spd_col_name]))
+                                             predict_spd=self._predict(ref_spd=self.data[self._ref_spd_col_name]),
+                                             forced_intercept_origin=self.forced_intercept_origin)

            self.params['num_data_points'] = self.num_data_pts
        elif type(self.ref_dir) is pd.Series:
            self.params = []
@@ -435,11 +466,13 @@ def run(self, show_params=True):
                # print('Processing sector:', sector)
                if len(group) > 1:
                    slope, offset = self._leastsquare(ref_spd=group[self._ref_spd_col_name],
-                                                      target_spd=group[self._tar_spd_col_name])
+                                                      target_spd=group[self._tar_spd_col_name],
+                                                      forced_intercept_origin=self.forced_intercept_origin)
                    predict_ref_spd_sector = self._predict(ref_spd=group[self._ref_spd_col_name],
                                                           slope=slope, offset=offset)
                    r2 = self._get_r2(target_spd=group[self._tar_spd_col_name],
-                                      predict_spd=predict_ref_spd_sector)
+                                      predict_spd=predict_ref_spd_sector,
+                                      forced_intercept_origin=self.forced_intercept_origin)
                else:
                    slope = np.nan
                    offset = np.nan
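When a reference direction series is supplied, the model is thus refitted once per direction sector, with NaN parameters when a sector holds too few points. A simplified standalone sketch of that loop (the sector-binning details here are assumptions, not the library's exact code):

import numpy as np
import pandas as pd

def fit_per_sector(ref_spd, target_spd, ref_dir, sectors=12):
    width = 360 / sectors
    sector_idx = (ref_dir % 360 // width).astype(int)  # assign each record to a direction sector
    params = []
    df = pd.DataFrame({'ref': ref_spd, 'tar': target_spd})
    for sector, group in df.groupby(sector_idx):
        if len(group) > 1:
            slope, offset = np.polyfit(group['ref'], group['tar'], 1)
        else:
            slope, offset = np.nan, np.nan             # too few points to fit this sector
        params.append({'sector': sector, 'slope': slope, 'offset': offset})
    return params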
@@ -576,7 +609,8 @@ def run(self, show_params=True):
        output = model.run()
        self.params = dict([('slope', output.beta[0]), ('offset', output.beta[1])])
        self.params['r2'] = self._get_r2(target_spd=self.data[self._tar_spd_col_name],
-                                         predict_spd=self._predict(ref_spd=self.data[self._ref_spd_col_name]))
+                                         predict_spd=self._predict(ref_spd=self.data[self._ref_spd_col_name]),
+                                         forced_intercept_origin=False)
        self.params['num_data_points'] = self.num_data_pts
        # print("Model output:", output.pprint())
        if show_params:
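This last hunk appears to belong to the orthogonal least squares model, where `output.beta` is the coefficient vector returned by a scipy.odr fit (and `forced_intercept_origin=False` is passed because that model always fits an intercept). A minimal standalone sketch of the scipy.odr API for a linear model, with illustrative data:

import numpy as np
from scipy import odr

x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([2.2, 3.8, 6.1, 8.3])
linear = odr.Model(lambda beta, x: beta[0] * x + beta[1])  # beta[0] = slope, beta[1] = offset
output = odr.ODR(odr.RealData(x, y), linear, beta0=[1.0, 0.0]).run()
print(output.beta[0], output.beta[1])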
Expand Down
