Skip to content

Commit

Permalink
Merge 0397698 into 6a10efa
Browse files Browse the repository at this point in the history
  • Loading branch information
vkhodygo committed Aug 27, 2020
2 parents 6a10efa + 0397698 commit efb4320
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 100 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
language: python
cache: pip
python:
- "2.7"
- "3.6"
- "3.7"
- "3.8"
Expand Down
175 changes: 76 additions & 99 deletions pwlf/pwlf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import print_function
# import libraries
import numpy as np
from scipy.optimize import differential_evolution
Expand Down Expand Up @@ -189,39 +188,29 @@ def __init__(self, x, y, disp_res=False, lapack_driver='gelsd', degree=1,

self.print = disp_res
self.lapack_driver = lapack_driver
# x and y should be numpy arrays
# if they are not convert to numpy array
if isinstance(x, np.ndarray) is False:
x = np.array(x)
if isinstance(y, np.ndarray) is False:
y = np.array(y)
x, y = self._switch_to_np_array(x), self._switch_to_np_array(y)

self.x_data = x
self.y_data = y
self.x_data, self.y_data = x, y

# calculate the number of data points
self.n_data = x.size

# set the first and last break x values to be the min and max of x
self.break_0 = np.min(self.x_data)
self.break_n = np.max(self.x_data)
self.break_0, self.break_n = np.min(self.x_data), np.max(self.x_data)

if degree < 12 and degree >= 0:
if 12 > degree >= 0:
# I actually don't know what the upper degree limit should
self.degree = int(degree)
else:
not_suported = "degree = " + str(degree) + " is not supported."
raise ValueError(not_suported)
raise ValueError(f"degree = {degree} is not supported.")

self.y_w = None
self.weights = None
# self.weights2 = None # the squared weights vector
if weights is not None:
if isinstance(weights, np.ndarray) is False:
weights = np.array(weights)
self.weights = self._switch_to_np_array(weights)
# self.weights2 = weights*weights
self.weights = weights
self.y_w = np.dot(self.y_data, np.eye(self.n_data)*self.weights)
self.y_w = np.dot(self.y_data, np.eye(self.n_data) * self.weights)

# initialize all empty attributes as None
self.fit_breaks = None
Expand All @@ -238,6 +227,44 @@ def __init__(self, x, y, disp_res=False, lapack_driver='gelsd', degree=1,
self.intercepts = None
self.se = None

@staticmethod
def _switch_to_np_array(input_):
r"""
Check the input, if it's not a Numpy array transform it to one.
Parameters
----------
input_ : array_like
The object that requires a check.
Returns
-------
input_ : ndarray
The input data that's been transformed when required.
"""
if isinstance(input_, np.ndarray) is False:
input_ = np.array(input_)
return input_

def _get_breaks(self, input_):
r"""
Use input to form a ndarray containing breakpoints
Parameters
----------
input_ : array_like
The object containing some of the breakpoints
Returns
-------
b_ : ndarray
All the line breaks
"""
v = np.sort(input_)
b_ = np.zeros(len(v) + 2)
b_[0], b_[1:-1], b_[-1] = self.break_0, v.copy(), self.break_n
return b_

def assemble_regression_matrix(self, breaks, x):
r"""
Assemble the linear regression matrix A
Expand Down Expand Up @@ -268,9 +295,8 @@ def assemble_regression_matrix(self, breaks, x):
>>> A = assemble_regression_matrix(breaks, self.x_data)
"""
# Check if breaks in ndarray, if not convert to np.array
if isinstance(breaks, np.ndarray) is False:
breaks = np.array(breaks)

breaks = self._switch_to_np_array(breaks)

# Sort the breaks, then store them
breaks_order = np.argsort(breaks)
Expand Down Expand Up @@ -350,9 +376,7 @@ def fit_with_breaks(self, breaks):
"""

# Check if breaks in ndarray, if not convert to np.array
if isinstance(breaks, np.ndarray) is False:
breaks = np.array(breaks)
breaks = self._switch_to_np_array(breaks)

A = self.assemble_regression_matrix(breaks, self.x_data)

Expand Down Expand Up @@ -426,15 +450,10 @@ def fit_with_breaks_force_points(self, breaks, x_c, y_c):
"""

# check if x_c and y_c are numpy array, if not convert to numpy array
if isinstance(x_c, np.ndarray) is False:
x_c = np.array(x_c)
if isinstance(y_c, np.ndarray) is False:
y_c = np.array(y_c)
x_c, y_c = self._switch_to_np_array(x_c), self._switch_to_np_array(y_c)
# sort the x_c and y_c data points, then store them
x_c_order = np.argsort(x_c)
self.x_c = x_c[x_c_order]
self.y_c = y_c[x_c_order]
self.x_c, self.y_c = x_c[x_c_order], y_c[x_c_order]
# store the number of constraints
self.c_n = len(self.x_c)

Expand All @@ -443,9 +462,7 @@ def fit_with_breaks_force_points(self, breaks, x_c, y_c):
' not supported since these have a tendency '
'of being numerically instable.')

# Check if breaks in ndarray, if not convert to np.array
if isinstance(breaks, np.ndarray) is False:
breaks = np.array(breaks)
breaks = self._switch_to_np_array(breaks)

A = self.assemble_regression_matrix(breaks, self.x_data)
L = self.conlstsq(A)
Expand Down Expand Up @@ -501,9 +518,7 @@ def predict(self, x, beta=None, breaks=None):
self.n_parameters = len(self.fit_breaks)
self.n_segments = self.n_parameters - 1

# check if x is numpy array, if not convert to numpy array
if isinstance(x, np.ndarray) is False:
x = np.array(x)
x = self._switch_to_np_array(x)

A = self.assemble_regression_matrix(self.fit_breaks, x)

Expand Down Expand Up @@ -549,12 +564,7 @@ def fit_with_breaks_opt(self, var):
values of x.
"""

var = np.sort(var)
breaks = np.zeros(len(var) + 2)
breaks[1:-1] = var.copy()
breaks[0] = self.break_0
breaks[-1] = self.break_n

breaks = self._get_breaks(input_=var)
A = self.assemble_regression_matrix(breaks, self.x_data)

# try to solve the regression problem
Expand Down Expand Up @@ -612,12 +622,7 @@ def fit_force_points_opt(self, var):
at the min and max values of x.
"""

var = np.sort(var)
breaks = np.zeros(len(var) + 2)
breaks[1:-1] = var.copy()
breaks[0] = self.break_0
breaks[-1] = self.break_n

breaks = self._get_breaks(input_=var)
# Sort the breaks, then store them
breaks_order = np.argsort(breaks)
breaks = breaks[breaks_order]
Expand Down Expand Up @@ -712,16 +717,11 @@ def fit(self, n_segments, x_c=None, y_c=None, bounds=None, **kwargs):

# if you've provided both x_c and y_c
if x_c is not None and y_c is not None:
# check if x_c and y_c are numpy array
# if not convert to numpy array
if isinstance(x_c, np.ndarray) is False:
x_c = np.array(x_c)
if isinstance(y_c, np.ndarray) is False:
y_c = np.array(y_c)
x_c = self._switch_to_np_array(x_c)
y_c = self._switch_to_np_array(y_c)
# sort the x_c and y_c data points, then store them
x_c_order = np.argsort(x_c)
self.x_c = x_c[x_c_order]
self.y_c = y_c[x_c_order]
self.x_c, self.y_c = x_c[x_c_order], y_c[x_c_order]
# store the number of constraints
self.c_n = len(self.x_c)
# Use a different function to minimize
Expand Down Expand Up @@ -761,12 +761,7 @@ def fit(self, n_segments, x_c=None, y_c=None, bounds=None, **kwargs):

self.ssr = res.fun

# pull the breaks out of the result
var = np.sort(res.x)
breaks = np.zeros(len(var) + 2)
breaks[1:-1] = var.copy()
breaks[0] = self.break_0
breaks[-1] = self.break_n
breaks = self._get_breaks(input_=res.x)

# assign values
if x_c is None and y_c is None:
Expand Down Expand Up @@ -887,7 +882,7 @@ def fitfast(self, n_segments, pop=2, bounds=None, **kwargs):
f[i] = resf
d.append(resd)
if self.print is True:
print(i + 1, 'of ' + str(pop) + ' complete')
print(f"{i + 1} of {pop} complete")

# find the best result
best_ind = np.nanargmin(f)
Expand All @@ -899,12 +894,7 @@ def fitfast(self, n_segments, pop=2, bounds=None, **kwargs):

self.ssr = best_val

# obtain the breakpoint locations from the best result
var = np.sort(best_break)
breaks = np.zeros(len(var) + 2)
breaks[1:-1] = var.copy()
breaks[0] = self.break_0
breaks[-1] = self.break_n
breaks = self._get_breaks(input_=best_break)

# assign parameters
self.fit_with_breaks(breaks)
Expand Down Expand Up @@ -997,12 +987,7 @@ def fit_guess(self, guess_breakpoints, bounds=None, **kwargs):

self.ssr = resf

# pull the breaks out of the result
var = np.sort(resx)
breaks = np.zeros(len(var) + 2)
breaks[1:-1] = var.copy()
breaks[0] = self.break_0
breaks[-1] = self.break_n
breaks = self._get_breaks(input_=resx)

# assign values
self.fit_with_breaks(breaks)
Expand Down Expand Up @@ -1058,16 +1043,11 @@ def use_custom_opt(self, n_segments, x_c=None, y_c=None):
# calculate the number of variables I have to solve for
self.nVar = self.n_segments - 1
if x_c is not None or y_c is not None:
# check if x_c and y_c are numpy array
# if not convert to numpy array
if isinstance(x_c, np.ndarray) is False:
x_c = np.array(x_c)
if isinstance(y_c, np.ndarray) is False:
y_c = np.array(y_c)
x_c = self._switch_to_np_array(x_c)
y_c = self._switch_to_np_array(y_c)
# sort the x_c and y_c data points, then store them
x_c_order = np.argsort(x_c)
self.x_c = x_c[x_c_order]
self.y_c = y_c[x_c_order]
self.x_c, self.y_c = x_c[x_c_order], y_c[x_c_order]
# store the number of constraints
self.c_n = len(self.x_c)
if self.weights is not None:
Expand Down Expand Up @@ -1103,11 +1083,12 @@ def calc_slopes(self):
"""
y_hat = self.predict(self.fit_breaks)
self.slopes = np.zeros(self.n_segments)
for i in range(self.n_segments):
self.slopes[i] = (y_hat[i+1]-y_hat[i]) / \
(self.fit_breaks[i+1]-self.fit_breaks[i])
self.intercepts = y_hat[0:-1] - self.slopes*self.fit_breaks[0:-1]
self.slopes = np.divide(
(y_hat[1:self.n_segments + 1] -
y_hat[:self.n_segments]),
(self.fit_breaks[1:self.n_segments + 1] -
self.fit_breaks[:self.n_segments]))
self.intercepts = y_hat[0:-1] - self.slopes * self.fit_breaks[0:-1]
return self.slopes

def standard_errors(self, method='linear', step_size=1e-4):
Expand Down Expand Up @@ -1201,7 +1182,7 @@ def standard_errors(self, method='linear', step_size=1e-4):
# vary beta and keep breaks constant
f = self.predict(self.x_data, beta=temp_beta,
breaks=orig_breaks)
A[:, i] = (f-f0) / step_size
A[:, i] = (f - f0) / step_size
# append differentials due to break points
for i in range(self.beta.size, nb):
# 'ind' ignores first and last entry in self.fit_breaks since
Expand All @@ -1212,14 +1193,14 @@ def standard_errors(self, method='linear', step_size=1e-4):
# vary break and keep betas constant
f = self.predict(self.x_data, beta=orig_beta,
breaks=temp_breaks)
A[:, i] = (f-f0) / step_size
A[:, i] = (f - f0) / step_size
e = f0 - self.y_data
# reset beta and breaks back to original values
self.beta = orig_beta
self.fit_breaks = orig_breaks

else:
errmsg = "Error: method='" + method + "' is not supported!"
errmsg = f"Error: method=\'{method}\' is not supported!"
raise ValueError(errmsg)
# try to solve for the standard errors
try:
Expand Down Expand Up @@ -1295,17 +1276,13 @@ def prediction_variance(self, x):
raise AttributeError(errmsg)

ny = self.n_data

# check if x is numpy array, if not convert to numpy array
if isinstance(x, np.ndarray) is False:
x = np.array(x)
x = self._switch_to_np_array(x)

# Regression matrix on training data
Ad = self.assemble_regression_matrix(self.fit_breaks, self.x_data)

# try to solve for the unbiased variance estimation
try:

y_hat = np.dot(Ad, self.beta)
e = y_hat - self.y_data
# solve for the unbiased estimate of variance
Expand Down Expand Up @@ -1464,10 +1441,10 @@ def p_values(self, method='linear', step_size=1e-4):
# calculate my t-value
t = parameters / self.se
else:
errmsg = "Error: method='" + method + "' is not supported!"
errmsg = f"Error: method=\'{method}\' is not supported!"
raise ValueError(errmsg)
# calculate the p-values
p = 2.0 * stats.t.sf(np.abs(t), df=n-k-1)
p = 2.0 * stats.t.sf(np.abs(t), df=n - k - 1)
return p

def lstsq(self, A):
Expand Down

0 comments on commit efb4320

Please sign in to comment.