Remove fitting method from GP, because it is not very robust
jhamrick committed Dec 31, 2013
1 parent f013961 commit a4c416a
Showing 2 changed files with 0 additions and 163 deletions.
111 changes: 0 additions & 111 deletions gp/gp.py
@@ -653,114 +653,3 @@ def plot(self, ax=None, xlim=None, color='k', markercolor='r'):
        ax.plot(X, mean, lw=2, color=color)
        ax.plot(x, y, 'o', ms=7, color=markercolor)
        ax.set_xlim(*xlim)

    def fit_MLII(self, params_to_fit, randf=None, nrestart=5, verbose=False):
        """
        Fit parameters of the Gaussian process using MLII (maximum
        likelihood) estimation.

        Note that this method modifies the Gaussian process in
        place. After the method is run, the `GP` object will have new
        parameters set to the best ones found during the optimization.

        The optimization algorithm used is L-BFGS-B.

        Parameters
        ----------
        params_to_fit : boolean array_like
            A list of booleans corresponding to the Gaussian process
            parameters, indicating which parameters should be
            fit. Parameters which are not fit keep their current value.
        randf : list of functions (optional)
            A list of functions to give an initial starting value for
            each parameter that is being fit. The functions should
            take no arguments, and return a numpy.float64. If not
            specified, the functions default to ``lambda:
            abs(numpy.random.normal())``.
        nrestart : int (optional)
            Number of random restarts to use. The best parameters out
            of all the random restarts are used.
        verbose : bool (optional)
            Whether to print information about the optimization.

        """

        # original parameter list
        params = list(self.params)
        # boolean array of which parameters to fit
        fitmask = np.array(params_to_fit, dtype='bool')

        # default for randf
        if randf is None:
            randf = tuple(
                lambda: np.abs(np.random.normal())
                for p in params_to_fit if p)

        # figure out the indices of the params we are fitting
        j = 0
        iparam = []
        for i in xrange(len(params)):
            if params_to_fit[i]:
                iparam.append(j)
                j += 1
            else:
                iparam.append(None)

        # merge the fitted values into the full parameter list
        def new_params(theta):
            out = [params[i] if j is None else theta[j]
                   for i, j in enumerate(iparam)]
            return out

        # negative log likelihood
        def f(theta):
            params = new_params(theta)
            try:
                self.params = params
            except ValueError:
                out = np.inf
            else:
                out = -self.log_lh
            return out

        # jacobian of the negative log likelihood
        def df(theta):
            params = new_params(theta)
            try:
                self.params = params
            except ValueError:
                out = np.empty(len(theta))
                out.fill(np.inf)
            else:
                out = -self.dloglh_dtheta[fitmask]
            return out

        # run the optimization a few times to find the best fit
        args = np.empty((nrestart, len(randf)))
        fval = np.empty(nrestart)
        for i in xrange(nrestart):
            p0_i = tuple(r() for r in randf)
            self.params = new_params(p0_i)
            logger.debug("p0 = %s", p0_i)
            popt = optim.minimize(
                fun=f, x0=p0_i, jac=df, method='L-BFGS-B',
                options={'disp': verbose})

            args[i] = popt['x']
            fval[i] = popt['fun']

            logger.debug("-MLL(%s) = %f", args[i], fval[i])
            if not popt['success'] or np.isinf(fval[i]):
                logger.warning("MLII failed, p=%s", args[i])
                fval[i] = np.nan

        # choose the parameters that give the best MLL
        if np.isnan(fval).all():
            raise RuntimeError("Could not find MLII parameter estimates")

        # update our parameters
        self.params = new_params(args[np.nanargmin(fval)])
        logger.info("Best parameters: %s", self.params)
52 changes: 0 additions & 52 deletions gp/tests/test_gp.py
@@ -348,55 +348,3 @@ def test_plot():
    gp.plot(xlim=[0, 1])

    plt.close('all')


def test_fit_MLII():
    gp = make_gp()
    gp.fit_MLII([True, False, False])
    gp.fit_MLII([False, True, False])
    gp.fit_MLII([False, False, True])
    gp.fit_MLII([True, True, False])
    gp.fit_MLII([True, True, True])

    f = lambda: np.abs(np.random.normal())
    gp.fit_MLII([True, False, False], randf=(f,))
    gp.fit_MLII([False, True, False], randf=(f,))
    gp.fit_MLII([False, False, True], randf=(f,))
    gp.fit_MLII([True, True, False], randf=(f, f))
    gp.fit_MLII([True, True, True], randf=(f, f, f))

    x = np.array([0.0, 0.3490658503988659, 0.6981317007977318,
                  1.0471975511965976, 1.3962634015954636, 1.7453292519943295,
                  2.0943951023931953, 0.41968261, 0.97349106, 1.51630532,
                  1.77356282, 2.07011378, 2.87018553, 3.70955074, 3.96680824,
                  4.50962249, 4.80617345, 5.06343095, 5.6062452])

    y = np.array([-5.297411814764175e-16, 2.2887507861169e-16,
                  1.1824308893126911e-15, 1.9743321560961036e-15,
                  3.387047586844716e-15, 3.2612801348363973e-15,
                  2.248201624865942e-15, -3.061735126365188e-05,
                  2.1539042816804896e-05, -3.900581031467468e-05,
                  4.603140942399664e-05, 0.00014852070373963522,
                  -0.011659908151004955, -0.001060998167383152,
                  -0.0002808538329216448, -8.057870658869265e-06,
                  -7.668984947838558e-07, -7.910215881378919e-08,
                  -3.2649468298271893e-10])

    h, w = (0.53356762300842819, 2.1479780263878867)
    gp = GP(kernel(h, w), x, y, s=0)
    fh = lambda: h
    fw = lambda: w
    with pytest.raises(RuntimeError):
        gp.fit_MLII([True, True, False], randf=(fh, fw), nrestart=1)


@pytest.mark.xfail
def test_fit_same():
    params = None
    for i in xrange(10):
        gp = make_gp()
        seed()
        gp.fit_MLII([True, True, False])
        if params is None:
            params = gp.params.copy()
        assert (params == gp.params).all()
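
The xfail marker above is consistent with that non-determinism: assuming `seed` is `numpy.random.seed` called with no argument, each iteration reseeds from fresh OS entropy, so the restarts begin from different points and the fitted parameters need not agree across runs. A minimal sketch of the distinction, using plain numpy and nothing project-specific:

```python
import numpy as np

# A fixed seed makes the default randf draws repeat exactly.
np.random.seed(0)
first = [np.abs(np.random.normal()) for _ in range(3)]
np.random.seed(0)
second = [np.abs(np.random.normal()) for _ in range(3)]
assert first == second

# Seeding with no argument pulls fresh entropy instead, so two runs
# will almost surely draw different starting points.
np.random.seed()
```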
