# WORKBOOK: FIGURING OUT TANH SOFT WALLS FOR PARAMETER MINIMIZATION

<h1 id="tocheading">TABLE OF CONTENTS</h1>
<div id="toc"></div>

**Updates to the table of contents are periodic, but run the cell below to first start or force an update.**

In [1]:
# String macro backing the javascript"..." literal syntax: the literal's text is handed to
# display() with the "text/javascript" MIME type. Note that display() is called while the
# macro is being expanded, and the macro itself returns display()'s result.
macro javascript_str(s) display("text/javascript", s); end

# Ask the browser to fetch and run the remote script named below (by its file name, the
# table-of-contents builder for this notebook).
javascript"""
$.getScript('https://sites.google.com/site/brodylabhome/files/make_table_of_contents.js')
"""

# WORKBOOK: Improving and implementing constrained_parabolic_minimization and bbox_Hessian_keyword_minimization


In [2]:
#########################################
#                                       #
#                                       #
#     PRELIMINARIES                     #
#                                       #
#                                       #
#########################################


using PyCall
using PyPlot
using ForwardDiff
using DiffBase

pygui(true)

import Base.convert
# Collapse a ForwardDiff Dual to a plain Float64 by keeping only its value part.
# Float64(x.value) recurses through this same method when the value is itself a Dual.
convert(::Type{Float64}, x::ForwardDiff.Dual) = Float64(x.value)

# Elementwise version of the conversion above: returns a new Float64 array with the same
# size as x.
#
# The first argument must be `::Type{Array{Float64}}` (the type itself); the previous
# `::Array{Float64}` annotation matched an array *instance*, so a call of the form
# `convert(Array{Float64}, x)` could never reach this method.
#
# NOTE(review): type parameters are invariant, so `x::Array{ForwardDiff.Dual}` only matches
# arrays whose declared element type is exactly `ForwardDiff.Dual`. Arrays with a concrete
# Dual element type presumably fall back to Base's generic array convert, which converts
# element by element and so still ends up in the scalar method above -- confirm.
function convert(::Type{Array{Float64}}, x::Array{ForwardDiff.Dual})
    y = zeros(size(x))
    for i in 1:length(x)
        y[i] = convert(Float64, x[i])
    end
    return y
end

include("general_utils.jl")
include("hessian_utils.jl")

"""
We define functions to convert Duals, the variable types used by ForwardDiff, 
to Floats. This is useful if we want to print out the value of a variable 
(since print doesn't know how to Duals). Note that after being converted to a Float, no
differentiation by ForwardDiff can happen!  e.g. after
    x = convert(Float64, y)
ForwardDiff can still differentiate y, but it can't differentiate x
"""



"We define functions to convert Duals, the variable types used by ForwardDiff, \nto Floats. This is useful if we want to print out the value of a variable \n(since print doesn't know how to Duals). Note that after being converted to a Float, no\ndifferentiation by ForwardDiff can happen!  e.g. after\n    x = convert(Float64, y)\nForwardDiff can still differentiate y, but it can't differentiate x\n"

# basic_gradient_descent()

Included only as a simpler minimizer to compare against bbox_Hessian_keyword_minimization.

In [25]:
"""
function basic_gradient_descent(func, args, seed, bbox; start_eta=0.01, verbose=true, maxiter=100, report_every=1)

Adaptive-step gradient descent on func, with optional tanh "soft walls" on some parameters.
Each iteration proposes a step of length eta along the negative normalized gradient. If the
cost went up, the step is rejected and eta is halved; otherwise the step is accepted and eta
grows by a factor of 1.1.

PARAMETERS:
===========

func        Cost function; it is called with keyword arguments only.

args        List identifying the parameters being optimized. Each entry is either a string, or
            a [name n] array for a parameter that occupies n consecutive slots of the parameter
            vector.

seed        Starting values of the parameters (unwalled coordinates).

bbox        Dict of Symbol=>[minval maxval]. Every parameter with an entry is passed through a
            tanh before reaching func, so that func only ever sees values inside those limits.
            Use Dict() for an unbounded search.

OPTIONAL PARAMETERS:
====================

start_eta=0.01   Initial step length.

verbose=true     If true, print iteration number, eta, cost and the (unwalled) parameters.

maxiter=100      Number of iterations to run.

report_every=1   When verbose, report only on iterations that are a multiple of this.

RETURNS:
========

params      The final parameters after passing them through the tanh walls, i.e., the values
            that func actually sees.
"""
function basic_gradient_descent(func, args, seed, bbox; start_eta=0.01, verbose=true, maxiter=100, report_every=1)

    """
    Given bdict, a dictionary of symbols to [minval, maxval] vectors, and pdict, a dictionary of symbols
    to values (or an Array that Dict() can turn into one), puts every entry of pdict that has a key in
    bdict through a tanh so that it lies within the corresponding limits. Works on a copy of pdict and
    returns that copy.
    """
    function wallwrap(bdict, pdict)
        pdict = copy(pdict)
        if typeof(pdict)<:Array
            pdict = Dict(pdict)
        end

        allkeys = keys(bdict)

        for k in allkeys
            local bbox = bdict[k]
            d = 0.5*(bbox[2] - bbox[1])   # half-width of the allowed range
            m = 0.5*(bbox[2] + bbox[1])   # midpoint of the allowed range

            pdict[k] = bbox[1] + d*(tanh((pdict[k]-m)/d)+1)
        end
        return pdict
    end


    """
    Given bdict, a dictionary of symbols to [minval, maxval] vectors, args, an array of strings representing
    symbols, and params, an array of values corresponding to the args list, puts each param that has an entry 
    in bdict through the tanh-walling mechanism, and returns the result. Does not modify the contents of the 
    original params vector (or bdict or args).
    """
    function vector_wrap(bbox, args, eparams)        
        params = copy(eparams)
        pdict = wallwrap(bbox, make_dict(args, params))
        j=1    # index of the next slot of params to fill
        for i=1:length(args)
            if typeof(args[i])<:Array
                # [name n] entry: the parameter occupies n consecutive slots
                params[j:j+args[i][2]-1] = pdict[Symbol(args[i][1])]
                j += args[i][2]-1
            else
                params[j] = pdict[Symbol(args[i])]
            end
            j = j+1
        end
        return params
    end

    
    params = seed
    eta = start_eta
    
    cost, grad, hess = keyword_vgh((;pars...)->func(;wallwrap(bbox, pars)...), args, params)
    
    for i=1:maxiter
        gradnorm = norm(grad)
        if gradnorm == 0
            # Exactly stationary point: the normalized step would be 0/0 and would poison
            # params with NaNs (a NaN cost is never "> cost", so it would be accepted).
            break
        end
        new_params = params - eta*grad/gradnorm
        
        new_cost, new_grad, new_hess = keyword_vgh((;pars...)->func(;wallwrap(bbox, pars)...), args, new_params)
        
        if new_cost > cost
            eta = eta/2
        else
            eta = eta*1.1
            params = new_params
            cost   = new_cost
            grad   = new_grad
            hess   = new_hess
        end

        if verbose && rem(i, report_every)==0
            @printf("%d: eta=%g, cost=%g, ps=", i, eta, cost); print_vector_g(params); print("\n")
        end
    end

    # Report the walled-in result. This uses the local non-mutating helper: no vector_wrap! is
    # defined in this file, and bbox_Hessian_keyword_minimization returns its result the same way.
    return vector_wrap(bbox, args, params)
end



basic_gradient_descent (generic function with 1 method)

# constrained_parabolic_minimization

In [4]:
"""
function constrained_parabolic_minimization(H, G, r; tol=1e-6, min_only=true, 
    do_plot=false, verbose=false, efactor=3.0, max_efactor_tries=10, 
    lambdastepsize=0.003, minimum_tol=1e-24, tol_delta=1e-3)

Given a Hessian matrix, a gradient vector, and a desired radius from the origin, finds the vector 
that minimizes the parabola defined by the Hessian and the gradient, subject to the constraint that the
vector's length equals the desired radius.

PARAMETERS:
===========

H      A square symmetric matrix. It should have all positive eigenvalues.

G      A vector, length equal to the size(H,2)

r      desired radius

OPTIONAL PARAMETERS:
====================

tol=1e-6        Numerical tolerance on the computations. A candidate lambda is kept only if its 
                constraint mismatch c (see RETURNS) is below tol, and two neighboring candidate 
                lambdas closer than tol are treated as the same solution.

min_only=true   Return only the minimum, or, if false, all xs for all lambdas that match x'*x = r^2

do_plot=false   If true, plots the constraint mismatch as a function of lambda in figure 2.

verbose=false   If true, prints the candidate lambdas and their costs as the search proceeds.

efactor=3       The initial exploration of lambdas will go from -efactor(max(absolute(eig(H)))) to +efactor(max(absolute(eig(H))))

max_efactor_tries=10    If no lambda satisfying the constraint is found, efactor is multiplied by 4 
                and the scan is repeated, up to this many scans in total.

lambdastepsize=0.003    The step size for initial exploration of lambdas, in units of efactor. It should
                probably scale with the smallest difference in the eigenvalues of H; that has not been implemented yet.

minimum_tol=1e-24, tol_delta=1e-3    The 1-d refinement of each candidate lambda starts with tolerance tol.
                If no refined candidate satisfies the constraint, the refinement tolerance is multiplied by 
                tol_delta and the refinement is repeated, for as long as that tolerance is above minimum_tol.

RETURNS:
========

x        The vector that minimizes 0.5*x'*H*x + x'*G subject to x'*x = r^2
 
J        0.5*x'*H*x + x'*G at the returned x

lambda   value of the Lagrange multiplier at which the radius constraint is satisfied

c        The squared difference between the length of x and r. Should be small, otherwise something went wrong!

If min_only=false, each of the above holds one entry (one column, for x) per solution found. 
If min_only=true and no lambda satisfying the constraint was found, an error is raised.

"""
function constrained_parabolic_minimization(H, G, r; tol=1e-6, min_only=true, 
    do_plot=false, verbose=false, efactor=3.0, max_efactor_tries=10, 
    lambdastepsize=0.003, minimum_tol=1e-24, tol_delta=1e-3)

    #  --- First a couple of helper functions ----
    
    """
    function x_of_lambda(lambda)

    Given square matrix H, vector G, and passed scalar lambda, returns the solution x of
    (H - lambda*I)*x = -G, i.e., the stationary point of
    
    0.5 x'*H*x + x'*G - 0.5*lambda*x'*x

    """
    function x_of_lambda(lambda)
        return inv(H - lambda*eye(size(H,1)))*(-G)
    end
    
    
    """
    function q(lambda, r)

    Returns the squared difference between r and the norm of x_of_lambda(lambda).
    """
    function q(lambda, r)
        return (r - norm(x_of_lambda(lambda)))^2
    end


    # Results of the search. Defined up front so that they exist (as "nothing found") even if 
    # the loops below never execute, e.g. max_efactor_tries=0 or tol <= minimum_tol.
    lambdas_out = zeros(0)
    costs_out   = zeros(0)

    # efactor is the factor that multiplies the biggest eigenvalue of H, to determine the range over which we'll
    # look for a lambda that satisfies the norm(x)==r requirement. If we don't find a solution, we iteratively 
    # increase efactor to try to get there, for a maximum of max_efactor_tries
    for m=1:max_efactor_tries
        # First scan lambda to find good candidates for minimizing the parabolic 
        # surface under the x'*x = r^2 constraint
        L = eig(H)[1]
        L0 = maximum(abs(L))
        lambdas = L0*efactor*[-1.0:lambdastepsize:1.0;]
        costs = zeros(size(lambdas))
        for i in [1:length(lambdas);]
            try 
                costs[i] = q(lambdas[i], r)
            catch
                # x_of_lambda failed at this lambda; mark it as a non-candidate
                costs[i] = Inf
            end
        end

        if do_plot
            figure(2); clf();
            plot(lambdas, costs, "b.-")
            xlabel("lambda")
            ylabel("cost")
        end

        # Take all candidates where the derivative of costs changes sign 
        # from negative to positive (those would be minima),
        # plus the smallest and the largest lambdas tested, as candidates
        g = append!(prepend!(find(diff(sign(diff(costs))) .> 0.99), [1]), [length(lambdas)])
        if verbose
            @printf("cpm: g (candidate indices) are : ");           print_vector_g(g);        print("\n")
            @printf("cpm: and their corresponding costs are : ");   print_vector(costs[g]);   print("\n");
            @printf("cpm: and their corresponding lambdas are : "); print_vector(lambdas[g]); print("\n");
        end
        mytol = tol

        # Refine each candidate with a 1-d minimization, tightening the refinement tolerance
        # until at least one candidate satisfies the radius constraint
        while mytol > minimum_tol
            lambdas_out = zeros(size(g))
            costs_out   = zeros(size(g))
            for i in [1:length(g);]
                lambdas_out[i], costs_out[i] = one_d_minimizer(lambdas[g[i]], x -> q(x[1], r), start_eta=1, tol=mytol)
            end

            # Eliminate any lambdas where x'*x doesn't match our desired value r
            I = find(costs_out .< tol)
            lambdas_out = lambdas_out[I]; costs_out = costs_out[I];

            if length(I) > 0; break; end

            mytol *= tol_delta
        end
        if verbose
            @printf("%d : After searching for lambdas with efactor=%g, we found these : ", m, efactor)
            print_vector_g(lambdas_out); print("\n")
        end
        if length(lambdas_out) > 0; break; end;
        efactor = efactor*4
    end
    
    # Eliminate any repeated lambdas, to within the specified numerical tolerance. The absolute 
    # value matters: a signed difference below tol also flags any neighbor that is merely 
    # *smaller*, however far away, which would throw away distinct solutions.
    I = setdiff(1:length(lambdas_out), find(abs(diff(lambdas_out)) .< tol))
    lambdas_out = lambdas_out[I]; costs_out = costs_out[I];
    
    # Find the parabolic estimate of the cost function at these points
    J  = zeros(size(lambdas_out))
    xs = zeros(length(G), length(lambdas_out))
    for i in [1:length(J);]
        xs[:,i] = x_of_lambda(lambdas_out[i])
        J[i] = (0.5*xs[:,i]'*H*xs[:,i] + xs[:,i]'*G)[1]
    end

    # Find and return only the x that has the smallest J
    if min_only
        if isempty(J)
            # indmin() of an empty vector would also throw; fail with a message that says why
            error("cpm: found no lambda for which norm(x) matches the requested radius r to within tol")
        end
        I = indmin(J)    
    else
        I = 1:length(J)
    end
    return xs[:,I], J[I], lambdas_out[I], costs_out[I]
end




constrained_parabolic_minimization

# bbox_Hessian_keyword_minimization()

In [47]:
"""
function bbox_Hessian_keyword_minimization(seed, args, bbox, func; start_eta=0.1, tol=1e-6, maxiter=400,
    verbose=false, verbose_level=1, verbose_every=1, 
    softbox=true, hardbox=false, wallwidth=NaN, wallwidth_factor=0.18)

Like constrained_Hessian_minimization, but uses keyword_vgh() to obtain the value, gradient and 
Hessian of func with respect to its keyword parameters.

PARAMETERS:
===========

seed        column vector, representing the starting value of the parameters.

args        List of strings identifying parameters for differentiation, e.g., ["const_E", "w_self"]

bbox        If softbox=true (the default), should then be a Dict of Symbol=>[minval maxval] entries. An entry
            in this Dict indicates that the corresponding parameter is to be bounded, as indicated by the associated 
            [minval maxval] vector. The bbox dictionary can have fewer entries than the number of parameters;
            pass Dict() for an unbounded search.

            If softbox=false, then bbox should be an nargs-by-2 matrix indicating the range for each argument,
            with the minima (first column) and maxima (second column), and entries for ALL parameters.

func        func must take only optional keyword args, and must 
            take nderivs=0, difforder=0  and declare any new matrices using ForwardDiffZeros() instead of zeros()


OPTIONAL PARAMETERS:
====================

start_eta=0.1  Starting value of the radius.  It's good to start with something biggish, if it is
             too much, it'll quickly get cut down.

tol=1e-6     Numerical tolerance. If a proposed jump produces a change in func that is less than
             this, the minimization stops. It also stops if the radius eta falls below tol.

maxiter=400  Maximum number of iterations to do before stopping

verbose=false   If true, print out a report on each iteration of iteration number, radius size (eta),
                what type jump was proposed ("Newton" means going straight to global min, "constrained" means jump has 
                norm eta, failed means that finding the minimum at a given radius somehow didn't work). Will also
                print out the cosine of the angle between the proposed jump and the gradient.

verbose_level   If less than 2, regular verbose output, if 2 or greater, very verbose, for debugging.

verbose_every=1   When verbose, report only on iterations that are a multiple of this.

softbox=true    If true, then bbox must be a Dict() and we use the tanh() mechanism for putting a fixed limit
                on the parameters. softbox takes precedence: hardbox, wallwidth and wallwidth_factor are 
                only consulted when softbox=false.

hardbox=false   If true, ignores wallwidth, and just resets parameter values to the bounding box if they go outside it.
                If false, adds cost function "walls" to implement the bounding box.

wallwidth=NaN   Used for putting up cost function "walls" that implement the bounding box limits. Can be NaN.
                If it is NaN, then the wallwidth is a constant factor of the range width for each argument. If not NaN, must
                be an nargs-long vector that indicates the actual wall widths.

wallwidth_factor=0.18   Only relevant if wallwidth is NaN, otherwise ignored. For each arg, the wall width
                is going to be wallwidth_factor*(bbox[i,2] - bbox[i,1])


RETURNS:
========

params       A vector the size of seed that has the last values of the minimizing parameters for func
trajectory   A (2+length(params))-by-nsteps matrix. Each column corresponds to an iteration step, and contains
                 the value of eta used, the cost, and the value of the parameters at that iteration. In softbox
                 mode these are the internal parameter values, before they go through the tanh.
cost         Final value of objective function


EXAMPLE:
========

function tester(;x=5, y=10, z=20, nderivs=0, difforder=0)
    return x^2*y + z/tanh(y)
end

params, trajectory, cost = bbox_Hessian_keyword_minimization([0.5, 0.5], ["x", "y"], [1.1 2 ; 1.1 4], tester, 
    softbox=false, verbose=true, tol=1e-12, start_eta=1);



"""
function bbox_Hessian_keyword_minimization(seed, args, bbox, func; start_eta=0.1, tol=1e-6, maxiter=400,
    verbose=false, verbose_level=1, verbose_every=1, 
    softbox=true, hardbox=false, wallwidth=NaN, wallwidth_factor=0.18)

    """
    Given bdict, a dictionary of symbols to [minval, maxval] vectors, and pdict, a dictionary of symbols
    to values (or, alternatively, an Array of (Symbol, value) tuples], goes through each of the symbols in 
    bdict and modifies the corresponding value in pdict putting it through a tanh so the final output lies 
    within the limits in bdict.  Returns the new pdict.  Makes a copy of pdict so as not to modify the original.
    """
    function wallwrap(bdict, pdict)
        pdict = copy(pdict)
        if typeof(pdict)<:Array
            pdict = Dict(pdict)
        end

        allkeys = keys(bdict)

        for k in allkeys
            local bbox = bdict[k]
            d = 0.5*(bbox[2] - bbox[1])   # half-width of the allowed range
            m = 0.5*(bbox[2] + bbox[1])   # midpoint of the allowed range

            pdict[k] = bbox[1] + d*(tanh((pdict[k]-m)/d)+1)
        end
        return pdict
    end

    
    """
    Given bdict, a dictionary of symbols to [minval, maxval] vectors, args, an array of strings representing
    symbols, and params, an array of values corresponding to the args list, puts each param that has an entry 
    in bdict through the tanh-walling mechanism, and returns the result. Does not modify the contents of the 
    original params vector (or bdict or args).
    """
    function vector_wrap(bbox, args, eparams)        
        params = copy(eparams)
        pdict = wallwrap(bbox, make_dict(args, params))
        j=1    # index of the next slot of params to fill
        for i=1:length(args)
            if typeof(args[i])<:Array
                # [name n] entry: the parameter occupies n consecutive slots
                params[j:j+args[i][2]-1] = pdict[Symbol(args[i][1])]
                j += args[i][2]-1
            else
                params[j] = pdict[Symbol(args[i])]
            end
            j = j+1
        end
        return params
    end


    """
    Given args, a list of string representing the arguments of interest, a bounding box for each,
    and a Symbol=>value dictionary with the corresponding parameters, computes and returns a high cost for 
    being outside the bounding box
    """
    function wall_cost(args, bbox; wallwidth=NaN, nderivs=0, difforder=0, pars...) 
        myparams = ForwardDiffZeros(length(pars), 1, nderivs=nderivs, difforder=difforder)
        pars2 = Dict()
        for i in [1:length(pars);]
            pars2[string(pars[i][1])] = pars[i][2]
        end
        for i in [1:length(args);]
            myparams[i] = pars2[args[i]]
        end
        
        if isnan(wallwidth)
            # We know that we're going to be taking hessian for params, so declare zeros accordingly:
            wallwidth = ForwardDiffZeros(length(myparams), 1, nderivs=nderivs, difforder=difforder)

            for i in [1:length(myparams);]
                wallwidth[i] = wallwidth_factor*(bbox[i,2]-bbox[i,1])
            end
        end

        # The cost is zero inside the box and grows as cosh(distance/wallwidth)-1 outside it
        retval = 0
        for i in [1:length(myparams);]
            if myparams[i]<bbox[i,1]
                retval += cosh((bbox[i,1]-myparams[i])/wallwidth[i])-1.0
            elseif bbox[i,2] < myparams[i]
                retval += cosh((myparams[i]-bbox[i,2])/wallwidth[i])-1.0                
            end
        end

        return 2*retval
    end

    traj_increment = 100    # the trajectory matrix is grown in chunks of this many columns
    params = seed
    eta = start_eta
    trajectory = zeros(2+length(params), traj_increment)

    if verbose
        @printf "%d: eta=%g ps=" 0 eta 
        print_vector(params)
        @printf "\n"
    end
    
    if softbox
        if !(typeof(bbox)<:Dict); error("bhm: If softbox=true, then bbox must eb a Dict"); end
        cost, grad, hess = keyword_vgh((;pars...)->func(;wallwrap(bbox, pars)...), args, params)
    elseif hardbox
        cost, grad, hess = keyword_vgh((;pars...) -> func(;pars...), args, params)
    else
        cost, grad, hess = keyword_vgh((;pars...) -> func(;pars...) + wall_cost(args, bbox; wallwidth=wallwidth, pars...),
            args, params)        
    end
        
    chessdelta = zeros(size(params))

    for i in [1:maxiter;]
        if i > size(trajectory, 2)
            trajectory = [trajectory zeros(2+length(params), traj_increment)]
        end
        trajectory[1:2, i]   = [eta;cost]
        trajectory[3:end, i] = params
        
        # Unconstrained Newton step
        hessdelta  = - inv(hess)*grad
        # Step constrained to have norm eta
        try
            if verbose && verbose_level >= 2
                @printf("bhm: about to try cpm with grad : "); print_vector_g(grad); print("\n")
                @printf("bhm:   hess :"); print_vector_g(hess[:]); print("\n");
            end
            if verbose && verbose_level >= 2
                chessdelta = constrained_parabolic_minimization(hess, grad'', eta, do_plot=true, verbose=true)[1]
            else
                chessdelta = constrained_parabolic_minimization(hess, grad'', eta)[1]
            end
            jumptype = "not failed"
        catch y
            jumptype = "failed"
            if verbose
                @printf "Constrained parabolic minimization failed with error %s\n" y
                @printf "\n"
                @printf "eta was %g\n" eta
                @printf "grad was\n"
                print_vector(grad)
                @printf "\n\nhess was\n"
                for k in [1:length(grad);]
                    print_vector(hess[k,:])
                    @printf "\n"
                end
                @printf "\n"
            end
        end

        # Prefer the Newton step whenever it fits inside the current radius, even if the 
        # constrained minimization failed
        if norm(hessdelta) <= eta
            new_params = params + hessdelta
            jumptype = "Newton"
        elseif jumptype != "failed" 
            new_params = params + chessdelta
            jumptype  = "constrained"
        end

        if jumptype != "failed"
            if softbox
                new_cost, new_grad, new_hess = 
                    keyword_vgh((;pars...) -> func(;wallwrap(bbox, pars)...), args, new_params)
                if verbose && verbose_level >=2
                    # Report the proposed parameters (this line used to print the old params)
                    @printf("bhm: had new_params = : "); print_vector_g(new_params); print("\n");
                    @printf("bhm: and my bbox was : "); print(bbox); print("\n")
                    @printf("bhm: and my wallwrap output was : "); print(wallwrap(bbox, make_dict(args, new_params))); print("\n")
                    @printf("bhm: and this produced new_grad : "); print_vector_g(new_grad); print("\n")
                    @printf("bhm:   new_hess :"); print_vector_g(new_hess[:]); print("\n");                                        
                end
            elseif hardbox
                # Clip the proposed parameters to the bounding box
                for p in [1:length(new_params);]
                    if new_params[p] < bbox[p,1]; new_params[p] = bbox[p,1]; end
                    if bbox[p,2] < new_params[p]; new_params[p] = bbox[p,2]; end
                 end        
                
                new_cost, new_grad, new_hess = keyword_vgh((;pars...) -> func(;pars...), args, new_params)
            else
                new_cost, new_grad, new_hess = keyword_vgh((;pars...) -> func(;pars...) + 
                        wall_cost(args, bbox; wallwidth=wallwidth, pars...),
                    args, new_params)                
            end
            
            if abs(new_cost - cost) < tol || eta < tol
                trajectory = trajectory[:,1:i]
                break
            end
        end

        if jumptype == "failed" || new_cost >= cost  
            # Reject the jump and shrink the radius
            eta = eta/2
            costheta = NaN
            if eta < tol
                trajectory = trajectory[:,1:i]
                break
            end
        else
            # Accept the jump and grow the radius
            eta = eta*1.1
            costheta = dot(new_params-params, grad)/(norm(new_params-params)*norm(grad))

            params = new_params
            cost = new_cost
            grad = new_grad
            hess = new_hess
        end

        if verbose
            if rem(i, verbose_every)==0
                @printf "%d: eta=%g cost=%g jtype=%s costheta=%.3f ps=" i eta cost jumptype costheta
                # NOTE(review): print_vector_wrap is not defined in this file; presumably it 
                # comes from one of the included utility files -- confirm.
                print_vector_wrap(bbox, args, params)
                @printf "\n"
            end
        end
    end

    # If the loop ran through all maxiter iterations without hitting a break, trajectory still 
    # holds preallocated all-zero columns beyond maxiter; drop them so that every column 
    # corresponds to an actual iteration. (After an early break it is already trimmed.)
    if size(trajectory, 2) > maxiter
        trajectory = trajectory[:, 1:maxiter]
    end
    
    return vector_wrap(bbox, args, params), trajectory, cost
end




bbox_Hessian_keyword_minimization

# ACTUAL EXAMPLE: Basic tanh-fitting example (no dynamics)

In [7]:
# Number of synthetic data points; seed the random number generator before drawing them
npoints = 1000; srand(400)
# Names of the four parameters of the sigmoid model, in the order used by params and seed
args = ["baseline", "amplitude", "threshold", "slope"]

# Generating values for our four params:
params = [1 5 0.5 0.8]

# Make some points and plot them: x is uniform in [-3, 3), y is the tanh sigmoid evaluated
# at x plus Gaussian noise with standard deviation 2
x = rand(npoints, 1)*6-3
y = params[1] + params[2]*0.5*(tanh((x-params[3])/params[4])+1) + randn(npoints,1)*2
figure(1); clf();
plot(x, y, ".")

# Starting values for the four params. Plot the corresponding curve they generate
seed = [8, 3.1, 0, 0.01]
xx = -3:0.01:3
plot(xx, seed[1] + seed[2]*0.5*(tanh((xx-seed[3])/seed[4])+1), "g-")

# Sum-of-squares cost of the tanh sigmoid model against the data (x, y). The model parameters
# come in as keywords; nderivs and difforder are accepted as keywords too, and are only used
# to allocate the plotting grid when do_plot is true.
function JJ(x, y; baseline=0, amplitude=1, threshold=0, slope=1, do_plot=false, fignum=1, clearfig=true,
    nderivs=0, difforder=0)

    # The sigmoid model, evaluated at u with the current parameter values
    sigmoid(u) = baseline + amplitude*0.5*(tanh((u-threshold)/slope)+1)

    if do_plot
        figure(fignum)
        if clearfig
            clf()
        end

        # Copy the plotting grid into a ForwardDiff-compatible container
        gridrange = -3:0.01:3
        grid = ForwardDiffZeros(size(gridrange,1), size(gridrange,2), nderivs=nderivs, difforder=difforder)
        for k=1:length(gridrange)
            grid[k] = gridrange[k]
        end

        plot(x, y, ".")
        plot(grid, sigmoid(grid), "r-")
    end

    residual = sigmoid(x) - y
    return sum(residual.*residual)
end


# Now choose between simple adaptive gradient minimization, or constrained Hessian minimization. 
# As written, the gradient-descent branch passes an empty Dict(), i.e., no parameter is bounded;
# the commented-out Dict inside the call shows how to bound baseline and slope. The Hessian
# branch bounds baseline and slope.  Play with those bounds at will.

if true
    opars = @time(basic_gradient_descent((;pars...) -> JJ(x, y; do_plot=false, pars...), 
    ["baseline", "amplitude", "threshold", "slope"], [8, 3.1, -0.5, 0.04], Dict(),
    # Dict(:baseline=>[2, 10.1], :slope=>[0.001 0.02]), 
    verbose=true, report_every=100, start_eta=0.001, maxiter=1000))
else
    # This matrix-form bounding box is not passed to the call below, which uses a Dict
    bbox = [
        -20  20 ; 
        -20  20 ;
        -20  20 ; 
        -20  20 ;
    ]
    # [1] keeps only the first returned value, the fitted parameter vector
    opars = @time(bbox_Hessian_keyword_minimization(seed, args, Dict(:baseline=>[-2, 10], :slope=>[0.001 5]), 
        (;pars...) -> JJ(x, y; do_plot=false, pars...),
    verbose=false, verbose_level=1, softbox=true, start_eta=1)[1])
end

# Plot the resulting curve, and report both final and generating params
# (first row: fitted values, second row: the values used to generate the data)
plot(xx, opars[1] + opars[2]*0.5*(tanh((xx-opars[3])/opars[4])+1), "r-")
[opars' ; params]

I have args = String["baseline","amplitude","threshold","slope"]
I have params = [8, 3.1, -0.5, 0.04]
I have bbox = Dict{Any,Any}()
I have wallwrap(bbox, make_dict(args, params)) = Dict{Any,Any}(Pair{Any,Any}(:amplitude,3.1),Pair{Any,Any}(:threshold,-0.5),Pair{Any,Any}(:slope,0.04),Pair{Any,Any}(:baseline,8.0))




100: eta=0.0251123, cost=3795.66, ps=[1.05716, 4.94554, 0.498406, 0.655728]
200: eta=2.34987e-05, cost=3794.26, ps=[1.01254, 5.05359, 0.504565, 0.693853]
300: eta=2.18872e-14, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
400: eta=5.11849e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
500: eta=6.46889e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
600: eta=6.80955e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
700: eta=7.29267e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
800: eta=7.57057e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
900: eta=8.97576e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
1000: eta=8.96092e-17, cost=3794.26, ps=[1.01252, 5.05366, 0.50456, 0.693876]
  2.172437 seconds (874.35 k allocations: 1.557 GB, 7.40% gc time)


2×4 Array{Float64,2}:
 1.01252  5.05366  0.50456  0.693876
 1.0      5.0      0.5      0.8     

# ACTUAL EXAMPLE: Same basic tanh-fitting, but packing two params into a vector

In [35]:
npoints = 1000; 
# "baseline" is now a packed parameter taking up 2 slots of the parameter vector
# (the old baseline and amplitude); threshold and slope remain scalars
args = [["baseline" 2], "threshold", "slope"]

# Generating values for our four params:
params = [1 5 0.5 0.8]

# Make some points and plot them: x is uniform in [-3, 3), y is the tanh sigmoid evaluated
# at x plus Gaussian noise with standard deviation 2
x = rand(npoints, 1)*6-3
y = params[1] + params[2]*0.5*(tanh((x-params[3])/params[4])+1) + randn(npoints,1)*2
figure(1); clf();
plot(x, y, ".")

# Starting values for the four params. Plot the corresponding curve they generate
seed = [8, 3.1, 0, 0.01]
xx = -3:0.01:3
plot(xx, seed[1] + seed[2]*0.5*(tanh((xx-seed[3])/seed[4])+1), "g-")

# Sum-of-squares cost of the tanh sigmoid model against the data (x, y), with the offset and
# the amplitude packed into the two-element keyword `baseline`. nderivs and difforder are
# accepted as keywords too, and are only used to allocate the plotting grid when do_plot is true.
function JJ(x, y; baseline=[0 1], threshold=0, slope=1, do_plot=false, fignum=1, clearfig=true,
    nderivs=0, difforder=0)

    # The sigmoid model, evaluated at u with the current parameter values
    sigmoid(u) = baseline[1] + baseline[2]*0.5*(tanh((u-threshold)/slope)+1)

    if do_plot
        figure(fignum)
        if clearfig
            clf()
        end

        # Copy the plotting grid into a ForwardDiff-compatible container
        gridrange = -3:0.01:3
        grid = ForwardDiffZeros(size(gridrange,1), size(gridrange,2), nderivs=nderivs, difforder=difforder)
        for k=1:length(gridrange)
            grid[k] = gridrange[k]
        end

        plot(x, y, ".")
        plot(grid, sigmoid(grid), "r-")
    end

    residual = sigmoid(x) - y
    return sum(residual.*residual)
end


# Now choose between simple adaptive gradient minimization, or constrained Hessian minimization. 
# Both here bound the packed baseline parameter and the slope, through the Dict() passed in 
# each call.  Play with those bounds at will.

if true
    opars = @time(basic_gradient_descent((;pars...) -> JJ(x, y; do_plot=false, pars...), 
    args, [8, 3.1, -0.5, 0.04], Dict(:baseline=>[-2, 10.1], :slope=>[0.001 0.02]), 
    verbose=true, report_every=100, start_eta=0.001, maxiter=1000))
else
    # [1] keeps only the first returned value, the fitted parameter vector
    opars = @time(bbox_Hessian_keyword_minimization(seed, args, Dict(:baseline=>[-2, 10], :slope=>[0.001 5]), 
        (;pars...) -> JJ(x, y; do_plot=false, pars...),
    verbose=false, verbose_level=1, softbox=true, start_eta=1)[1])
end

# Plot the resulting curve, and report both final and generating params
# (first row: fitted values, second row: the values used to generate the data)
plot(xx, opars[1] + opars[2]*0.5*(tanh((xx-opars[3])/opars[4])+1), "r-")
[opars' ; params]

100: eta=0.0114147, cost=4420.02, ps=[1.42829, 3.76419, 0.551434, 0.0490457]




200: eta=0.00252822, cost=4404.96, ps=[1.31427, 3.93787, 0.557999, 0.050302]
300: eta=0.00123194, cost=4403.68, ps=[1.27977, 3.98762, 0.559051, 0.0512706]
400: eta=0.00027286, cost=4403.53, ps=[1.26832, 4.00398, 0.558817, 0.0520889]
500: eta=0.000132958, cost=4403.51, ps=[1.26456, 4.00934, 0.558526, 0.0527807]
600: eta=2.07486e-05, cost=4403.51, ps=[1.26329, 4.01115, 0.558581, 0.0533823]
700: eta=3.4869e-05, cost=4403.51, ps=[1.26286, 4.01176, 0.558581, 0.0539201]
800: eta=6.86217e-06, cost=4403.51, ps=[1.26271, 4.01197, 0.558594, 0.0543969]
900: eta=1.35904e-05, cost=4403.51, ps=[1.26266, 4.01204, 0.558604, 0.0548577]
1000: eta=1.78778e-05, cost=4403.51, ps=[1.26265, 4.01206, 0.558605, 0.0552563]
  1.931105 seconds (633.27 k allocations: 1.541 GB, 10.35% gc time)


2×4 Array{Float64,2}:
 1.44444  4.01206  0.558605  0.0199985
 1.0      5.0      0.5       0.8      

# SANDLOT from here on

In [17]:
    # Soft-wall a set of parameters. bdict maps names to [minval, maxval] limits; pdict maps
    # names to values (or is an Array that Dict() can turn into such a mapping). Every value
    # whose name appears in bdict is squashed through a tanh so that it lies between its two
    # limits. The input pdict is left untouched: the result is built on a copy.
    function wallwrap(bdict, pdict)
        wrapped = isa(pdict, Array) ? Dict(copy(pdict)) : copy(pdict)

        for (name, limits) in bdict
            lo = limits[1]
            hi = limits[2]
            halfwidth = 0.5*(hi - lo)
            center    = 0.5*(hi + lo)

            wrapped[name] = lo + halfwidth*(tanh((wrapped[name]-center)/halfwidth)+1)
        end

        return wrapped
    end


# Quick check of wallwrap: :a (a 1x3 array of values) has limits in bdict and gets squashed
# into them, while :b has no entry in bdict and is passed through unchanged.
pdict = Dict(:a=>[1.1 25 300], :b=>-2000.1)
bdict = Dict(:a=>[12, 51])

newdict = wallwrap(bdict, pdict)
newdict

Dict{Symbol,Any} with 2 entries:
  :a => [13.6525 25.2305 51.0]
  :b => -2000.1

In [41]:
# Display the current contents of bdict
bdict

Dict{Any,Any} with 0 entries

In [37]:
    """
    Vector version of the tanh-walling mechanism. bbox is a dictionary of symbols to 
    [minval, maxval] vectors; args lists the parameter names, each entry being either a string 
    or a [name n] array for a parameter that takes up n consecutive slots; eparams holds the 
    corresponding values. Returns a copy of eparams in which every parameter that has an entry 
    in bbox has been put through wallwrap. eparams, bbox and args are not modified.
    """
    function vector_wrap(bbox, args, eparams)        
        wrapped = copy(eparams)
        walled  = wallwrap(bbox, make_dict(args, wrapped))

        slot = 1    # next position of wrapped to fill
        for arg in args
            if isa(arg, Array)
                nslots = arg[2]
                wrapped[slot:slot+nslots-1] = walled[Symbol(arg[1])]
                slot += nslots
            else
                wrapped[slot] = walled[Symbol(arg)]
                slot += 1
            end
        end

        return wrapped
    end




vector_wrap

In [40]:
# Run vector_wrap on a 4-value vector laid out as a 3-slot parameter "a" followed by a
# scalar "b", using whatever bdict currently holds
params = [100.1 25 300 -444.4]
params = vector_wrap(bdict, [["a" 3], "b"], params)
params

1×4 Array{Float64,2}:
 100.1  25.0  300.0  -444.4

In [46]:
# Bound only the 2-slot "baseline" parameter to [10, 20]; threshold and slope have no entry
# in bdict
bdict = Dict(:baseline=>[10 20])
args = [["baseline" 2], "threshold", "slope"]
params = [1 5 0.5 0.8]

vector_wrap(bdict, args, params)


1×4 Array{Float64,2}:
 10.0368  10.1799  0.5  0.8

In [45]:
# The dictionary form of the same computation: wallwrap applied to the output of make_dict
wallwrap(bdict, make_dict(args, params))

Dict{Any,Any} with 3 entries:
  :threshold => 0.5
  :slope     => 0.8
  :baseline  => [10.0368,10.1799]

In [58]:
# Read the "results" variable from Results.mat, dropping its first row, and plot its second
# column against its first in figure 1, with the y axis limited to [0, 1].
# NOTE(review): matread is not brought into scope anywhere in this file; presumably the MAT
# package is loaded by one of the included utility files -- confirm.
results = matread("Results.mat")["results"][2:end,:]
figure(1); clf()
plot(results[:,1], results[:,2], ".")
ylim(0, 1)

(0,1)