<h1 id="tocheading">TABLE OF CONTENTS</h1>
<div id="toc"></div>

**Updates to the table of contents are periodic, but run the cell below to first start or force an update.**

In [2]:
# String macro enabling javascript"..." literals: the literal's text is handed to
# display() with MIME type "text/javascript" when the macro is expanded.
macro javascript_str(s)
    display("text/javascript", s)
end

# Fetch and run the script that builds the table of contents.
javascript"""
$.getScript('https://sites.google.com/site/brodylabhome/files/make_table_of_contents.js')
"""



In [3]:

using PyCall
using PyPlot
using ForwardDiff
using DiffBase

pygui(true)

import Base.convert
# Convert a single ForwardDiff.Dual to a plain Float64 by keeping only its `value`
# field; all derivative information is dropped.
convert(::Type{Float64}, x::ForwardDiff.Dual) = Float64(x.value)

# Convert an array of Duals to a same-shaped Array{Float64}, element by element.
#
# The first argument is dispatched on the *type* Array{Float64}, so the usual call
# form `convert(Array{Float64}, x)` reaches this method. `Array{<:ForwardDiff.Dual}`
# accepts arrays whose element type is any concrete Dual{...}, as well as arrays
# declared with the unparameterized ForwardDiff.Dual element type.
function convert(::Type{Array{Float64}}, x::Array{<:ForwardDiff.Dual})
    y = zeros(Float64, size(x))
    for i in eachindex(x)
        y[i] = convert(Float64, x[i])
    end
    return y
end

# Backward-compatible form: this file originally declared the first argument as an
# Array{Float64} *instance* (whose contents are ignored). Kept so that any existing
# call written that way still works; it simply forwards to the method above.
convert(::Array{Float64}, x::Array{<:ForwardDiff.Dual}) = convert(Array{Float64}, x)

include("general_utils.jl")
include("hessian_utils.jl")

"""
We define functions to convert Duals, the variable types used by ForwardDiff, 
to Floats. This is useful if we want to print out the value of a variable 
(since print doesn't know how to print Duals). Note that after being converted to a Float, no
differentiation by ForwardDiff can happen!  e.g. after
    x = convert(Float64, y)
ForwardDiff can still differentiate y, but it can't differentiate x
"""



"We define functions to convert Duals, the variable types used by ForwardDiff, \nto Floats. This is useful if we want to print out the value of a variable \n(since print doesn't know how to Duals). Note that after being converted to a Float, no\ndifferentiation by ForwardDiff can happen!  e.g. after\n    x = convert(Float64, y)\nForwardDiff can still differentiate y, but it can't differentiate x\n"

# Setup -- definitions of forwardModel() and backwardsModel()

In [4]:
"""
    o = g(z)

Squashing tanh function, `0.5*tanh(z) + 0.5`. Its output runs from 0 to 1 and is
equal to 0.5 when the input is 0.

`z` may be a scalar or an array; arrays are processed elementwise and the result has
the same shape as `z`.
"""
function g(z)
    # Every operation is dotted, so the same expression handles scalars and arrays
    # without relying on array+scalar `+`.
    return 0.5 .* tanh.(z) .+ 0.5
end
    
"""
    z = ginverse(o)

Inverse of the squashing tanh function `g`: returns `0.5*log(o/(1-o))`. The input
must be in (0, 1); the output is zero when passed 0.5.

The argument may be a scalar or an array; arrays are processed elementwise. (The
function's parameter is itself named `z` in the code, but it plays the role of the
squashed value `o` above.)
"""
function ginverse(z)
    # Fully dotted so that scalars and arrays are both handled.
    return 0.5 .* log.(z ./ (1 .- z))
end


"""
    forwardModel(startU; dt=0.01, tau=0.1, nsteps=100, input=[], noise=[], W=[0 -5;-5 0],
        init_add=0, const_add=0, sigma=0, g_leak=1, U_rest=0, theta=0, beta=1,
        do_plot=false, nderivs=0, difforder=0, clearfig=true, fignum=1, dUdt_mag_only=false)

Runs a tanh() style-network forwards in time, given its starting point, using simple Euler integration

    tau dU/dt = g_leak*(U_rest - U) + W*V + I
    V = 0.5*tanh((U - theta)/beta) + 0.5

**PARAMETERS:**

startU     A column vector, nunits-by-1, indicating the values of U at time zero.
           An error is raised if startU has more columns than rows.


**OPTIONAL PARAMETERS**

dt      Scalar, timestep size

tau     Scalar, in seconds

g_leak, U_rest
        dUdt has a term equal to g_leak*(U_rest - U). Note the keyword is spelled
        `g_leak`; any other spelling (e.g. `gleak`) is absorbed by the trailing
        catch-all keywords and silently ignored.

theta, beta
        V is computed from U as g((U - theta)/beta)

nsteps  Number of timesteps to run, including time=0.

input   Either a scalar (same input to every unit at every timestep), an empty array
        (no input), an nunits-by-1 vector, in which case inputs to each unit are constant
        across time, or a matrix, nunits-by-nsteps, indicating input for each unit at
        each timepoint.

noise   Same accepted forms as input. noise[:,1] is added to startU to form U[:,1], and
        noise[:,i] is added directly to U[:,i] at each later step (it is not scaled by dt).

W       Weight matrix, nunits-by-nunits (a non-array W is wrapped into a 1-by-1 matrix)

init_add    Vector or scalar that gets added to the input at the very first timestep,
            input[:,1]

const_add   Gets added to the input at every timestep

sigma       At each timestep after the first, sigma*sqrt(dt)*randn() is added to each
            element of U

do_plot   Default false, if true, plots V of up to the first two dimensions

fignum     Figure number on which to plot

clearfig   If true, the figure is first cleared, otherwise any plot is overlaid

nderivs, difforder     Required for making sure function can create its own arrays and 
                       still be differentiated

dUdt_mag_only  If true, returns |dUdt|^2 from the first timestep only, then stops.

Any other keyword arguments are accepted and ignored.

** RETURNS:**

Uend Vend       nunits-by-1 vectors representing the final values of U and V that were found.
U, V            nunits-by-nsteps matrices containing the full trajectories

(When dUdt_mag_only is true and nsteps >= 2, a single scalar is returned instead.)

"""
function forwardModel(startU; dt=0.01, tau=0.1, nsteps=100, input=[], noise=[], W=[0 -5;-5 0], 
    init_add=0, const_add=0, do_plot=false, nderivs=0, difforder=0, clearfig=true, fignum=1,
    dUdt_mag_only=false, sigma=0, g_leak=1, U_rest=0, theta=0, beta=1, other_unused_params...)

    # Copy `input` into an array obtained from ForwardDiffZeros (presumably a freshly
    # allocated array able to hold ForwardDiff numbers), so that the init_add/const_add
    # adjustments below operate on our own copy rather than on the caller's array.
    my_input = ForwardDiffZeros(size(input,1), size(input,2), nderivs=nderivs, difforder=difforder)
    for i in 1:length(input); my_input[i] = input[i]; end
    input = my_input
    
    nunits = length(startU)
    if size(startU,2) > size(startU,1)
        error("startU must be a column vector")
    end
    
    # --- formatting input: expand to nunits-by-nsteps ---
    if ~(typeof(input)<:Array) || prod(size(input))==1  # was a scalar
        input = input[1]*(1+ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder))
    elseif length(input)==0 # was the empty matrix
        input = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    elseif size(input,2)==1     # was a column vector
        input = input*(1+ForwardDiffZeros(1, nsteps, nderivs=nderivs, difforder=difforder))
    end    
    # --- formatting noise: expand to nunits-by-nsteps ---
    if ~(typeof(noise)<:Array) || prod(size(noise))==1  # was a scalar
        # noise[1] (as for input above) so that a 1-element array is treated as a scalar
        # instead of being matrix-multiplied against an nunits-by-nsteps matrix.
        noise = noise[1]*(1+ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder))
    elseif length(noise)==0 # was the empty matrix
        noise = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    elseif size(noise,2)==1     # was a column vector
        noise = noise*(1+ForwardDiffZeros(1, nsteps, nderivs=nderivs, difforder=difforder))
    end    
    
    U = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    V = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    
    if ~(typeof(W)<:Array); W = [W]; end

    W     = reshape(W, nunits, nunits)
    U     = reshape(U, nunits, nsteps)
    V     = reshape(V, nunits, nsteps)
    input = reshape(input, nunits, nsteps)
    noise = reshape(noise, nunits, nsteps)

    # init_add affects only the first timestep's input; const_add affects every timestep
    input[:,1] += init_add
    input      += const_add

    U[:,1] = startU + noise[:,1]
    V[:,1] = g((U[:,1]-theta)/beta)
    
    # Euler integration
    for i=2:nsteps
        dUdt = g_leak*(U_rest -U[:,i-1]) + W*V[:,i-1] + input[:,i-1]
        if dUdt_mag_only; return sum(dUdt.*dUdt); end;
        U[:,i] = U[:,i-1] + (dt/tau)*dUdt + noise[:,i] + sigma*sqrt(dt)*randn(size(U,1),1)
        V[:,i] = g((U[:,i]-theta)/beta)
    end

    if do_plot
        figure(fignum)
        if length(startU)==1
            # single unit: V against time; green dot marks the start, red dot the end
            if clearfig; clf(); end;
            t = (0:nsteps-1)*dt
            plot(t, V[1,:], "b-")
            plot(t[1], V[1,1], "g.")
            plot(t[end], V[1,end], "r.")
            xlabel("t"); ylabel("V1"); ylim([-0.01, 1.01])
        elseif length(startU)>=2
            # two or more units: phase plot of the first two units' V
            if clearfig; clf(); end;
            plot(V[1,:], V[2,:], "b-")
            plot(V[1,1], V[2,1], "g.")
            plot(V[1,end], V[2,end], "r.")
            xlabel("V1"); ylabel("V2"); 
            xlim([-0.01, 1.01]); ylim([-0.01, 1.01])
        end
    end

    return U[:,end], V[:,end], U, V
end


"""
    backwardsModel(endU; nsteps=100, start_eta=10, tol=1e-15, maxiter=400,
        do_plot=false, init_add=0, input=[], noise=[], nderivs=0, difforder=0,
        clearfig=false, fignum=1, params...)

Runs a tanh() style-network BACKWARDS in time, given its ending point, by making a backwards
guess at each timepoint and then using Hessian minimization to find the backwards vector that correctly
leads to the current timestep value.  Uses forwardModel() . The forwards equations are:

    tau dU/dt = -U + W*V + I
    V = 0.5*tanh(U)+ 0.5

**PARAMETERS:**

endU     A column vector, nunits-by-1, indicating the values of U at time=end


**OPTIONAL PARAMETERS:**

nsteps  Number of timesteps to run, including time=0.

input   Either a scalar, an empty array (no input), an nunits-by-1 vector, in which case
        inputs to each unit are constant across time, or a matrix, nunits-by-nsteps,
        indicating input for each unit at each timepoint.

noise   Same accepted forms as input.

init_add  Passed on to forwardModel() only for the step that starts at the first timepoint;
          zero is passed for all other steps.

do_plot   Default false, if true, plots V of up to the first two dimensions

tol       Tolerance in the minimization procedure for finding each backwards timestep. Passed on
          to trust_region_Hessian_minimization()

start_eta, maxiter   Passed on to trust_region_Hessian_minimization()

fignum     Figure number on which to plot

clearfig   If true, the figure is first cleared, otherwise any plot is overlaid

nderivs, difforder     Required for making sure function can create its own arrays and 
                       still be differentiated

params...  Any other keyword arguments (e.g. dt, tau, W) are passed on to forwardModel().
           If dt is among them it is also used for the time axis of the single-unit plot
           (otherwise forwardModel()'s default of 0.01 is used there).


** RETURNS:**

Ustart Vstart   nunits-by-1 vectors representing the starting values of U and V that were found.
U, V            nunits-by-nsteps matrices containing the full trajectories
costs           nsteps-by-1 array with the final cost from the minimization procedure for each
                timestep. This is the squared difference between the U[t+1] produced by the U[t] 
                guess and the actual U[t+1]. The last entry is never assigned.

"""
function backwardsModel(endU; nsteps=100, start_eta=10, tol=1e-15, maxiter=400, 
    do_plot=false, init_add=0, input=[], noise=[], nderivs=0, difforder=0, clearfig=false, fignum=1, params...)    

    nunits = length(endU)

    # --- formatting input: expand to nunits-by-nsteps ---
    if ~(typeof(input)<:Array) || prod(size(input))==1  # was a scalar
        input = input[1]*(1+ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder))
    elseif length(input)==0 # was the empty matrix
        input = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    elseif size(input,2)==1     # was a column vector
        input = input*(1+ForwardDiffZeros(1, nsteps, nderivs=nderivs, difforder=difforder))
    end    
    # --- formatting noise: same rules as for input, and as in forwardModel() ---
    if ~(typeof(noise)<:Array) || prod(size(noise))==1  # was a scalar
        noise = noise[1]*(1+ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder))
    elseif length(noise)==0 # was the empty matrix
        noise = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    elseif size(noise,2)==1     # was a column vector
        noise = noise*(1+ForwardDiffZeros(1, nsteps, nderivs=nderivs, difforder=difforder))
    end    
    
    # Cost for one backwards step: squared distance between U2 and the result of running
    # forwardModel() for a single step (nsteps=2) starting at U1. `input` and `noise` here
    # are the two-column windows for that step, supplied by the caller below.
    function J(U1, U2; nderivs=0, difforder=0, noise=[], input=[], pars...)
        U2hat = forwardModel(U1; nsteps=2, noise=noise, input=input, nderivs=nderivs, difforder=difforder, pars...)[1]
        DU = U2hat - U2
    
        return sum(DU.*DU)
    end
    
    U = ForwardDiffZeros(nunits, nsteps, nderivs=nderivs, difforder=difforder)
    U = reshape(U, nunits, nsteps)
    costs = ForwardDiffZeros(nsteps, 1, nderivs=nderivs, difforder=difforder)    
    
    U[:,end] = endU
    for i=(nsteps-1):-1:1
        # init_add only applies to the step that starts at the very first timepoint
        if i==1
            my_init_add = init_add
        else
            my_init_add = 0
        end
        
        # Starting from the guess U[:,i+1], search for the U[:,i] whose one-step forward
        # image is U[:,i+1]. Only the first two returned values are used.
        U[:,i], costs[i] = trust_region_Hessian_minimization(U[:,i+1], 
            (x) -> J(x, U[:,i+1]; nderivs=length(endU), difforder=2, 
            input=input[:,i:i+1], noise = noise[:,i:i+1], init_add=my_init_add, params...); 
            verbose=false, start_eta=start_eta, tol=tol, maxiter=maxiter)
        U[:,i] += noise[:,i]
    end
    
    
    # NOTE(review): forwardModel() computes V as g((U-theta)/beta); here theta and beta
    # (if present in params) are not applied -- confirm whether that is intended.
    V = g(U)
    
    if do_plot
        figure(fignum)        
        if length(endU)==1
            if clearfig; clf(); end;
            # dt is not a named keyword of this function; take it from the pass-through
            # keywords, falling back on forwardModel()'s default.
            t = (0:nsteps-1)*get(Dict(params), :dt, 0.01)
            plot(t, V[1,:], "m-")
            plot(t[1], V[1,1], "go")
            plot(t[end], V[1,end], "ro")            
            ylim([-0.01, 1.01])
        elseif length(endU)>=2
            if clearfig; clf(); end;            
            plot(V[1,:], V[2,:], "m-")
            plot(V[1,1], V[2,1], "go")
            plot(V[1,end], V[2,end], "ro")
            xlim([-0.01, 1.01]); ylim([-0.01, 1.01])
        end
    end
    
    return U[:,1], V[:,1], U, V, costs
end

backwardsModel

# Example of getting stuck

In [5]:
# JJ(initUs; ...) -- cost function over a set of starting points.
#
# Each row of initUs is run through forwardModel() and the final V of each run is
# collected. From the difference between the first two columns of those final V's:
#
#     hits  = 0.5*(1 + tanh((V1 - V2)/theta1))
#     diffs = tanh((V1 - V2)/theta2)^2
#
# and the returned cost is  (mean(hits) - 0.75)^2 - beta*mean(diffs).
#
# KEYWORDS:
#   theta1, theta2   scale factors inside the two tanh's above
#   beta             weight of the mean(diffs) term (this keyword is consumed here and
#                    is not among the keywords forwarded to forwardModel())
#   verbose          if true, prints pre_string followed by a one-line cost report
#   nderivs, difforder   passed to ForwardDiffZeros() and forwardModel()
#   do_plot          if true, clears the current figure and has every forwardModel()
#                    run overlay its plot on it
#   seedrand         if not NaN, srand(seedrand) is called before the runs
#   zero_last_sigmas accepted for compatibility; currently has no effect (the branch
#                    that used it was disabled)
#   params...        all remaining keywords are forwarded to forwardModel()
function JJ(initUs; theta1=0.15, theta2=0.2, beta=0.003, verbose=false, nderivs=0, difforder=0, 
    do_plot=false, pre_string="", zero_last_sigmas=0, seedrand=NaN, params...)

    if ~isnan(seedrand); srand(seedrand); end
    
    Vend = ForwardDiffZeros(size(initUs,1), size(initUs,2), nderivs=nderivs, difforder=difforder)

    if do_plot; clf(); end;
    
    # one forward run per starting point; keep only the final V of each run
    for i=1:size(initUs,1)
        Ue, Ve, U, V = forwardModel(initUs[i,:]; nderivs=nderivs, difforder=difforder, 
            do_plot=do_plot, clearfig=false, params...)
        Vend[i,:] = Ve
    end
    
    hits = 0.5*(1 + tanh.((Vend[:,1]-Vend[:,2])/theta1))
    diffs = tanh.((Vend[:,1]-Vend[:,2])/theta2).^2
    
    cost1 = (mean(hits) - 0.75).^2 
    cost2 = -beta*mean(diffs)
    
    if verbose
        # convert() to Float64 first so values can be printed even when they are Duals
        @printf("%s", pre_string)
        @printf("-- cost=%g,   cost1=%g, cost2=%g :  mean(hits)=%g, mean(diffs)=%g\n", 
            convert(Float64, cost1+cost2), convert(Float64, cost1), convert(Float64, cost2),
            convert(Float64, mean(hits)), convert(Float64, mean(diffs)))
    end
    
    return cost1 + cost2
end


# The following sequence leads to a situation where having only [-0.8, -0.8] as the single finalFluxPoint 
# leads to the minimization getting stuck.  Adding further finalFluxPoints solves the problem
#
# Seed the random number generator, then draw startU twice: the second assignment
# overwrites the first, so the starting points actually used come from the second
# batch of random numbers drawn after seeding.
srand(11)
startU=randn(100,2)-3
startU=randn(100,2)-3

# Time grid: nsteps is the number of points in 0:dt:1
dt = 0.02
t = 0:dt:1
tau = 0.1
nsteps = length(t)
t = t[1:nsteps]   # nsteps == length(t), so this leaves t unchanged

# Scalar settings; W is the off-diagonal entry of the 2-by-2 weight matrix built below
W = -4
noise = 0
input = 0
sigma = 0


# Keyword arguments shared by the model calls further down
model_params = Dict(:dt=>dt, :tau=>tau, :W=>[0 W; W 0], :nsteps=>nsteps, 
:noise=>noise, :input=>input, :sigma=>sigma, :const_add=>0, :init_add=>0)


# WORKING gradient:
# ForwardDiff.gradient((x)->JJ(startU; do_plot=true, nderivs=length(x), difforder=1, 
#    make_dict([["init_add" 2], "const_add"], x, model_params)...), [2.9, -2.9, 0.1])



# The backward and costfunc functions should turn a single-scalar parameter W into the matrix W
# backward always runs with no within-forward noise, i.e., sigma=0
#
# backward(endpoint; do_plot=false, pars...) returns the first output of backwardsModel()
# run from `endpoint`, with sigma forced to 0 through make_dict().
backward = (endpoint; do_plot=false, pars...) -> begin
    kw = Dict(pars)
    # expand W only if it is not already a matrix
    if haskey(kw, :W) && length(kw[:W]) == 1
        w = kw[:W]
        kw = make_dict(["W"], [[0 w;w 0]], kw)
    end
    kw = make_dict(["sigma"], [0], kw)
    return backwardsModel(endpoint; do_plot=do_plot, kw...)[1]
end


# Weight of the mean(diffs) term in JJ(). Cost limits are tabulated below for
# beta = 0.003, 0.001, 0.05 and for any beta < 0.001.
beta = 0

# costfunc(startpoints; ...) evaluates JJ() on `startpoints`, seeding the random number
# generator with `sr` and using the global `beta` above. A single-scalar parameter W is
# first turned into the matrix [0 W; W 0]; all remaining keywords go on to JJ().
costfunc = (startpoints; do_plot=false, verbose=false, nderivs=0, difforder=0, sr=26, pars...) -> begin
    pars = Dict(pars)
    # expand W only if it is not already a matrix
    if haskey(pars, :W) && length(pars[:W]) == 1
        w = pars[:W]
        pars = make_dict(["W"], [[0 w;w 0]], pars)
    end
    JJ(startpoints; seedrand=sr, beta=beta, 
        do_plot=do_plot, verbose=verbose, nderivs=nderivs, difforder=difforder, pars...)
end


# Cost limit that goes with the chosen beta
if beta==0.003;     cost_limit = -0.00288
elseif beta<0.001;  cost_limit = -0.0008
elseif beta==0.001; cost_limit = -0.000935
elseif beta==0.05;  cost_limit = -0.0485
else
    # error() does not interpret printf-style formats, so interpolate beta directly
    error("Don't know what cost limit goes with beta $(beta)")
end

# Candidate final flux points; the second assignment replaces the list with an empty
# 0-by-2 matrix, so no flux points are in use for this run.
fluxFinalPoint = [-0.8 -0.8; -0.6 -0.6 ; -0.4 -0.4; -0.2 -0.2; 0 0; 0.2 0.2]
fluxFinalPoint = zeros(0,2);


# Names of the parameters being fit and their starting values.
# NOTE(review): ["init_add" 2] presumably tells make_dict() that init_add takes two
# entries of the value vector (seed has 4 values for 3 names) -- confirm in make_dict().
args = [["init_add" 2], "const_add", "W"] # , "sigma"]
seed = [2, 2, 2.1, -1] # , 0.1]



# Show the cost and the trajectories at the seed parameters
clf()
print("seed = "); print_vector_g(seed); print("\n")
costfunc(startU; do_plot=true, verbose=true, make_dict(args, seed, model_params)...)

# :sigma=>[-0.3, 0.3] does fine but :sigma=>[-0.2, 0.2] gets stuck.
# If we fix sigma at 0 it also gets stuck, but dynamics kind of odd, W a bit too big, or decrease dt
#
# Minimize costfunc over the parameters named in args, starting from seed. The Dict gives a
# [-5.1, 5.1] range for init_add; the keyword-argument closure merges the parameters it is
# handed into model_params before calling costfunc.
params, traj = bbox_Hessian_keyword_minimization(seed, args, Dict(:init_add=>[-5.1, 5.1]), # , :sigma=>[-0.2, 0.2]), 
(;params...) -> costfunc(startU; do_plot=false, verbose=true, merge(model_params, Dict(params))...), 
 verbose=true, start_eta=1, tol=1e-16, hardbox=true )

# params, cost, ptraj, gtraj = fluxSense(costfunc, backward, model_params, startU, fluxFinalPoint, args, seed; 
#    start_eta=0.01, tol=1e-15, maxiter=400, verbose=true, report_every=1, do_plot=false, cost_limit=cost_limit) # cost_limit=-0.000935) # for beta=0.01

# And show the final position
clf()
costfunc(startU; do_plot=true, verbose=true, make_dict(args, params, model_params)...)
# Last expression of the cell: the fitted parameters, transposed for display
params'

seed = [2, 2, 2.1, -1]
-- cost=0.0624999,   cost1=0.0624999, cost2=0 :  mean(hits)=0.5, mean(diffs)=2.29331e-08
0: eta=1 ps=[2.000, 2.000, 2.100, -1.000]
-- cost=0.0624999,   cost1=0.0624999, cost2=0 :  mean(hits)=0.5, mean(diffs)=2.29321e-08
-- cost=0.0625,   cost1=0.0625, cost2=0 :  mean(hits)=0.5, mean(diffs)=3.67053e-09
1: eta=0.5 cost=0.0624999 jtype=Newton costheta=NaN ps=[2.000, 2.000, 2.100, -1.000]
-- cost=0.0624578,   cost1=0.0624578, cost2=0 :  mean(hits)=0.500084, mean(diffs)=2.53654e-06
2: eta=0.55 cost=0.0624578 jtype=constrained costheta=-0.780 ps=[2.271, 1.723, 1.924, -1.262]
-- cost=0.0624834,   cost1=0.0624834, cost2=0 :  mean(hits)=0.500033, mean(diffs)=3.48113e-07
3: eta=0.275 cost=0.0624578 jtype=Newton costheta=NaN ps=[2.271, 1.723, 1.924, -1.262]
-- cost=0.0624834,   cost1=0.0624834, cost2=0 :  mean(hits)=0.500033, mean(diffs)=3.48113e-07
4: eta=0.1375 cost=0.0624578 jtype=Newton costheta=NaN ps=[2.271, 1.723, 1.924, -1.262]
-- cost=0.0623676,   cost1=0.0623676, 

1×4 Array{Float64,2}:
 2.77878  0.744332  7.0538  -14.1635