In [None]:
import Pkg;Pkg.activate("optim")
Pkg.instantiate()

In [None]:
using Cuba, Distributions
using BenchmarkTools, Test
using Quadrature

@test Threads.nthreads()>1
Threads.nthreads()

In [None]:
M=5 # number of independent beta random variables
atol=1e-6
rtol=1e-3

# integrate the pdf of the joint distribution -- should always equal 1
# Scalar Cuba integrand: writes into `f[1]` the joint density, evaluated at the
# point `x`, of `M` independent Beta(1.0, 2.0) variables (`M` is a global).
function int(x, f)
    joint = Product([Beta(1.0, 2.0) for _ in 1:M])
    f[1] = pdf(joint, x)
end

# multithread
# Vectorized Cuba integrand: each column of `x` is one evaluation point and the
# joint Beta(1.0, 2.0) density at column `i` is written to `f[i]`.
# The columns are processed in parallel with `Threads.@threads`.
function int_thread_col(x, f)
    # The product distribution does not depend on the column, so it is built
    # once instead of once per evaluation point; its size follows `x`.
    joint = Product(Beta.(1.0, fill(2.0, size(x, 1))))
    Threads.@threads for i in 1:size(x, 2)
        f[i] = pdf(joint, @view(x[:, i]))
    end
    return nothing
end

# multithread and loop to create product distribution
# Vectorized Cuba integrand: for every column `j` of `x`, writes to `f[1, j]`
# the product over rows of the Beta(1.0, 2.0) density at `x[i, j]`.
# Assumes `f` has at least as many columns as `x` (Cuba's vectorized layout) — confirm
# if called from elsewhere.
function int_thread_el(x, f)
    marginal = Beta(1.0, 2.0)
    Threads.@threads for j in 1:size(x, 2)
        # Accumulate locally and store once, rather than read-modify-writing f[1, j].
        acc = 1.0
        for i in 1:size(x, 1)
            # Scalar indexing is allocation-free; a `@view` here would hand `pdf`
            # a 0-dimensional array instead of a real number.
            acc *= pdf(marginal, x[i, j])
        end
        f[1, j] = acc
    end
    return nothing
end

In [None]:
# we get the right answer
@show result, err = cuhre(int, M, 1, atol=atol, rtol=rtol);
@show result, err = cuhre(int_thread_col, M, 1, atol=atol, rtol=rtol,nvec=100);
@show result, err = cuhre(int_thread_el, M, 1, atol=atol, rtol=rtol,nvec=100);

In [None]:
#@btime cuhre($(int), $M, 1, atol=$atol, rtol=$rtol) # slow

println("multithread")
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(16))
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100))
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000))
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000))
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100000))
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000000))
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000000))

println("multithread and create product in loop")
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(16))
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100))
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000))
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000))
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100000))
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000000))
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000000));

# conclusions
# int_thread_el seems faster than int_thread_col
# nvec seems important for performance, looks like we want high nvec

In [None]:
@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000000));

In [None]:
# let's try without index checking
# Same integrand as `int_thread_col`, with bounds checking disabled on the store
# into `f[i]`. Each column of `x` is one evaluation point.
function int_thread_col_in(x, f)
    # Loop-invariant distribution, built once; its size follows `x`.
    joint = Product(Beta.(1.0, fill(2.0, size(x, 1))))
    Threads.@threads for i in 1:size(x, 2)
        @inbounds f[i] = pdf(joint, @view(x[:, i]))
    end
    return nothing
end

# Same integrand as `int_thread_el`, with bounds checking disabled inside the
# loops. Writes the product of Beta(1.0, 2.0) densities of column `j` to `f[1, j]`.
# Assumes `f` has at least as many columns as `x` — required for `@inbounds` to be safe.
function int_thread_el_in(x, f)
    marginal = Beta(1.0, 2.0)
    Threads.@threads for j in 1:size(x, 2)
        acc = 1.0
        for i in 1:size(x, 1)
            # Scalar indexing instead of a 0-dimensional `@view`.
            @inbounds acc *= pdf(marginal, x[i, j])
        end
        @inbounds f[1, j] = acc
    end
    return nothing
end

nvec=10000000
@btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$nvec)
@btime cuhre($int_thread_col_in, $M, 1, atol=$atol, rtol=$rtol,nvec=$nvec)

@btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$nvec)
@btime cuhre($int_thread_el_in, $M, 1, atol=$atol, rtol=$rtol,nvec=$nvec);

# conclusion
# using @inbounds does not seem to make a big difference

In [None]:
# monte carlo divonne

# wrong answer
@show result, err = divonne(int, M, 1, atol=atol, rtol=rtol); # output suggests increasing maxevals

In [None]:
# better
maxevals=1174707384

@show result, err = divonne(int, M, 1, atol=atol, rtol=rtol,maxevals=maxevals);
@show result, err = divonne(int_thread_col, M, 1, atol=atol, rtol=rtol,nvec=nvec,maxevals=maxevals);
@show result, err = divonne(int_thread_el, M, 1, atol=atol, rtol=rtol,nvec=nvec,maxevals=maxevals);

In [None]:
# Benchmark divonne for the serial integrand and for both threaded integrands
# over a range of `nvec`. Every global (including `maxevals`) is interpolated
# with `$` so that @btime does not time access to a non-constant global.
@btime divonne($(int), $M, 1, atol=$atol, rtol=$rtol,maxevals=$maxevals)

println("multithread")
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(16),maxevals=$maxevals)
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100),maxevals=$maxevals)
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000),maxevals=$maxevals)
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000),maxevals=$maxevals)
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100000),maxevals=$maxevals)
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000000),maxevals=$maxevals)
@btime divonne($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000000),maxevals=$maxevals)

println("multithread and create product in loop")
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(16),maxevals=$maxevals)
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100),maxevals=$maxevals)
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000),maxevals=$maxevals)
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000),maxevals=$maxevals)
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100000),maxevals=$maxevals)
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000000),maxevals=$maxevals)
@btime divonne($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000000),maxevals=$maxevals);


In [None]:
# monte carlo suave

nvec=15000000
maxevals=300000000
nmin=2
nnew=80000
flatness=150

In [None]:
@show result, err = suave(int, M, 1, atol=atol, rtol=rtol,maxevals=maxevals,nnew=nnew,nmin=nmin,flatness=flatness); 
@show result, err = suave(int_thread_col, M, 1, atol=atol, rtol=rtol,maxevals=maxevals, nvec=nvec,nnew=nnew,nmin=nmin,flatness=flatness); 
@show result, err = suave(int_thread_el, M, 1, atol=atol, rtol=rtol,maxevals=maxevals, nvec=nvec,nnew=nnew,nmin=nmin,flatness=flatness); 

In [None]:
@btime suave($(int), $M, 1, atol=$atol, rtol=$rtol,maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness) # fast

In [None]:
println("multithread")
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(16),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_col, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 

println("multithread and create product in loop")
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(16),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(100000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(1000000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 

In [None]:
@btime suave($int_thread_el, $M, 1, atol=$atol, rtol=$rtol,nvec=$(10000),maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness); 

In [None]:
# ### Default values of parameters
# # Common arguments.
# const NVEC      = 1
# const RTOL      = 1e-4
# const ATOL      = 1e-12
# const FLAGS     = 0
# const SEED      = 0
# const MINEVALS  = 0
# const MAXEVALS  = 1000000
# const STATEFILE = ""
# const SPIN      = C_NULL

# # Vegas-specific arguments.
# const NSTART    = 1000
# const NINCREASE = 500
# const NBATCH    = 1000
# const GRIDNO    = 0

# # Suave-specific arguments.
# const NNEW     = 1000
# const NMIN     = 2
# const FLATNESS = 25.0

# # Divonne-specific arguments.
# const KEY1         = 47
# const KEY2         = 1
# const KEY3         = 1
# const MAXPASS      = 5
# const BORDER       = 0.0
# const MAXCHISQ     = 10.0
# const MINDEVIATION = 0.25
# const NGIVEN       = 0
# const LDXGIVEN     = 0
# const XGIVEN       = 0
# const NEXTRA       = 0
# const PEAKFINDER   = C_NULL

# # Cuhre-specific argument.
# const KEY = 0

In [None]:
 @cushow index
@cuprintln "thread $index: stride=$stride"
@fastmath

In [1]:
import Pkg;Pkg.activate("optim")
using Cuba, Distributions, HCubature
using BenchmarkTools, Test
using Quadrature
using Zygote, FiniteDiff, ForwardDiff
include("ExtendedPowerDist.jl"); using .ExtendedPowerDist
using DataFrames
include("NetDefs.jl"); using .NetDefs
using NLsolve, SpecialFunctions
@test Threads.nthreads()>1
Threads.nthreads()

[32m[1m  Activating[22m[39m environment at `~/SageMaker/networks_hub/SAN/code/network_simulations/optim/Project.toml`


48

In [6]:
# Residual whose root is sought by `sump_nlsolve`:
#   p_clear - min.((1+γ)*(Aᵀ*p_clear + (1 .- x).*c) - γ*p_bar, p_bar)
# `c` and `Aᵀ` are extracted from `z` through the global selectors `selc`/`selA`;
# `p_bar` and `γ` are read from `p` at positions N+1:2N and 3N+1.
function c_p(p_clear, z, p, x)
    c = selc*z
    Aᵀ = transpose(reshape(selA*z, N, N))
    p_bar = p[N+1:2N]
    γ = p[3N+1]
    candidate = (1+γ)*(Aᵀ*p_clear + (1 .- x).*c) - γ*p_bar
    return p_clear - min.(candidate, p_bar)
end

# Solve `c_p(p_clear, z, p, x) = 0` with `nlsolve`, starting from `p[N+1:2N]`,
# and return the sum of the components of the root.
function sump_nlsolve(z, p, x)
    start = p[N+1:2N]
    residual = p_clear -> c_p(p_clear, z, p, x)
    solution = nlsolve(residual, start)
    return sum(solution.zero)
end

# computes integrand sum(p(x))
# Batched integrand: for each column `j` of `x`, stores in `f[j]` the value
# `sump_nlsolve(z, p, x[:, j])` times the product over rows of the
# `ExtPow(α[i], β[i])` densities at `x[i, j]`. `zp` is the concatenation of
# `z` (first `M` entries) and `p`. Columns are processed in parallel.
function int_thread(f, x, zp)
    z = zp[1:M]
    p = zp[M+1:6N+1+2N*M+M^2]
    α = selα*z
    β = selβ*z
    fill!(f, 1.0)
    Threads.@threads for j in 1:size(x, 2)
        acc = f[j] * sump_nlsolve(z, p, x[:, j])
        for i in 1:size(x, 1)
            acc *= density(ExtendedPowerDist.ExtPow(α[i], β[i]), x[i, j])
        end
        f[j] = acc
    end
end

rtol=1e-2; atol=1e-2;
# integral sum(p(x))
# Integrate `int_thread` over the unit box [0,1]^N with Cuba's Cuhre (through
# Quadrature.jl, batch size 100000) and return the first component of the
# solution. Tolerances come from the globals `rtol` and `atol`.
function int_nlsolve(z, p)
    params = vcat(z, p)
    problem = QuadratureProblem(int_thread, zeros(N), ones(N), params; batch=100000)
    solution = solve(problem, CubaCuhre(), reltol=rtol, abstol=atol)
    return solution[1]
end

# Elementwise closed form
#   1 - (1/2)*sqrt(π/β)*exp((α+1)^2/(4β))*erfc((α+1)/(2sqrt(β)))
# NOTE(review): `obj_Ecx` uses this as an expected value — presumably the mean of
# the ExtPow(α, β) distribution; confirm against ExtendedPowerDist.
function ev(α, β)
    shifted = α .+ 1
    prefactor = (1/2)*sqrt.(pi ./ β)
    return 1 .- prefactor.*exp.( shifted.^2 ./ (4β)).*erfc.( shifted ./ (2sqrt.(β)) )
end

# Return sum(c .* ev(α, β)), with c, α and β extracted from `z` through the
# global selectors `selc`, `selα` and `selβ`.
function obj_Ecx(z)
    c = selc*z
    expected = ev(selα*z, selβ*z)
    return sum(c.*expected)
end

# objective function E[c*x - sum(p(x))]
# Objective: the closed-form term `obj_Ecx(z)` minus the numerically integrated
# term `int_nlsolve(z, p)`.
function obj_nlsolve(z, p)
    closed_form = obj_Ecx(z)
    integral = int_nlsolve(z, p)
    return closed_form - integral
end

obj_nlsolve (generic function with 1 method)

In [7]:
z0A = Array(z0); p0A = Array(p0)
zp0 = vcat(z0A,p0A)
p_clear0 = p0A[N+1:2N]

5-element Vector{Float64}:
 2.006993
 1.794448
 1.6433575
 1.207323
 1.032553

In [8]:
@show obj_nlsolve(z0A,p0A)

obj_nlsolve(z0A, p0A) = -4.093742470342109


-4.093742470342109

In [14]:
mb=40000
f0 = zeros(mb)
x0_r = rand(N,mb)
int_thread(f0,x0_r,zp0)
obj_Ecx(z0A) - mean(f0)

-4.147318627728024

In [95]:
# nlsolve rrule
using ChainRulesCore
# Reverse-mode rule for `nlsolve(f, x0; kwargs...)`.
# Runs the solver once in the forward pass and returns a pullback that produces
# a tangent for `f` (i.e. for the variables it closes over); the initial guess
# `x0` gets a `ZeroTangent()`.
function ChainRulesCore.rrule(config::RuleConfig{>:HasReverseMode}, ::typeof(nlsolve), f, x0; kwargs...)
    result = nlsolve(f, x0; kwargs...)
    function nlsolve_pullback(Δresult)
        # Cotangent of the `.zero` field of the solver result.
        Δx = Δresult[].zero
        x = result.zero
        # Pullback of `f` at the computed root, obtained through the calling AD system.
        _, f_pullback = rrule_via_ad(config, f, x)
        JT(v) = f_pullback(v)[2] # w.r.t. x
        # solve JT*Δfx = -Δx
        # (done with a second nlsolve call, reusing the caller's kwargs)
        Δfx = nlsolve(v -> JT(v) + Δx, zero(x); kwargs...).zero
        ∂f = f_pullback(Δfx)[1] # w.r.t. f itself (implicitly closed-over variables)
        return (NoTangent(), ∂f, ZeroTangent())
    end
    return result, nlsolve_pullback
end

# Gradient of `sump_nlsolve(z, p, x)` with respect to `z`, computed with Zygote.
# (The previously computed but unused `p_bar` slice has been removed.)
function dsump_nlsolve(z, p, x)
    return Zygote.gradient(zz -> sump_nlsolve(zz, p, x), z)[1]
end

# Batched gradient integrand: column `j` of `f` receives
# `dsump_nlsolve(z, p, x[:, j])` scaled by the product over rows of the
# `ExtPow(α[i], β[i])` densities at `x[i, j]`.
# NOTE(review): a second method with the identical signature is defined
# immediately after this one and replaces it when the cell runs — confirm which
# definition is intended.
function int_dsump(f, x, zp)
    z = zp[1:M]
    p = zp[M+1:6N+1+2N*M+M^2]
    α = selα*z
    β = selβ*z
    fill!(f, 1.0)
    Threads.@threads for j in 1:size(x, 2)
        column = view(f, :, j)
        column .*= dsump_nlsolve(z, p, x[:, j])
        for i in 1:size(x, 1)
            column .*= density(ExtendedPowerDist.ExtPow(α[i], β[i]), x[i, j])
        end
    end
end

# Batched integrand: every entry of column `j` of `f` is set to
# `sump_nlsolve(z, p, x[:, j])` times the product over rows of the
# `ExtPow(α[i], β[i])` densities at `x[i, j]`.
# NOTE(review): this has the same name and signature as the definition just
# above, so it overwrites it. Unlike that one it uses the scalar
# `sump_nlsolve` rather than the gradient `dsump_nlsolve`, while
# `dobj_nlsolve` subtracts the result from a gradient — confirm which version
# is intended.
function int_dsump(f,x,zp)
   z, p = zp[1:M], zp[M+1:6N+1+2N*M+M^2]
   α, β = selα*z, selβ*z
   f .= 1.0
   Threads.@threads for j in 1:size(x,2)
       f[:,j] .*= sump_nlsolve(z,p,x[:,j])
       for i in 1:size(x, 1)
           f[:,j] .*= density(ExtendedPowerDist.ExtPow(α[i],β[i]),x[i,j])
       end
   end
end

# Sample-based gradient: Zygote gradient of `obj_Ecx` at `z` minus the row-wise
# mean over the columns of `x` of what `int_dsump` writes.
# `df` is accepted but not used; the result is returned as a new vector.
function dobj_nlsolve(z, p, x, df)
    samples = zeros(length(z), size(x, 2))
    int_dsump(samples, x, vcat(z, p))
    closed_form_grad = Zygote.gradient(obj_Ecx, z)[1]
    sample_mean = dropdims(mean(samples, dims=2), dims=2)
    return closed_form_grad - sample_mean
end

# Write the Zygote Hessian of `obj_Ecx` at `z` into `d2f` and return `d2f`.
# `p` and `x` are accepted but not used: only the closed-form term contributes.
function d2obj_nlsolve(z, p, x, d2f)
    hess = Zygote.hessian(obj_Ecx, z)
    d2f .= hess
    return d2f
end

d2obj_nlsolve (generic function with 1 method)

In [98]:
mb=10
df0 = zeros(M)
x0_r = rand(N,mb)
g0 = zeros(length(z0),size(x0_r,2))
int_dsump(g0,x0_r,vcat(z0A,p0A))
#df0 .= Zygote.gradient(z->obj_Ecx(z),z0A)[1] - dropdims(mean(g0,dims=2),dims=2)
dobj_nlsolve(z0A,p0A,x0_r,df0)

h0 = zeros(M,M)
d2obj_nlsolve(z0A,p0A,x0_r,h0)

45×45 Matrix{Float64}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …   0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …   0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …   0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0      0.0        0.0        0.0
 ⋮                        ⋮              ⋱            

In [99]:
g0

45×10 Matrix{Float64}:
 0.0       0.0          0.533786   …  0.0190917    0.604367  0.0
 0.0       0.0          0.339645      0.010617     0.479863  0.0
 0.0       0.0          0.325032      0.00552172   0.294206  0.0
 0.0       0.0          0.0836838     0.00593505   0.212128  0.0
 0.0       0.0          0.0612584     0.000853542  0.163786  0.0
 2.40267   0.00250752   0.800678   …  0.0286376    0.90655   0.0
 1.62753   0.000862284  0.509467      0.0159254    0.719795  0.0
 0.622903  0.000516387  0.487548      0.00828258   0.441308  0.0
 0.706585  0.000405496  0.125526      0.00890257   0.318192  0.0
 0.369274  0.000618497  0.0918876     0.00128031   0.245679  0.0
 3.60401   0.00376129   1.20102    …  0.0429564    1.35982   8.25792
 2.4413    0.00129343   0.764201      0.0238881    1.07969   7.38339
 0.934355  0.00077458   0.731322      0.0124239    0.661962  5.14056
 ⋮                                 ⋱                         
 1.79344   0.000330711  0.273448      0.0283902    0.67124

45×1 Matrix{Float64}:
 -0.1634711996548124
 -0.1224335481990602
 -0.09393037335127047
 -0.03885367025398878
 -0.006525423682122654
 -0.5361449449131931
 -0.39075158553876205
 -0.2956423068321641
 -0.16010011873381294
 -0.06551918313716311
 -0.8288997387181899
 -0.6081957872061624
 -0.4523912795875934
  ⋮
 -0.09877659615332718
 -0.084604413714433
  0.3976185549927378
  0.35436929337632983
  0.3330899825994633
  0.2394134791913638
  0.1977575787108351
  0.40384559390089514
  0.3599190126990379
  0.33830645012976324
  0.24316289438173166
  0.20085462768290296

In [None]:
# Batched integrand: `f[j]` receives the product over rows of the
# `ExtPow(α[i], β[i])` densities at `x[i, j]`. The `p` part of `zp` is sliced
# out but not used.
function prod_pdf(f, x, zp)
    z = zp[1:M]
    p = zp[M+1:6N+1+2N*M+M^2]
    α = selα*z
    β = selβ*z
    fill!(f, 1.0)
    Threads.@threads for j in 1:size(x, 2)
        acc = f[j]
        for i in 1:size(x, 1)
            acc *= density(ExtendedPowerDist.ExtPow(α[i], β[i]), x[i, j])
        end
        f[j] = acc
    end
end

In [None]:

rtol=1e-1; atol=1e-1;
# Integrate `int_dsump` over [0,1]^N with Cuba's Cuhre (batch size 10000) and
# return the first component of the solution. Tolerances come from the globals
# `rtol`/`atol`.
# NOTE(review): `nout=2` is hard-coded; if `int_dsump` is meant to fill one row
# per component of `z`, this should probably match that row count — confirm.
function dint_nlsolve(z, p)
    params = vcat(z, p)
    problem = QuadratureProblem(int_dsump, zeros(N), ones(N), params; batch=10000, nout=2)
    solution = solve(problem, CubaCuhre(), reltol=rtol, abstol=atol)
    return solution[1]
end

In [None]:
# Cuba-style (x, f) variant of the gradient integrand: column `j` of `f`
# receives `dsump_nlsolve(z, p, x[:, j])` scaled by the product over rows of
# the `ExtPow(α[i], β[i])` densities at `x[i, j]`.
function int_dsump_c(x, f, zp)
    z = zp[1:M]
    p = zp[M+1:6N+1+2N*M+M^2]
    α = selα*z
    β = selβ*z
    fill!(f, 1.0)
    Threads.@threads for j in 1:size(x, 2)
        f[:, j] .*= dsump_nlsolve(z, p, x[:, j])
        for i in 1:size(x, 1)
            f[:, j] .*= density(ExtendedPowerDist.ExtPow(α[i], β[i]), x[i, j])
        end
    end
end

# Two-argument wrapper with the parameters fixed to the global `zp0`.
int_dsump_c2(x, f) = int_dsump_c(x, f, zp0)

cuhre(int_dsump_c2,N,M, atol=atol, rtol=rtol,nvec=1000)

In [None]:
mb=1000
x0_r = rand(N,mb)

In [None]:
f0=zeros(M,mb)
int_dsump(f0,x0_r,zp0)
f0

In [None]:
mean(f0,dims=2)

In [None]:

c_p(p_clear0,z0A,p0A,x0[:,1])
sump_nlsolve(z0A,p0A,x0[:,1])
int_nlsolve(z0A,p0A)
obj_Ecx(z0A)

In [None]:
obj_nlsolve(z0A,p0A)

In [None]:
# Approximate the fixed point of
#   p ↦ min.((1+γ)*(Aᵀ*p + (1 .- x).*c) - γ*p_bar, p_bar)
# by iterating the map `niter` times starting from `p_bar`, for every column of
# `x` at once, and return the column sums of the final iterate (1×mb result).
#
# Arguments
# - `z`: vector from which `c` and `A` are extracted via the global selectors `selc`/`selA`
# - `p`: vector holding `p_bar` at N+1:2N and `γ` at 3N+1
# - `x`: one point per column (a plain vector is treated as a single column)
# - `niter`: number of iterations; the default 4 reproduces the previously
#   hand-unrolled p1..p4 sequence
function sump_contraction(z, p, x; niter::Integer=4)
    niter >= 0 || throw(ArgumentError("niter must be non-negative, got $niter"))
    mb = size(x, 2)
    c, Aᵀ = selc*z, transpose(reshape(selA*z, N, N))
    p_bar, γ = p[N+1:2N], p[3N+1]
    p_bar_rep = repeat(p_bar, 1, mb)
    c_rep = repeat(c, 1, mb)
    # This term does not change between iterations, so compute it once.
    shock = (1 .- x).*c_rep
    p_k = p_bar_rep
    for _ in 1:niter
        p_k = min.( (1+γ)*(Aᵀ*p_k + shock) - γ*p_bar_rep, p_bar_rep)
    end
    return sum(p_k, dims=1)
end

# Sample-based objective for `x` drawn from a uniform distribution:
# sum(c .* ev(α, β)) minus the mean over columns of
# `sump_contraction(z, p, x) .* extpow_prod_pdf(x, α, β)`.
function obj_contraction_uniform2(z, p, x)
    c = selc*z
    α = selα*z
    β = selβ*z
    closed_form = sum(c.*ev(α, β))
    weighted = sump_contraction(z, p, x).*extpow_prod_pdf(x, α, β)
    return closed_form - mean(weighted)
end

In [None]:
mb=100000
x0_r = rand(N,mb)
obj_contraction_uniform2(z0A,p0A,x0_r)

In [None]:
mb=25000
x0_r = rand(N,mb)
f0=zeros(mb)
f1=zeros(mb)
int_thread(f0,x0_r,zp0)
int_thread(f1,x0_r,zp0)
obj_Ecx(z0A) - mean(f0)
obj_Ecx(z0A) - (mean(f0)+mean(f1))/2


In [None]:
d1=Zygote.gradient(obj_Ecx,z0A)[1]
h1=Zygote.hessian(obj_Ecx,z0A)

In [None]:
# ForwardDiff.gradient returns the gradient array itself (not a tuple as
# Zygote.gradient does), so indexing with [1] would keep only its first entry.
d1=ForwardDiff.gradient(obj_Ecx,z0A)
h1=ForwardDiff.hessian(obj_Ecx,z0A)

In [None]:
d1=Zygote.gradient(z->sump_nlsolve(z,p0A,x0_r[:,1]),z0A)[1]

In [None]:
d2=FiniteDiff.finite_difference_gradient(z->sump_nlsolve(z,p0A,x0_r[:,1]),z0A)

In [None]:
d3=Zygote.gradient(z->sump_contraction(z,p0A,x0_r[:,1])[1],z0A)[1]

In [None]:
d4=ForwardDiff.gradient(z->sump_contraction(z,p0A,x0_r[:,1])[1],z0A)

In [None]:
mb=20000
x0_r = rand(N,mb)

@show d1
@show d2

In [None]:
Zygote.jacobian(αβ->ev(αβ[1:N],αβ[N+1:2N]),fill(0.5,2N))[1]
Zygote.gradient(cαβ->sum(cαβ[1:N].*ev(cαβ[N+1:2N],cαβ[2N+1:3N])),fill(0.5,3N))[1]


In [None]:
@btime Zygote.gradient(z->obj_nlsolve(z,p0A,x0_r[:,1]),z0A)[1];

In [None]:
@btime FiniteDiff.finite_difference_gradient(z->obj_nlsolve(z,p0A,x0_r[:,1]),z0A);

In [None]:
obj_nlsolve(z0A,p0A)

In [None]:
df1=Zygote.gradient(z->obj_nlsolve(z,p0A),z0A)[1]

In [None]:
df2 = FiniteDiff.finite_difference_gradient(z->obj_nlsolve(z,p0A),z0A)

In [None]:
# multithread and loop to create product distribution
# Batched integrand: `f[1, j]` receives `obj_contraction(z, p, x[:, j])` times
# the product over rows of the `ExtPow(α[i], β[i])` densities at `x[i, j]`.
function int_thread_elq(f, x, zp)
    z = zp[1:M]
    p = zp[M+1:6N+1+2N*M+M^2]
    α = selα*z
    β = selβ*z
    f[1,:] .= 1.0
    Threads.@threads for j in 1:size(x, 2)
        acc = f[1, j] * obj_contraction(z, p, x[:, j])
        for i in 1:size(x, 1)
            acc *= density(ExtendedPowerDist.ExtPow(α[i], β[i]), x[i, j])
        end
        f[1, j] = acc
    end
end

# Single-point integrand: writes into `f[1]` the first entry of
# `obj_contraction(z, p, x)` multiplied by the product of the
# `ExtPow(α[i], β[i])` densities evaluated at `x`.
function int(f, x, zp)
    z = zp[1:M]
    p = zp[M+1:6N+1+2N*M+M^2]
    α = selα*z
    β = selβ*z
    value = obj_contraction(z, p, x)
    joint_density = prod(density.(ExtendedPowerDist.ExtPow.(α, β), x), dims=1)
    f[1] = (value*joint_density)[1]
end

In [None]:
f0=[0.0]
int(f0,ones(N)/10,zp0)
f0

In [None]:
q0 = [0.0]
int_thread_elq(q0,ones(N)/10,zp0)
q0

In [None]:
# Integrate `int_thread_elq` over [0,1]^N with Cuba's Cuhre (batch size 10000)
# and return the first component of the solution. Uses the globals `rtol`/`atol`.
function diff_obj(z, p)
    params = vcat(z, p)
    problem = QuadratureProblem(int_thread_elq, zeros(N), ones(N), params; batch=10000)
    solution = solve(problem, CubaCuhre(), reltol=rtol, abstol=atol)
    return solution[1]
end

# Integrate the single-point integrand `int` over [0,1]^N with HCubature
# (no batching) and return the first component of the solution. Uses the
# globals `rtol`/`atol`.
function diff_objH(z, p)
    params = vcat(z, p)
    problem = QuadratureProblem(int, zeros(N), ones(N), params; batch=0)
    solution = solve(problem, HCubatureJL(), reltol=rtol, abstol=atol)
    return solution[1]
end


In [None]:
diff_obj(z0A,p0A)


In [None]:
diff_objH(z0A,p0A)

In [None]:
# Zygote.gradient returns a tuple with one entry per argument; take the first so
# that dp1 is directly comparable with the FiniteDiff/ForwardDiff results.
dp1 = Zygote.gradient(z->diff_obj(z,p0A),z0A)[1]

In [None]:
dp2 = FiniteDiff.finite_difference_gradient(z->diff_obj(z,p0A),z0A)

In [None]:
dp3 = ForwardDiff.gradient(z->diff_obj(z,p0A),z0A)

In [None]:
ExtendedPowerDist.moment(ExtendedPowerDist.ExtPow(p[1],p[2]), 1)

In [None]:
# multithread and loop to create product distribution
# Vectorized Cuba integrand: for every column `j` of `x`, writes to `f[1, j]`
# the product over rows of the Beta(1.0, 2.0) density at `x[i, j]`.
# Assumes `f` has at least as many columns as `x` (Cuba's vectorized layout) — confirm
# if called from elsewhere.
function int_thread_el(x, f)
    marginal = Beta(1.0, 2.0)
    Threads.@threads for j in 1:size(x, 2)
        # Accumulate locally and store once per column.
        acc = 1.0
        for i in 1:size(x, 1)
            # Scalar indexing is allocation-free; a `@view` here would hand `pdf`
            # a 0-dimensional array instead of a real number.
            acc *= pdf(marginal, x[i, j])
        end
        f[1, j] = acc
    end
    return nothing
end
cuhre(int_thread_el, M, 1, atol=atol, rtol=rtol,nvec=(100000));

In [None]:
using ForwardDiff
f(x,p) = p[2]*sum(sin.(x .* p))
lb = ones(2)
ub = 3ones(2)
p = [1.5,2.0]

# Integrate the global integrand `f` over the box [lb, ub] with Cuhre
# (tolerances 1e-6) and return the sine of the first solution component.
function testf(p)
    problem = QuadratureProblem(f, lb, ub, p)
    solution = solve(problem, CubaCuhre(), reltol=1e-6, abstol=1e-6)
    return sin(solution[1])
end
dp3 = ForwardDiff.gradient(testf,p)


In [None]:
# Fixed-point residual: `p_clear` minus one application of the map
#   p ↦ min.((1+γ)*(Aᵀ*p + (1 .- x).*c) - γ*p_bar, p_bar)
# `c` and `Aᵀ` come from `z` through the global selectors `selc`/`selA`;
# `p_bar` and `γ` are read from `p` at positions N+1:2N and 3N+1.
function p_fp(p_clear, z, p, x)
    c = selc*z
    Aᵀ = transpose(reshape(selA*z, N, N))
    p_bar = p[N+1:2N]
    γ = p[3N+1]
    mapped = min.( (1+γ)*(Aᵀ*p_clear + (1 .- x).*c) - γ*p_bar, p_bar)
    return p_clear - mapped
end
p_s(p_clear) = p_fp(p_clear,z0A,p0A,ones(N)/10)

In [None]:

p_clear0 = p0A[N+1:2N];
# Root of `p_fp` in `p_clear` for the given `z`, with `p0A` and the point
# `ones(N)/10` held fixed; the solver starts from the global `p_clear0`.
function pc(z)
    residual = p_clear -> p_fp(p_clear, z, p0A, ones(N)/10)
    return nlsolve(residual, p_clear0).zero
end

# Sum of the negated root returned by `pc(z)`.
function obj(z)
    return sum(-pc(z))
end

In [None]:
obj(z0A)

In [None]:
bla=Zygote.gradient(obj,z0A)[1]

In [None]:
FiniteDiff.finite_difference_jacobian(z->Zygote.gradient(obj,z)[1],z0A)

In [None]:
# Forward-mode rule for a call of the form `nlsolve(f, u0, p; kwargs...)`.
# Only the tangent of `p` (`ṗ`) is used; the tangents of the other arguments
# are ignored. Returns the solver output `u` and its pushforward.
# NOTE(review): the argument named `nlsolve` shadows the global function of the
# same name inside the body.
# NOTE(review): `dfdu` and `dfdp` are defined in a later cell of this
# notebook — confirm they are in scope before this rule is used.
function ChainRulesCore.frule((_, _, _, ṗ), nlsolve::typeof(nlsolve), f, u0, p; kwargs...)
    u = nlsolve(f, u0, p; kwargs...)
    # fu: u -> f(u, p) with p fixed; fp: p -> f(u, p) with u fixed.
    fu, fp = Base.Fix2(f, p), Base.Fix1(f, u)

    # u̇ = -(∂f/∂u) \ ((∂f/∂p) * ṗ)
    pushforward = -dfdu(fu, u) \ (dfdp(fp, u, p) * ṗ) # TODO: jvp
    return u, pushforward
end

In [None]:
ForwardDiff.gradient(obj,z0A)

In [None]:
numerical_nlsolve(p_clear -> p_fp(p_clear,z,p0A,ones(N)/10),p_clear0,z)

In [None]:
?numerical_nlsolve

In [None]:
Pkg.add("Diffractor");using Diffractor

In [None]:
using ModelingToolkit
using Zygote
using ModelingToolkit.StructuralTransformations: numerical_nlsolve
using ChainRulesCore
using ForwardDiff

# Derivative of `f` at `u`: a scalar derivative when `u` is a number, otherwise
# a Jacobian.
dfdu(f, u::Number) = ForwardDiff.derivative(f, u)
dfdu(f, u) = ForwardDiff.jacobian(f, u)

# Derivative of `f` at `p`; the kind of derivative is selected by the types of
# `u` and `p`: both numbers -> derivative, scalar `u` only -> gradient,
# otherwise -> Jacobian.
dfdp(f, u::Number, p::Number) = ForwardDiff.derivative(f, p)
dfdp(f, u::Number, p) = ForwardDiff.gradient(f, p)
dfdp(f, u, p) = ForwardDiff.jacobian(f, p)

# Forward-mode rule for `numerical_nlsolve(f, u0, p)`: returns the solver output
# `u` together with its pushforward along the tangent `ṗ` of `p`,
#   u̇ = -(∂f/∂u) \ ((∂f/∂p) * ṗ).
# Tangents of the other arguments are ignored.
function ChainRulesCore.frule((_, _, _, ṗ), nlsolve::typeof(numerical_nlsolve), f, u0, p)
    u = nlsolve(f, u0, p)
    f_of_u = Base.Fix2(f, p)   # u -> f(u, p)
    f_of_p = Base.Fix1(f, u)   # p -> f(u, p)

    jac_u = dfdu(f_of_u, u)
    jac_p = dfdp(f_of_p, u, p)
    pushforward = (-jac_u) \ (jac_p * ṗ) # TODO: jvp
    return u, pushforward
end

# Reverse-mode rule for `numerical_nlsolve(f, u0, p)`.
# Runs the solver to obtain `u` and returns a pullback that maps the output
# cotangent `ū` to a cotangent for `p`; the other three slots are `ZeroTangent()`.
function ChainRulesCore.rrule(nlsolve::typeof(numerical_nlsolve), f, u0, p)
    u = nlsolve(f, u0, p)
    # fu: u -> f(u, p) with p fixed; fp: p -> f(u, p) with u fixed.
    fu, fp = Base.Fix2(f, p), Base.Fix1(f, u)

    function nlsolve_pullback(ū)
        # p̄ = -(∂f/∂p)' * ((∂f/∂u)' \ ū); both derivatives are recomputed via
        # dfdp/dfdu each time the pullback is called.
        p̄ = -dfdp(fp, u, p)' * (dfdu(fu, u)' \  ū) # TODO: vjp
        ZeroTangent(), ZeroTangent(), ZeroTangent(), p̄
    end
    return u, nlsolve_pullback
end

# Test function for the AD rules: solves a two-equation nonlinear system in
# (x, y), parameterized by `p`, with `numerical_nlsolve` starting from ones(2),
# and returns the sum of the solution components.
function nlfun(p)
    residual = function (u, q)
        x, y = u
        [(x+3)*(y^3-7)+q[1]
         sin(y*exp(x)-q[2])]
    end
    start = ones(2)
    return sum(numerical_nlsolve(residual, start, p))
end

Zygote.gradient(nlfun, [18, -1])[1]
ForwardDiff.gradient(nlfun, [18, -1])
#=
julia> Zygote.gradient(nlfun, [18, -1])[1]
2-element Vector{Float64}:
 -0.009532247816399934
  0.4895491994463628
julia> ForwardDiff.gradient(nlfun, [18, -1])
2-element Vector{Float64}:
 -0.009532385532365253
  0.48954879301835347
=#

In [None]:
import Pkg; Pkg.add("LinearMaps")

In [None]:
import Pkg; Pkg.activate("optim")
using NLsolve
using Zygote

using SparseArrays
using LinearAlgebra
using Random
Random.seed!(1234)

using IterativeSolvers
using LinearMaps


# Check of differentiating through nlsolve: compare the Zygote gradient of
# sum(x(p)), where x(p) solves h(x, p) = 0, with a gradient obtained from a
# linear solve.
NN = 100
nonlin = 0.1
# Dense tridiagonal matrix: 10 on the diagonal, -1 on the first off-diagonals.
AA = Array(spdiagm(0 => fill(10.0, NN), 1 => fill(-1.0, NN-1), -1 => fill(-1.0, NN-1)))
p00 = randn(NN)
# Residual of the nonlinear system, with an elementwise quadratic term.
h(x, p) = AA*x + nonlin*x.^2 - p
# Root of h(., p), starting from the zero vector.
solve_x(p) = nlsolve(x -> h(x, p), zeros(NN)).zero
obj(p) = sum(solve_x(p))


g_auto = Zygote.gradient(obj, p00)[1]
# Reference gradient: solve (∂h/∂x)' * g = ones(NN), where
# ∂h/∂x = AA + Diagonal(2*nonlin*x) is evaluated at the computed root.
g_analytic = gmres((AA + Diagonal(2*nonlin*solve_x(p00)))', ones(NN))
display(g_auto)
display(g_analytic)
# Mean absolute difference between the two gradients.
@show sum(abs, g_auto - g_analytic) / NN