
## Multivariate Approximation: Convergence Rates

We explore in some very simple cases the effect of choosing subsets of the full tensor product Chebyshev basis. Due to software and algorithmic limitations, these tests will be restricted to very low dimension, just 2 and 3. But we can still observe some of our analytic results and, more importantly, how extremely limited they are. The take-away message is that approximation in high dimension is extremely subtle and requires substantially more work than in 1D.

In [None]:
include("tools.jl")
using PyCall, PyPlot, FFTW
mplcolors = pyimport("matplotlib.colors")

In [None]:
"""
    xgrid(N)

Return the `2N` equispaced nodes `0, π/N, …, 2π - π/N` as a range.
"""
function xgrid(N)
    return range(0, 2*π - π/N, length = 2*N)
end

"""
    tensorgrid(d, x1)

Build the `d`-dimensional tensor-product grid of the one-dimensional grid `x1`.

Returns a `d`-tuple of `d`-dimensional arrays with `length(x1)` entries per
dimension. In the `i`-th array the values follow `x1` along dimension `i` and
are constant along every other dimension.
"""
function tensorgrid(d, x1)
    n = length(x1)
    # outer product with a vector of ones: entries vary along dimension 1 only
    base = reshape(x1 * ones(Bool, n^(d-1))', ntuple(_ -> n, d))
    # permutation of 1:d that exchanges dimensions 1 and i
    function swapfirst(i)
        perm = collect(1:d)
        perm[1], perm[i] = i, 1
        return tuple(perm...)
    end
    return ntuple(i -> permutedims(base, swapfirst(i)), d)
end

"""
    xgrid(d, N)

Return the `d`-dimensional tensor grid generated from the one-dimensional
grid `xgrid(N)`, as the tuple of coordinate arrays built by `tensorgrid`.
"""
function xgrid(d, N)
    nodes = xgrid(N)
    return tensorgrid(d, nodes)
end

"""
    kgrid(N)

Return the integer wave numbers `0, 1, …, N, -N+1, …, -1` as a vector of
length `2N`.
"""
function kgrid(N)
    return vcat(0:N, (-N + 1):-1)
end

"""
    kgrid(d, N)

Return the `d`-dimensional tensor grid of wave numbers generated from
`kgrid(N)`, as the tuple of index arrays built by `tensorgrid`.
"""
function kgrid(d, N)
    wavenumbers = kgrid(N)
    return tensorgrid(d, wavenumbers)
end


"""
    trigcoeffs(f, d, N)

Sample `f` on the tensor grid `xgrid(d, N)` and return the `fft` of the
samples divided by the number of grid points, `(2N)^d`.

`f` is called with `d` scalar arguments, one per coordinate.
"""
function trigcoeffs(f, d, N)
    samples = f.(xgrid(d, N)...)
    npoints = (2*N)^d
    return fft(samples) / npoints
end


In [None]:
# Two-dimensional test function and a picture of the magnitudes of its
# coefficients (as computed by `trigcoeffs`) on a logarithmic colour scale.
f2(x1, x2) = exp(sin(x1) * sin(2 * sin(x2)))
F̂ = trigcoeffs(f2, 2, 40)
# the small offset keeps every plotted value strictly positive
imshow(abs.(F̂) .+ 1e-12, norm = mplcolors.LogNorm())

In [None]:
using StaticArrays 

"""
    SparsePoly{D}

Trigonometric polynomial in `D` variables stored term by term: the `n`-th
term has coefficient `coeffs[n]` and integer wave vector `kk[n]`.
"""
struct SparsePoly{D}
    coeffs::Vector{ComplexF64}
    kk::Vector{SVector{D, Int}}
end

# number of stored terms
function Base.length(p::SparsePoly)
    return length(p.coeffs)
end

# Evaluate at the point `x`: the real part of ∑ₙ coeffs[n] * exp(i kk[n]⋅x).
function (p::SparsePoly)(x::AbstractVector)
    total = sum(c * exp(im * dot(k, x)) for (c, k) in zip(p.coeffs, p.kk))
    return real(total)
end

# Convenience form taking the coordinates as separate arguments.
function (p::SparsePoly)(args...)
    return p(SVector(args...))
end

"""
    triginterp(f, d, N)

Return a `SparsePoly` holding all coefficients computed by
`trigcoeffs(f, d, N)`, each paired with the wave vector found at the same
linear index of the tensor grid `kgrid(d, N)`.
"""
function triginterp(f, d, N)
    coeffs = trigcoeffs(f, d, N)[:]
    KK = kgrid(d, N)
    # wave vector belonging to the n-th (linear index) coefficient
    wavevector(n) = SVector(ntuple(i -> KK[i][n], d)...)
    kk = [ wavevector(n) for n in eachindex(coeffs) ]
    return SparsePoly(coeffs, kk)
end
    
"""
    greedy(p::SparsePoly, M::Integer)

Return a new `SparsePoly` made of the `M` terms of `p` whose coefficients
have the largest absolute value, ordered by decreasing magnitude.
"""
function greedy(p::SparsePoly, M::Integer)
    order = sortperm(abs.(p.coeffs), rev = true)
    keep = order[1:M]
    return SparsePoly(p.coeffs[keep], p.kk[keep])
end

"""
    sparsify(p::SparsePoly, accfun)

Return a new `SparsePoly` containing only those terms of `p` whose wave
vector `k` satisfies `accfun(k) == true`; the original term order is kept.
"""
function sparsify(p::SparsePoly, accfun)
    keep = findall(accfun, p.kk)
    return SparsePoly(p.coeffs[keep], p.kk[keep])
end

"""
    sparsegrid(p, N)

Keep the terms of `p` whose wave vector `k` satisfies `∑_a |k_a| ≤ N`.
"""
function sparsegrid(p, N)
    accept(k) = sum(abs, k) <= N
    return sparsify(p, accept)
end

"""
    hcross(p, N)

Keep the terms of `p` whose wave vector `k` satisfies `∏_a (1 + |k_a|) ≤ N + 1`.
"""
function hcross(p, N)
    accept(k) = prod(1 .+ abs.(k)) <= N + 1
    return sparsify(p, accept)
end

"""
    ferr(f, g, d, Nerr, p = Inf)

Return the `p`-norm of the vector of differences `f - g` sampled on the
tensor grid `xgrid(d, Nerr)`.

# Arguments
- `f`, `g`: callables taking `d` scalar arguments.
- `d`: number of variables.
- `Nerr`: resolution parameter passed to `xgrid`.
- `p`: order of the vector norm passed to `norm`; the default `Inf` gives
  the maximum absolute difference on the grid. For finite `p` the value is
  the plain vector norm and is not rescaled by the number of grid points.
"""
function ferr(f, g, d, Nerr, p = Inf)
    XX = xgrid(d, Nerr)
    diffs = vec(f.(XX...)) .- vec(g.(XX...))
    # honour the requested norm order (it used to be ignored in favour of Inf)
    return norm(diffs, p)
end

In [None]:
# Convergence study in two dimensions: error against number of retained
# coefficients for four choices of coefficient set
#   tensor : all coefficients of the degree-N interpolant
#   greedy : the M largest coefficients of a reference interpolant
#   sparse : ∑|k_a| ≤ N truncation of the reference interpolant
#   hyp-x  : hyperbolic-cross truncation of the reference interpolant
# NOTE(review): `Plots` must already be loaded when this cell runs; in this
# file `using Plots` only appears in a later cell -- confirm tools.jl loads it.
NN = 2:2:20          # degrees for the tensor-product interpolants
NNsp = 4:4:30        # bounds for the sparse (total degree) truncation
NNhc = 5:15:140      # bounds for the hyperbolic-cross truncation
MM_gr = 10:50:500    # numbers of coefficients kept by the greedy selection
Nerr = 51            # errors are measured on the grid xgrid(2, Nerr)

# concretely typed accumulators instead of Vector{Any}
MM = Int[]
MM_sp = Int[]
MM_hc = Int[]
err_ten = Float64[]
err_gr = Float64[]
err_sp = Float64[]
err_hc = Float64[]

for N in NN
    p_ten = triginterp(f2, 2, N)
    push!(err_ten, ferr(f2, p_ten, 2, Nerr))
    push!(MM, length(p_ten))
end

# Reference interpolant from which the sparse approximations are selected.
# greedy / sparsegrid / hcross return new polynomials and leave their
# argument untouched, so no copies are needed.
p_ten_ref = triginterp(f2, 2, 2*maximum(NN))

for M in MM_gr
    p_gr = greedy(p_ten_ref, M)
    push!(err_gr, ferr(f2, p_gr, 2, Nerr))
end

for N in NNsp
    p_sp = sparsegrid(p_ten_ref, N)
    push!(err_sp, ferr(f2, p_sp, 2, Nerr))
    push!(MM_sp, length(p_sp))
end

for N in NNhc
    p_hc = hcross(p_ten_ref, N)
    push!(err_hc, ferr(f2, p_hc, 2, Nerr))
    push!(MM_hc, length(p_hc))
end


Plots.plot(; yaxis = (:log,"error"), xlabel="#coeffs" )
plot!(MM, err_ten, lw=2, m=:o, ms=6, label="tensor")
plot!(MM_gr, err_gr, lw=2, m=:o, ms=6, label="greedy")
plot!(MM_sp, err_sp, lw=2, m=:o, ms=6, label ="sparse")
plot!(MM_hc, err_hc, lw=2, m=:o, ms=6, label ="hyp-x")


In [None]:
# Convergence study in three dimensions: error against number of retained
# coefficients for tensor, greedy, sparse (∑|k_a| ≤ N) and hyperbolic-cross
# coefficient sets.
# NOTE(review): `Plots` must already be loaded when this cell runs; in this
# file `using Plots` only appears in a later cell -- confirm tools.jl loads it.
f3(x1,x2,x3) = cos(sin(x1)*sin(3*cos(x2))*cos(x3)^2)

NN = 2:2:12                       # degrees for the tensor-product interpolants
NNsp = 4:4:22                     # bounds for the sparse truncation
MM_gr = [30, 60, 110, 190, 280]   # greedy sizes (400, 550, 750 were disabled)
Nerr = 23                         # errors are measured on xgrid(3, Nerr)
# NOTE: `NNhc` is not set here; the hyperbolic-cross loop below uses the
# value left behind by the two-dimensional study.

# concretely typed accumulators instead of Vector{Any}
MM = Int[]
MM_sp = Int[]
MM_hc = Int[]
err_ten = Float64[]
err_gr = Float64[]
err_sp = Float64[]
err_hc = Float64[]

for N in NN
    p_ten = triginterp(f3, 3, N)
    push!(err_ten, ferr(f3, p_ten, 3, Nerr))
    push!(MM, length(p_ten))
end

# Reference interpolant from which the sparse approximations are selected.
# greedy / sparsegrid / hcross return new polynomials and leave their
# argument untouched, so no copies are needed.
p_ten_ref = triginterp(f3, 3, 2*maximum(NN))

for M in MM_gr
    p_gr = greedy(p_ten_ref, M)
    push!(err_gr, ferr(f3, p_gr, 3, Nerr))
end

for N in NNsp
    p_sp = sparsegrid(p_ten_ref, N)
    push!(err_sp, ferr(f3, p_sp, 3, Nerr))
    push!(MM_sp, length(p_sp))
end

for N in NNhc
    p_hc = hcross(p_ten_ref, N)
    push!(err_hc, ferr(f3, p_hc, 3, Nerr))
    push!(MM_hc, length(p_hc))
end


Plots.plot(; yaxis = (:log,"error"), xlabel="#coeffs" )
plot!(MM, err_ten, lw=2, m=:o, ms=6, label="tensor")
plot!(MM_gr, err_gr, lw=2, m=:o, ms=6, label="greedy")
plot!(MM_sp, err_sp, lw=2, m=:o, ms=6, label ="sparse")
plot!(MM_hc, err_hc, lw=2, m=:o, ms=6, label ="hyp-x")

In [None]:
# A hyperbolic cross example: coefficient magnitudes of `fhc`, shown for every
# second index in each direction on a logarithmic colour scale.
fhc(x1, x2) = (abs(sin(x1)) + abs(sin(x2)))^2
F̂ = trigcoeffs(fhc, 2, 40)
# the small offset keeps every plotted value strictly positive
imshow(abs.(F̂[1:2:end, 1:2:end]) .+ 1e-12, norm = mplcolors.LogNorm())

Another example comes from electron transport theory. Specifically, we consider functions of the form 
$$
  f(x_1, x_2) = \frac{g(x_1, x_2)}{x_1 - x_2 + \epsilon i},
$$
where $\epsilon i$ is a small shift into the complex plane and $g$ is smooth. For small $\epsilon$, $f$ has a singularity in the complex plane very close to the domain of approximation — in fact an entire line of singularities — and this significantly slows approximation. 

Purely for background information: 
The function is then used to evaluate a bivariate matrix function, i.e., 
$$
  f(H, H)
$$
where $H \in \mathbb{R}^{n \times n}$ with $n$ potentially large. We have to be careful about how to interpret this function. A canonical definition is via tensor products. If $f(x_1, x_2) = f_1(x_1) f_2(x_2)$, then $f(H, H) = f_1(H) \otimes f_2(H)$, which is 
a four-dimensional tensor. Then, invoking linearity, if $f$ is a sum of tensor products, e.g., $f(x_1,x_2) = \sum_k c_k T_{k_1}(x_1) T_{k_2}(x_2)$, we can write 
$$
    f(H,H) = \sum_k c_k T_{k_1}(H) \otimes T_{k_2}(H).
$$
We may again employ the recursion formula for the Chebyshev basis to evaluate the basis. But even without going into the details it is clear that it will be crucial to minimise the number of terms!

In [None]:
# A periodic variant of the problem described above. There is significant
# potential for sparsification, but no sign of anything remotely similar to
# the sparsity patterns that we looked at so far... Here we should really use
# a greedy algorithm!
ft(x1, x2) = 1 / (sin(x1) - sin(x2) + 0.1im)
F̂ = trigcoeffs(ft, 2, 300)
# the small offset keeps every plotted value strictly positive
imshow(abs.(F̂) .+ 1e-12, norm = mplcolors.LogNorm())

Final example: the multi-variate Runge function 
$$
f(x_1, \dots, x_d) = \frac{1}{1+c \sum_{i=1}^d \sin^2(x_i)}
$$

In [None]:
# This time we start by visualising the coefficients right away, showing
# every second index in each direction.
fr2(x1, x2) = 1 / (1 + 10 * (sin(x1)^2 + sin(x2)^2))
F̂ = trigcoeffs(fr2, 2, 100)
# the small offset keeps every plotted value strictly positive
imshow(abs.(F̂[1:2:end, 1:2:end]) .+ 1e-12, norm = mplcolors.LogNorm())

We clearly observe a radial decay of the Chebyshev coefficients, which matches neither the total degree nor the hyperbolic cross truncation that we discussed! This suggests that we should truncate at $|k| \leq N$ where $|k|$ is the 2-norm!

Let's try this in a higher dimension, e.g. $d = 5$? Now comparing against tensor product grids will be hopeless.

To have at least one case where we can explore the higher dimensional setting a little bit let's consider a case where the hyperbolic cross degree gives the "right" sparsification: 
$$
    f({\bf x}) = \bigg( d^{-1} \sum_{i = 1}^d |\sin(x_i)| \bigg)^2
$$

In [None]:
# e.g. in 5 dimensions: f(x) = ( (1/5) ∑ᵢ |sin(xᵢ)| )², i.e. the formula above
# with d = 5. The factor 1/5 is the d⁻¹ normalisation, which was missing here.
fhc(x1, x2, x3, x4, x5) = (sum( abs ∘ sin, (x1, x2, x3, x4, x5) ) / 5)^2

In [None]:
"""
    hckgrid5(N)

Return all wave vectors `k = (k1, …, k5)` with `∏ᵢ (1 + |kᵢ|) ≤ N` as a
`Vector{SVector{5, Int}}`, ordered lexicographically with `k1` slowest.

This could be written in a dimension-agnostic way, but explicit loops are
easier to follow. Each loop carries the remaining integer "budget": with
`a = 1 + |k|` the constraint on the current index is `a ≤ R`, and the budget
passed to the next index is `R ÷ a`. Since `(R ÷ a) ÷ b == R ÷ (a*b)` for
positive integers, every vector reached by the innermost loop satisfies the
product constraint, so no candidate is generated only to be rejected.
"""
function hckgrid5(N)
    kk = SVector{5, Int}[]
    for k1 = -(N - 1):(N - 1)
        R1 = N ÷ (1 + abs(k1))          # budget left for (k2, …, k5)
        for k2 = -(R1 - 1):(R1 - 1)
            R2 = R1 ÷ (1 + abs(k2))
            for k3 = -(R2 - 1):(R2 - 1)
                R3 = R2 ÷ (1 + abs(k3))
                for k4 = -(R3 - 1):(R3 - 1)
                    R4 = R3 ÷ (1 + abs(k4))
                    for k5 = -(R4 - 1):(R4 - 1)
                        push!(kk, SA[k1, k2, k3, k4, k5])
                    end
                end
            end
        end
    end
    return kk
end


In [None]:
# the contrast in grid sizes is significant
for N = 3:10
    # The full tensor grid has length(kgrid(N))^5 points; count them directly
    # instead of allocating the five-dimensional index arrays.
    Mten = length(kgrid(N))^5
    Mhc = length(hckgrid5(N))
    @show N,  Mten, Mhc
end

In [None]:
# The hyperbolic-cross grid, on the other hand, can be built for much larger N:
for N in 10:10:100
    Mhc = length(hckgrid5(N))
    @show N, Mhc
end

So now we can construct a hyperbolic cross basis quite cheaply, 
and we see that even in 5 dimensions it doesn't grow too fast
with the degree. But now how are we going to determine the parameters? 
I have no good idea here, and when we don't know what to do, 
let's just try least squares? 

In [None]:
using LinearAlgebra: I 
using Base.Threads 

# Sizes of the random training and test sets.
Mtrain = 20_000
Mtest = 10_000

# One random point with 5 coordinates: `rand` values scaled by 2π, shifted by -π.
function rand5()
    u = @SVector rand(5)
    return u * 2 * π .- π
end

Xtrain = [ rand5() for _ in 1:Mtrain ]
Xtest = [ rand5() for _ in 1:Mtest ]

"""
    sparse_trig_basis(kk, x)

Evaluate the basis functions `exp(im * dot(k, x))` at the point `x` for every
wave vector `k` in `kk`; the values are returned in the order of `kk`.
"""
function sparse_trig_basis(kk, x)
    return map(k -> exp(im * dot(k, x)), kk)
end

"""
    hc_lsqfit(f, N, X = Xtrain; λ = 1e-3)

Fit a `SparsePoly` on the hyperbolic-cross wave vectors `hckgrid5(N)` to the
values of `f` at the sample points `X` by regularised least squares.

The design matrix has one row per sample and one column per wave vector `k`,
with entries `exp(im * dot(k, x))`. It is stacked on top of `λ * I`, the
right-hand side is padded with zeros, and the stacked system is solved with `\\`.

# Arguments
- `f`: callable taking the coordinates of a sample as separate arguments.
- `N`: bound passed to `hckgrid5`.
- `X`: collection of sample points (defaults to the global `Xtrain`).

# Keywords
- `λ`: weight of the identity block appended to the design matrix.
"""
function hc_lsqfit(f, N, X = Xtrain; λ = 1e-3)
    kk = hckgrid5(N)
    nsamples, nbasis = length(X), length(kk)
    A = zeros(ComplexF64, nsamples, nbasis)
    Y = zeros(ComplexF64, nsamples)
    # each iteration fills its own row of A and its own entry of Y
    @threads for ix = 1:nsamples
        x = X[ix]
        for (j, k) in enumerate(kk)
            A[ix, j] = exp(im * dot(k, x))
        end
        Y[ix] = f(x...)
    end
    Areg = [A; λ * I]
    Yreg = [Y; zeros(nbasis)]
    return SparsePoly(Areg \ Yreg, kk)
end

In [None]:
# Least-squares fits on hyperbolic-cross grids of increasing size, recording
# the number of basis functions and the maximum absolute error on the test set.
# NOTE(review): the exponent here is 5, whereas the formula in the text above
# uses a square -- confirm which is intended.
fhc(x1, x2, x3, x4, x5) = (sum( abs ∘ sin, (x1, x2, x3, x4, x5) )/5)^5
NN = [5, 6, 8, 10, 12, 15, 19, 24, 30]
MM = Int[]
errs = Float64[]

@time for N in NN
    p = hc_lsqfit(fhc, N; λ=1e-8)
    push!(MM, length(p))
    # absolute value: a signed maximum would ignore points where p overshoots
    push!(errs, maximum(abs(fhc(x...) - p(x)) for x in Xtest))
end

In [None]:
# show the test-set errors collected by the preceding fit loop
errs

In [None]:
using Plots
# Error against number of basis functions on doubly logarithmic axes.
Plots.plot(
    yscale = :log10,
    xscale = :log10,
    size = (500, 300),
    yticks = [0.02, 0.04, 0.08, 0.16, 0.32, 0.64],
)
Plots.plot!(MM, errs, lw = 3, m = :o, ms = 8, label = "hyp-x")