In [19]:
using Revise

using Langevin
using Langevin.Preconditioners: MatrixMOp, BlockPreconditioner, FourierPreconditioner

using LinearAlgebra
using IterativeSolvers
using SparseArrays
using Random

import Serialization
# holstein = Serialization.deserialize("holstein_6x6_cond106.dat")
holstein = Serialization.deserialize("holstein_6x6_cdw.dat")


M = Array(Langevin.HolsteinModels.construct_M(holstein))
M_op = MatrixMOp(holstein)
P_op = BlockPreconditioner(holstein, subtol=1e-3)
PF_op = FourierPreconditioner(holstein)
;

In [11]:
cond(M)

91.15039671895502

# Accuracy measurements

In [12]:
Random.seed!(0)
b = randn(size(holstein)[1])

x_exact = M \ b
x_approx = fill!(similar(b), 0)
;

In [47]:
rhs = M' * b
fill!(x_approx, 0)
x1 = IterativeSolvers.cg!(x_approx, holstein, rhs, tol=1e-4, log=true, maxiter=1000)
println("CG iters ", x1[2].iters)
println("True error ", norm(x_approx - x_exact) / norm(x_exact))

CG iters 232
True error 0.003097070204125516


In [66]:
fill!(x_approx, 0)
x2 = IterativeSolvers.gmres!(x_approx, M_op, b, Pr=P_op, tol=2e-3, restart=10, log=true, maxiter=100, initially_zero=true)

println("GMRES with BLOCK preconditioner")
println("Mat-vec products ", x2[2].mvps)
println("True error ", norm(x_approx - x_exact) / norm(x_exact))

GMRES with BLOCK preconditioner
Mat-vec products 9
True error 0.0028636745562894915


In [63]:
fill!(x_approx, 0)
x2 = IterativeSolvers.bicgstabl!(x_approx, M_op, b, 1, Pl=P_op, tol=2e-3, log=true, max_mv_products=100, initial_zero=true)

println("BICGSTAB with BLOCK preconditioner")
println("Mat-vec products ", x2[2].mvps)
println("True error ", norm(x_approx - x_exact) / norm(x_exact))

BICGSTAB with BLOCK preconditioner
Mat-vec products 12
True error 0.0020995347433988266


In [83]:
# Langevin.Preconditioners.compute_α!(PF_op, const_V=true)

fill!(x_approx, 0)
x2 = IterativeSolvers.gmres!(x_approx, M_op, b, Pr=PF_op, tol=1e-3, restart=10, log=true, maxiter=100, initially_zero=true)

println("GMRES with RIGHT Fourier preconditioner")
println("Mat-vec products ", x2[2].mvps)
println("True error ", norm(x_approx - x_exact) / norm(x_exact))

GMRES with RIGHT Fourier preconditioner
Mat-vec products 61
True error 0.002239675656949757


In [43]:
fill!(x_approx, 0)
x2 = IterativeSolvers.gmres!(x_approx, M_op, b, Pl=PF_op, tol=2e-3, restart=10, log=true, maxiter=100, initially_zero=true)

println("GMRES with LEFT Fourier preconditioner")
println("Mat-vec products ", x2[2].mvps)
println("True error ", norm(x_approx - x_exact) / norm(x_exact))

GMRES with LEFT Fourier preconditioner
Mat-vec products 62
True error 0.00286928021452819


In [53]:
# BiCGStab uses internal randomness. :-(
# Set seed to get reproducible results.
Random.seed!(0)

fill!(x_approx, 0)
x2 = IterativeSolvers.bicgstabl!(x_approx, M_op, b, 1, Pl=PF_op, tol=1e-2, log=true, max_mv_products=100, initial_zero=true)

println("BiCGStab(1) with Fourier preconditioner")
println("Mat-vec products ", x2[2].mvps)
println("True error ", norm(x_approx - x_exact) / norm(x_exact))

BiCGStab(1) with Fourier preconditioner
Mat-vec products 100
True error 0.7359500328254452


# Benchmarking

In [54]:
using BenchmarkTools

println("CG time...")
@btime begin
    fill!(x_approx, 0)
    IterativeSolvers.cg!(x_approx, holstein, b, tol=1e-4, log=true, maxiter=1000)
end
;

CG time...
  2.651 ms (940 allocations: 91.02 KiB)


In [70]:
println("GMRES/Block time...")
@btime begin
    fill!(x_approx, 0)
    IterativeSolvers.gmres!(x_approx, M_op, b, Pr=P_op, tol=1e-3, restart=10, maxiter=100, initially_zero=true)
end
;

GMRES/Block time...
  3.917 ms (36132 allocations: 2.37 MiB)


In [74]:
println("BiCGStab(1)/Block time...")

Random.seed!(0)

@btime begin
    fill!(x_approx, 0)
    x2 = IterativeSolvers.bicgstabl!(x_approx, M_op, b, 1, Pl=P_op, tol=2e-3, max_mv_products=100, initial_zero=true)
end
;

BiCGStab(1)/Block time...
  4.001 ms (39714 allocations: 2.86 MiB)


In [57]:
println("GMRES/Fourier time...")
@btime begin
    fill!(x_approx, 0)
    IterativeSolvers.gmres!(x_approx, M_op, b, Pr=PF_op, tol=2e-3, restart=10, maxiter=100, initially_zero=true)
end
;

GMRES/Fourier time...
  3.455 ms (635 allocations: 304.08 KiB)


In [69]:
println("GMRES/Fourier time, prealloc...")

gmres = IterativeSolvers.gmres_iterable!(x_approx, M_op, b, Pr=PF_op, tol=2e-3, restart=10, maxiter=100, initially_zero=true)

@btime begin
    fill!(x_approx, 0)
    Langevin.Preconditioners.reset_gmres_iterable!(gmres, x_approx, M_op, b, tol=2e-3, initially_zero=true)
    Langevin.Preconditioners.run_gmres_iterable!(gmres)
end
;

GMRES/Fourier time, prealloc...
  3.333 ms (618 allocations: 31.81 KiB)


## For reference, cost of raw FFT and raw precond ops

In [42]:
using BenchmarkTools

L = holstein.Lτ
(N1, N2, N3) = holstein.lattice.dims
N = N1*N2*N3

s1 = randn(ComplexF64, (L, N1, N2, N3))
s2 = zeros(ComplexF64, (L, N1, N2, N3))

println("Timing FFTs")
@btime for i=1:22
    mul!(s2, PF_op.plan, s1)
end
;

Timing FFTs
  181.615 μs (0 allocations: 0 bytes)


In [41]:
r1 = randn(L*N)
r2 = zeros(L*N)

println("Timing preconditioner op")
@btime for i=1:11
    ldiv!(r2, PF_op, r1)
end

Timing preconditioner op
  399.292 μs (0 allocations: 0 bytes)


In [43]:
println("Timing matmul")
@btime for i=1:11
    mul!(r2, M_op, r1)
end

Timing matmul
  34.745 μs (0 allocations: 0 bytes)


# Profiling

For some reason, it appears that the resulting flamegraphs are not entirely trustworthy.

In [6]:
using Profile
Profile.clear()  # in case we have any previous profiling data

@profile begin
    for i = 1:100
        fill!(x_approx, 0)
        IterativeSolvers.bicgstabl!(x_approx, M_op, b, 1, Pl=PF_op, tol=1e-2, max_mv_products=100, initial_zero=true)
    end
end

In [None]:
Serialization.serialize("serialized_profile.dat", Profile.retrieve())

In [19]:
Profile.print()

In [None]:
r = Serialization.deserialize("serialized_profile.dat")
using ProfileView
ProfileView.view(r[1], lidict=r[2])
ProfileView.svgwrite("flamegraph_gmres.svg",r[1],r[2])