In [78]:
using LinearAlgebra
using Plots
using SparseArrays
using CUDA, CUDA.CUBLAS, CUDA.CUSOLVER, CUDA.CUSPARSE
using BenchmarkTools

In [2]:
# Functions 
# System Equation Linear Sparse
function system_matrix_sparse(systemDimension::Integer, matrixA::AbstractMatrix)
    vectorDiagonalMain = 3 * ones(systemDimension)
    vectorDiagonalUpperLower = -1 * ones(systemDimension - 1)
    matrixA .= convert(Matrix, Tridiagonal(vectorDiagonalUpperLower, vectorDiagonalMain, vectorDiagonalUpperLower))
    for i = 1:systemDimension
        if matrixA[i, systemDimension-i+1] == 0.0
            matrixA[i, systemDimension-i+1] = 0.5
        end
    end
    return nothing
end
# 
function vector_independent_term(systemDimension::Integer, vectorB::AbstractVector)
    vectorB[1] = 2.5
    vectorB[systemDimension] = 2.5
    position::Integer = floor(systemDimension / 2)
    for i = 2:systemDimension-1
        if i == position || i == position + 1
            vectorB[i] = 1.0
        else
            vectorB[i] = 1.5
        end
    end
    return nothing
end

vector_independent_term (generic function with 1 method)

In [60]:
N = 2048
tA = Matrix{Float64}(undef, N, N)
b = Vector{Float64}(undef, N)
x_0 = zeros(N);
system_matrix_sparse(N, tA)
vector_independent_term(N, b);
A = sparse(tA)
modulo = 8
precondicionador = "SOR"

"SOR"

In [22]:
function leastsquares(H, r)
    r′ = zeros(size(H)[1])
    r′[1] = norm(r)
    x = H \ r′
end

function reiniciarvariables(x, A, b)
    k = 1
    x₀ = x
    r = b - A*x₀
    H = zeros(2,1)
    q = [r / norm(r)]
    return x₀, r, q, k, H
end

function precondition(name, A, b)
    if name == "Jacobi"
        M_jacobi = Diagonal(A)
        return [M_jacobi]

    elseif name == "Gauss-Seidel"
        D = Diagonal(A)
        U = triu(A,1) 
        L = tril(A,-1)
        M_gauss_seidel_1 = I+(L*inv(D))
        M_gauss_seidel_2 = D+U
        return [M_gauss_seidel_1, M_gauss_seidel_2]

    elseif name == "SOR"
        α = 1.8
        D = Diagonal(A)
        U = triu(A,1) 
        L = tril(A,-1)
        M_sor_1 = I+(α*L*inv(D))
        M_sor_2 = D+α*U
        return [M_sor_1, M_sor_2]
    end
end

function reiniciarvariablesprecondicionado(x, A, b, Ms::Vector)
    k = 1
    x₀ = x
    H = zeros(2,1)

    residual₀ = b - A*x₀
    [residual₀ = M\residual₀ for M ∈ Ms]
    q = [residual₀ / norm(residual₀)]
    return x₀, residual₀, q, k, H
end

reiniciarvariablesprecondicionado (generic function with 1 method)

In [83]:
function parallelgmresreiniciado(A::SparseMatrixCSC{Float64, Int64}, b::Vector{Float64}, ϵ = 1e-5)
    x₀ = zeros(length(b))
    residual₀ = b - A*x₀
    q = [residual₀ / norm(residual₀)]
    normres_gmresrei = []
    
    k = 1
    x = x₀
    H = zeros(2,1)
    residual = residual₀
    counter = 0
    
    d_A = CuSparseMatrixCSR{Float64}(A)
    d_b = CuArray{Float64}(b)
    d_x₀ = CuArray(x₀)
    d_x = CuArray(x)    

    while norm(residual) > ϵ
        y = A*q[k]
        for j ∈ 1:k
            H[j,k] = q[j]' * y
            y -= H[j,k]*q[j]
        end
        H[k+1,k] = norm(y)
        push!(q, y/H[k+1,k])
        H = vcat(H, zeros(1, size(H)[2]))
        H = hcat(H, zeros(size(H)[1], 1))
        k += 1
       if k % modulo == 0 
            c = leastsquares(H, residual₀)
            Q = hcat(q...)

            dQ = CuArray(Q)
            dc = CuArray(c)
            
            d_x = dQ*dc + d_x₀
            residual = Array(d_A*d_x - d_b)
            x = Array(d_x)
            x₀, residual₀, q, k, H = reiniciarvariables(x, A, b)
            d_x₀ = CuArray(x₀)
            counter+=1
        end
        normres_gmresrei = [normres_gmresrei;norm(residual₀)]
    end
    return normres_gmresrei 
end

parallelgmresreiniciado (generic function with 4 methods)

In [84]:
parallelgmresreiniciado(A,b)

14-element Vector{Any}:
 67.9227502387823
 67.9227502387823
 67.9227502387823
 67.9227502387823
 67.9227502387823
 67.9227502387823
  0.0049500573704276156
  0.0049500573704276156
  0.0049500573704276156
  0.0049500573704276156
  0.0049500573704276156
  0.0049500573704276156
  0.0049500573704276156
  3.2239240141621626e-6

In [86]:
function parallelgmresprecondicionadoreiniciado(A::AbstractMatrix, b::Vector{Float64}, precondition_name::String, ϵ = 1e-5)
    x₀ = zeros(length(b))
    Ms = precondition(precondition_name, A, b)
    residual₀ = b - A*x₀
    [residual₀ = M\residual₀ for M ∈ Ms]
    q = [residual₀ / norm(residual₀)]
    normres_gmresreipre = []

    k = 1
    x = x₀
    H = zeros(2,1)
    residual = residual₀
    counter = 0

    d_A = CuSparseMatrixCSR{Float64}(A)
    d_b = CuArray{Float64}(b)
    d_x₀ = CuArray(x₀)
    d_x = CuArray(x)  

    while norm(residual) > ϵ
        ω = A*q[k]
        [ω = M\ω for M ∈ Ms]
        for j ∈ 1:k
            H[j,k] = q[j]' * ω
            ω -= H[j,k]*q[j]
        end
        H[k+1,k] = norm(ω)
        push!(q, ω/H[k+1,k])
        H = vcat(H, zeros(1, size(H)[2]))
        H = hcat(H, zeros(size(H)[1], 1))
        k += 1
        if k % modulo == 0
            c = leastsquares(H, residual₀)
            Q = hcat(q...)

            dQ = CuArray(Q)
            dc = CuArray(c)
            
            d_x = dQ*dc + d_x₀
            residual = Array(d_A*d_x - d_b)
            [residual = M\residual for M ∈ Ms]
            x = Array(d_x)
            x₀, residual₀, q, k, H = reiniciarvariablesprecondicionado(x, A, b, Ms)
            d_x₀ = CuArray(x₀)
        end
        normres_gmresreipre = [normres_gmresreipre;norm(residual₀)]
        counter+=1
    end
    return normres_gmresreipre
end

parallelgmresprecondicionadoreiniciado (generic function with 4 methods)

In [101]:
parallelgmresprecondicionadoreiniciado(A,b,precondicionador)

14-element Vector{Any}:
 84.93436786819936
 84.93436786819936
 84.93436786819936
 84.93436786819936
 84.93436786819936
 84.93436786819936
  0.0021397895448919224
  0.0021397895448919224
  0.0021397895448919224
  0.0021397895448919224
  0.0021397895448919224
  0.0021397895448919224
  0.0021397895448919224
  1.1325359608805502e-6

In [98]:
graphGmres = Plots.scatter(normres_gmres, markersize=4, label="Plano", c="red", leg=true)
Plots.scatter!(normres_gmrespre, markersize=4, label="Precondicionado", c="yellow")
Plots.scatter!(normres_gmresrei, markersize=4, label="Reiniciado", c="blue")
Plots.scatter!(normres_gmresprerei, markersize=4, label="Precondicionado Reiniciado", c="green")
plot!(xaxis=("iteraciones") , yaxis = ("residuales", :log))
plot!(title="Convergencia del Método del Residuo Mínimo |Generalizado")
savefig(graphGmres, "convergenciaGMRES")

"/home/edwin/Documents/WorkSpaces/pesadilla/convergenciaGMRES.png"

In [102]:
normres_gmrespreJ = gmresprecondicionado(A,b,"Jacobi")
normres_gmrespreS = gmresprecondicionado(A,b,"SOR")
graphGmresPre = Plots.scatter(normres_gmrespreJ, markersize=4, label="Jacobi", c="red", leg=true)
Plots.scatter!(normres_gmrespreS, markersize=4, label="SOR", c="blue")
plot!(xaxis=("iteraciones") , yaxis = ("residuales", :log))
plot!(title="Convergencia del Método del Residuo Mínimo |Generalizado")
savefig(graphGmresPre, "convergenciaGMRESPre")

"/home/edwin/Documents/WorkSpaces/pesadilla/convergenciaGMRESPre.png"

In [141]:
N = 128
tA = Matrix{Float64}(undef, N, N)
b = Vector{Float64}(undef, N)
x_0 = zeros(N);
system_matrix_sparse(N, tA)
vector_independent_term(N, b);
A = sparse(tA);

In [146]:
BenchmarkTools.DEFAULT_PARAMETERS.samples = 20
Bgmres = @benchmark gmres(A,b)
BgmresRei = @benchmark gmresreiniciado(A, b)
BgmresPre = @benchmark gmresprecondicionado(A, b, "Gauss-Seidel");
BgmresPreRei = @benchmark gmresprecondicionadoreiniciado(A, b, "Gauss-Seidel");

In [147]:
display(Bgmres)
display(BgmresRei)
display(BgmresPre)
display(BgmresPreRei)

BenchmarkTools.Trial: 20 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m121.905 μs[22m[39m … [35m239.562 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m145.076 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m159.926 μs[22m[39m ± [32m 38.712 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [34m [39m[39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▅[39m▅[39m▁

BenchmarkTools.Trial: 20 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m68.979 μs[22m[39m … [35m160.227 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m71.072 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m77.043 μs[22m[39m ± [32m 20.270 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[34m▁[39m[39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[34m█[39m[39m▇[39m▆[39

BenchmarkTools.Trial: 20 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m122.449 μs[22m[39m … [35m284.284 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m125.679 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m134.229 μs[22m[39m ± [32m 35.558 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[34m▄[39m[39m▁[39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[34m█[39m[39m█

BenchmarkTools.Trial: 20 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m111.263 μs[22m[39m … [35m268.781 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m113.576 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m123.312 μs[22m[39m ± [32m 34.927 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[34m▁[39m[39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[34m█[39m[39m▅