From ac3c30c26d7937be7589a240113962a0b95345cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?=
Date: Fri, 10 May 2024 13:45:09 +0200
Subject: [PATCH 01/44] Test code for parallel assembly

---
 test/parallel_testtools.jl | 137 ++++++++++++++++++++++++++++++++++++
 test/test_parallel.jl      | 138 +++++++++++++++++++++++++++++++++++++
 2 files changed, 275 insertions(+)
 create mode 100644 test/parallel_testtools.jl
 create mode 100644 test/test_parallel.jl

diff --git a/test/parallel_testtools.jl b/test/parallel_testtools.jl
new file mode 100644
index 0000000..2a92521
--- /dev/null
+++ b/test/parallel_testtools.jl
@@ -0,0 +1,137 @@
+using ChunkSplitters
+# Methods to test parallel assembly
+# Will eventually become part of the package.
+
+"""
+    $(SIGNATURES)
+
+Return a colored partitioning of the grid made up by `X` and `Y` for work with `max(nt,4)` threads,
+as a vector `p` of vectors of pairs of index ranges, such that `p[i]` contains the partitions
+of color `i`, which can be assembled independently of each other.
+
+The current algorithm tiles the grid into `nt × nt` chunks of cell index ranges and
+assigns colors to them cyclically, such that chunks of equal color are never adjacent
+(not even diagonally) and hence share no grid nodes.
+"""
+function part2d(X,Y, nt)
+    nt=max(4,nt)
+    XP=collect(chunks(1:length(X)-1,n=nt))
+    YP=collect(chunks(1:length(Y)-1,n=nt))
+    partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt]
+    col=1
+    for jp=1:nt
+        for ip=1:nt
+            push!(partitions[col], (XP[ip], YP[jp]))
+            col=(col-1+1)%nt+1 # shift color by 1 within a row of chunks
+        end
+        col=(col-1+2)%nt+1 # shift color by 2 between rows of chunks
+    end
+    partitions
+end
+
+
+function showgrid(Makie, ColorSchemes, X,Y,nt)
+    f = Makie.Figure()
+    ax = Makie.Axis(f[1, 1]; aspect = 1)
+    p=part2d(X,Y,nt)
+    ncol=length(p)
+    @show sum(length,p), ncol
+    colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol)
+    poly=Vector{Makie.Point2f}(undef,4)
+    for icol = 1:ncol
+        for (xp, yp) in p[icol]
+            for j in yp
+                for i in xp
+                    poly[1]=Makie.Point2f(X[i], Y[j])
+                    poly[2]=Makie.Point2f(X[i + 1], Y[j])
+                    poly[3]=Makie.Point2f(X[i + 1], Y[j + 1])
+                    poly[4]=Makie.Point2f(X[i], Y[j + 1])
+                    Makie.poly!(copy(poly),color = colors[icol])
+                end
+            end
+        end
+    end
+    f
+end
+
+
+"""
+    $(SIGNATURES)
+
+Assemble edge contribution for the finite volume Laplacian.
+Used by [`partassemble!`](@ref).
+"""
+function assembleedge!(A,v,k,l)
+    A[k,k]+=v
+    A[k,l]-=v
+    A[l,k]-=v
+    A[l,l]+=v
+end
+
+"""
+    $(SIGNATURES)
+
+Assemble finite volume Laplacian + diagonal term
+on grid cell `i,j`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblecell!(A,lindexes,X,Y,i,j,d)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+
+    assembleedge!(A,0.5*hx/hy,ij00,ij01)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11)
+    v=0.25*hx*hy
+    A[ij00,ij00]+=v*d
+    A[ij01,ij01]+=v*d
+    A[ij10,ij10]+=v*d
+    A[ij11,ij11]+=v*d
+end
+
+"""
+    $(SIGNATURES)
+
+Assemble finite volume Laplacian + diagonal term
+on grid cells in the partition described by the ranges `xp`, `yp`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+    for j in yp
+        for i in xp
+            assemblecell!(A,lindexes,X,Y,i,j,d)
+        end
+    end
+end
+
+"""
+    partassemble!(A,X,Y,nt=1; d=0.1)
+
+Partitioned, cellwise, multithreaded assembly of the finite difference matrix for
+`-Δu + d*u = f` with homogeneous Neumann boundary conditions on the grid set up by the
+coordinate vectors `X` and `Y`, partitioned for work with `nt` threads.
+Multithreading currently does not work during structure setup, i.e. during the first,
+pattern-building assembly.
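+
+Usage sketch (the grid size is illustrative; `partassemble!` is defined below):
+
+    X = collect(range(0, 1; length = 100))
+    Y = collect(range(0, 1; length = 100))
+    A = ExtendableSparseMatrix(length(X) * length(Y), length(X) * length(Y))
+    partassemble!(A, X, Y, 8)   # colored multithreaded assembly with 8 threads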
+""" +function partassemble!(A,X,Y,nt=1;d=0.1) + Nx=length(X) + Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") + size(A,2)==Nx*Ny || error("incompatible size of A") + + lindexes=LinearIndices((1:Nx,1:Ny)) + if nt==1 + assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) + else + p=part2d(X,Y,nt) + for icol=1:length(p) + Threads.@threads for (xp, yp) in p[icol] + assemblepartition!(A,lindexes,X,Y,xp,yp,d) + end + end + end + flush!(A) +end diff --git a/test/test_parallel.jl b/test/test_parallel.jl new file mode 100644 index 0000000..1fe3f1d --- /dev/null +++ b/test/test_parallel.jl @@ -0,0 +1,138 @@ +using ExtendableSparse,SparseArrays +using DocStringExtensions +using BenchmarkTools +using Test + +include("parallel_testtools.jl") + +""" + test_correctness_update(N) + +Test correctness of parallel assembly on NxN grid during +update phase, assuming that the structure has been assembled. +""" +function test_correctness_update(N) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + allnp=[4,5,6,7,8] + + # Assembele without partitioning + # this gives the "base truth" to compare with + partassemble!(A,X,Y) + + # Save the nonzeros + nz=copy(nonzeros(A)) + for np in allnp + # Reset the nonzeros, keeping the structure intact + nonzeros(A).=0 + # Parallel assembly whith np threads + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz + end +end + +""" + test_correctness_build(N) + +Test correctness of parallel assembly on NxN grid during +build phase, assuming that no structure has been assembled. +""" +function test_correctness_build(N) + X=1:N + Y=1:N + allnp=[4,5,6,7,8] + # Get the "ground truth" + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + for np in allnp + # Make a new matrix and assemble parallel. + # this should result in the same nonzeros + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz + end +end + + +@testset "update correctness" begin + test_correctness_update(50) + test_correctness_update(100) + test_correctness_update(rand(30:200)) +end + +@testset "build correctness" begin + test_correctness_build(50) + test_correctness_build(100) + test_correctness_build(rand(30:200)) +end + +""" + speedup_update(N) + +Benchmark parallel speedup of update phase of parallel assembly on NxN grid. +Check for correctness as well. +""" +function speedup_update(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + # Get the base timing + # During setup, set matrix entries to zero while keeping the structure + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) + result=[] + for np in allnp + # Get the parallel timing + # During setup, set matrix entries to zero while keeping the structure + t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(nonzeros($A).=0) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end + +""" + reset!(A) + +Reset ExtenableSparseMatrix into state similar to that after creation. +""" +function reset!(A) + A.cscmatrix=spzeros(size(A)...) + A.lnkmatrix=nothing +end + +""" + speedup_build(N) + +Benchmark parallel speedup of structure build phase of parallel assembly on NxN grid. +Check for correctness as well. + +Works in the moment with locking. 
+""" +function speedup_build(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + reset!(A) + partassemble!(A,X,Y) + @assert nonzeros(A)≈(nz) + + # Get the base timing + # During setup, reset matrix to empty state. + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + + result=[] + for np in allnp + # Get the parallel timing + # During setup, reset matrix to empty state. + t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end From 31501b2da02cdba2ae63e4ef3e3096877a4fa363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Fri, 10 May 2024 13:45:25 +0200 Subject: [PATCH 02/44] Allow for parallel assembly via locking --- src/matrix/extendable.jl | 51 +++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index cba26b3..37cc015 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -18,6 +18,8 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrix """ lnkmatrix::Union{SparseMatrixLNK{Tv, Ti}, Nothing} + lock::Base.ReentrantLock + """ Pattern hash """ @@ -36,7 +38,7 @@ Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for """ function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) + ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing,Base.ReentrantLock(), 0) end function ExtendableSparseMatrix(valuetype::Type{Tv}, @@ -59,7 +61,7 @@ $(SIGNATURES) """ function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) + return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) end """ @@ -169,10 +171,15 @@ function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + lock(ext.lock) + try + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + updateindex!(ext.lnkmatrix, op, v, i, j) + finally + unlock(ext.lock) end - updateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -191,10 +198,15 @@ function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + lock(ext.lock) + try + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + rawupdateindex!(ext.lnkmatrix, op, v, i, j) + finally + unlock(ext.lock) end - rawupdateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -213,10 +225,15 @@ function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + lock(ext.lock) + try + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + ext.lnkmatrix[i, j] = v + finally + unlock(ext.lock) end - ext.lnkmatrix[i, j] = v end end @@ -235,7 +252,13 @@ function 
Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, elseif ext.lnkmatrix == nothing return zero(Tv) else - return ext.lnkmatrix[i, j] + v=zero(Tv) + lock(ext.lock) + try + v=ext.lnkmatrix[i, j] + finally + unlock(ext.lock) + end end end @@ -557,9 +580,9 @@ $(SIGNATURES) """ function Base.copy(S::ExtendableSparseMatrix) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing, S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), nothing, Base.ReentrantLock(),S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), Base.ReentrantLock(), S.phash) end end From b2c141e4dae1f2e0dc315dd484da05d718522818 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 16:54:29 +0100 Subject: [PATCH 03/44] t2 --- src/ExtendableSparse.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 700bbda..36dca95 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -16,6 +16,7 @@ if USE_GPL_LIBS using SuiteSparse end +@info "test2" using DocStringExtensions From 751cf6cf2828a740d48449d56023428113609a68 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 17:48:30 +0100 Subject: [PATCH 04/44] add ExtendableSparseParallel --- Project.toml | 2 + src/ExtendableSparse.jl | 13 +- .../ExtendableSparseParallel.jl | 258 ++++++ .../preparatory.jl | 427 ++++++++++ .../struct_flush.jl | 263 ++++++ .../supersparse.jl | 788 ++++++++++++++++++ 6 files changed, 1749 insertions(+), 2 deletions(-) create mode 100644 src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/preparatory.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/supersparse.jl diff --git a/Project.toml b/Project.toml index 89ce6e1..46d6e61 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,8 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 36dca95..372df82 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -4,6 +4,10 @@ using LinearAlgebra using Sparspak using ILUZero +using Metis +using Base.Threads +using ExtendableGrids + if !isdefined(Base, :get_extension) using Requires end @@ -16,8 +20,6 @@ if USE_GPL_LIBS using SuiteSparse end -@info "test2" - using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros @@ -31,6 +33,13 @@ export SparseMatrixLNK, export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet + +include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light + + include("factorizations/factorizations.jl") export JacobiPreconditioner, diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl new file 
mode 100644 index 0000000..68dace8 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -0,0 +1,258 @@ +include("supersparse.jl") +include("preparatory.jl") +#include("prep_time.jl") + +mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Linked list structure holding data of extension + """ + lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}} + + grid::ExtendableGrid + + nnts::Vector{Ti} + + sortednodesperthread::Matrix{Ti} + + old_noderegions::Matrix{Ti} + + cellsforpart::Vector{Vector{Ti}} + + globalindices::Vector{Vector{Ti}} + + new_indices::Vector{Ti} + + rev_new_indices::Vector{Ti} + + start::Vector{Ti} + + cellparts::Vector{Ti} + + nt::Ti + + depth::Ti + + +end + + + +function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} + grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) + lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth) +end + + + +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + + +#= +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end +=# + + +""" +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=true) where {Tv, Ti <: Integer}` + +A[i,j] += v, using any partition. +If the partition should be specified (for parallel use), use +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=true) where {Tv, Ti <: Integer}`. 
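+
+Sketch of the intended threaded use (the per-cell computation of the global
+indices `i`, `j` and the value `v` is illustrative and depends on the
+discretization; level-1 partitions shown, separator levels follow analogously):
+
+    Threads.@threads for tid = 1:A.nt
+        for cell in A.cellsforpart[tid]
+            # ... compute a contribution v for global indices (i, j) on `cell` ...
+            addtoentry!(A, i, j, tid, v)
+        end
+    end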
+""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + +#--------------------------------- + + +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + end + + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + ext.lnkmatrices[tid][i, ext.sortednodesperthread[tid, j]] + +end + +function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + #@info typeof(tid), typeof(j) + jj = ext.sortednodesperthread[tid, j] + ext.lnkmatrices[tid][i, jj] = v + end +end + + + +#------------------------------------ + +function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + A.cscmatrix = spzeros(Tv, Ti, num_nodes(A.grid), num_nodes(A.grid)) + A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] +end + +function nnz_flush(ext::ExtendableSparseMatrixParallel) + flush!(ext) + return nnz(ext.cscmatrix) +end + +function nnz_noflush(ext::ExtendableSparseMatrixParallel) + return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt]) +end + +function matrixindextype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + Ti 
+end
+
+function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer}
+    Tv
+end
+
+
+
+function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel)
+    #flush!(ext)
+    xnnzCSC, xnnzLNK = nnz_noflush(ext)
+    m, n = size(ext)
+    print(io,
+          m,
+          "×",
+          n,
+          " ",
+          typeof(ext),
+          " with ",
+          xnnzCSC,
+          " stored ",
+          xnnzCSC == 1 ? "entry" : "entries",
+          " in CSC and ",
+          xnnzLNK,
+          " stored ",
+          xnnzLNK == 1 ? "entry" : "entries",
+          " in LNK.")
+
+    if !haskey(io, :compact)
+        io = IOContext(io, :compact => true)
+    end
+
+    if !(m == 0 || n == 0 || xnnzCSC == 0)
+        print(io, " CSC:\n")
+        Base.print_array(IOContext(io), ext.cscmatrix)
+    end
+end
+
+Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n)
+
+include("struct_flush.jl")
diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
new file mode 100644
index 0000000..e14a066
--- /dev/null
+++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
@@ -0,0 +1,427 @@
+"""
+`function preparatory_multi_ps_less_reverse(nm, nt, depth)`
+
+`nm` is the number of nodes in each dimension (examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid).
+`nt` is the number of threads.
+`depth` is the number of partition layers: for depth=1 there are nt parts and 1 separator; for depth=2 the separator is partitioned again, leading to 2*nt+1 submatrices, and so on.
+To assemble the system matrix in parallel, quantities such as `cellsforpart` (i.e. which thread takes which cells) need to be computed in advance. This is done here.
+"""
+function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0)
+    grid = getgrid(nm; x0, x1)
+
+    if sequential
+        (allcells, start, cellparts) = grid_to_graph_ps_multi!(grid, nt, depth)
+    else
+        (allcells, start, cellparts) = grid_to_graph_ps_multi_par!(grid, nt, depth)
+    end
+
+    (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(
+        cellparts, allcells, start, num_nodes(grid), Ti, nt
+    )
+
+    cfp = bettercellsforpart(cellparts, depth*nt+1)
+    return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts
+end
+
+
+"""
+`function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)`
+
+After the cell regions (the partitioning of the grid) have been computed, several derived quantities have to be computed.
+One of them is `sortednodesperthread`, an nt x num_nodes matrix: `sortednodesperthread[i,j]` is the local index at which the j-th node appears in the submatrix of thread i.
+`cellregs` contains the partition of each cell.
+Furthermore, `nnts` ("number of nodes of the threads") is computed; it contains for each thread the number of nodes that are contained in the cells of that thread.
+`allcells` and `start` together behave like the rowval and colptr arrays of a CSC matrix, such that `allcells[start[j]:start[j+1]-1]` are all cells that contain the j-th node.
+`nn` is the number of nodes in the grid.
+`Ti` is the type (Int64, ...) of the elements in the created arrays.
+`nt` is the number of threads.
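+
+The returned `sortednodesperthread` and `globalindices` are mutually inverse index
+maps; a consistency check sketch (using the names returned by this function):
+
+    for tid = 1:nt
+        for loc = 1:nnts[tid]
+            j = globalindices[tid][loc]               # new (reordered) global index
+            @assert sortednodesperthread[tid, j] == loc
+        end
+    end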
+""" +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt) + + num_matrices = maximum(cellregs) + depth = Int(floor((num_matrices-1)/nt)) + + #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector + #nnts = [zeros(Ti, nt+1) for i=1:depth+1] + nnts = zeros(Ti, nt) + #noderegs_max_tmp = 0 + old_noderegions = zeros(Ti, (depth+1, nn)) + + # Count nodes per thread: + tmp = zeros(depth+1) + for j=1:nn + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + #nnts[crmod] += 1 + old_noderegions[level,j] = crmod + if !(crmod in tmp[1:tmpctr-1]) + nnts[crmod] += 1 + #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + # Reorder inidices to receive a block structure: + # Taking the original matrix [a_ij] and mapping each i and j to new_indices[i] and new_indices[j], gives a block structure + # the reverse is also defined rev_new_indices[new_indices[k]] = k + # From now on we will only use this new ordering + counter_for_reorder = zeros(Ti, depth*nt+1) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder[(level-1)*nt + reg] += 1 #(reg-1)*depth + level] += 1 + end + + starts = vcat([0], cumsum(counter_for_reorder)) + counter_for_reorder2 = zeros(Ti, depth*nt+1) + new_indices = Vector{Ti}(undef, nn) + rev_new_indices = Vector{Ti}(undef, nn) + origin = Vector{Ti}(undef, nn) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder2[(level-1)*nt + reg] += 1 + origin[j] = reg + new_indices[j] = starts[(level-1)*nt + reg]+counter_for_reorder2[(level-1)*nt + reg] + rev_new_indices[new_indices[j]] = j + end + starts .+= 1 + + # Build sortednodesperthread and globalindices array: + # They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j + # Note that j has to be a `new index` + + sortednodesperthread = zeros(Ti, (nt, nn)) #vvcons(Ti, nnts) + globalindices = vvcons(Ti, nnts) + gictrs = zeros(Ti, nt) + + for nj=1:nn + oj = rev_new_indices[nj] + cells = @view allcells[start[oj]:start[oj+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + if !(crmod in tmp[1:tmpctr-1]) + gictrs[crmod] += 1 # , level] += 1 + sortednodesperthread[crmod,nj] = gictrs[crmod] + globalindices[crmod][gictrs[crmod]] = nj + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + nnts, sortednodesperthread, old_noderegions, globalindices, gictrs, new_indices, rev_new_indices, starts +end + + + + + + + + +""" +`function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` + +This function partitons the separator, which is done if `depth`>1 (see `grid_to_graph_ps_multi!` and/or `preparatory_multi_ps`). +`cellregs` contains the regions/partitions/colors of each cell. +`nc` is the number of cells in the grid. +`ACSC` is the adjacency matrix of the graph of the (separator-) grid (vertex in graph is cell in grid, edge in graph means two cells share a node) stored as a CSC. +`nt` is the number of threads. 
+`level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... +`preparatory_multi_ps` is the number of separator-cells. +""" +function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes) + sepanodes = findall(x->x==nt+1, cellregs) + + indptr = collect(1:nc+1) + indices = zeros(Int64, nc) + rowval = zeros(Int64, nc) + + indptrT = collect(1:ctr_sepanodes+1) + indicesT = zeros(Int64, ctr_sepanodes) + rowvalT = zeros(Int64, ctr_sepanodes) + + for (i,j) in enumerate(sepanodes) + indices[j] = i + indicesT[i] = j + rowval[j] = 1 + rowvalT[i] = 1 + end + + R = SparseMatrixCSC(ctr_sepanodes, nc, indptr, indices, rowval) + RT = SparseMatrixCSC(nc, ctr_sepanodes, indptrT, indicesT, rowvalT) + prod = ACSC*dropzeros(RT) + RART = dropzeros(R)*ACSC*dropzeros(RT) + + partition2 = Metis.partition(RART, nt) + cellregs2 = copy(partition2) + + ctr_sepanodes = 0 + for (i,j) in enumerate(sepanodes) + rows = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + cellregs[j] = level0*nt + cellregs2[i] + if minimum(partition2[rows]) != maximum(partition2[rows]) + cellregs[j] = (level0+1)*nt+1 + ctr_sepanodes += 1 + end + end + + RART, ctr_sepanodes +end + + + +""" +`function grid_to_graph_ps_multi!(grid, nt, depth)` + +The function assigns colors/partitons to each cell in the `grid`. First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again... +`grid` is a simplexgrid. +`nt` is the number of threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... +""" +function grid_to_graph_ps_multi!(grid, nt, depth) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + end + end + RART = ACSC + for level=1:depth-1 + RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + end + + + return allcells, start, cellregs +end + + + +function grid_to_graph_ps_multi_par!(grid, nt, depth) + time = zeros(12) + As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] + number_cells_per_node = zeros(Int64, num_nodes(grid)) + + cn = grid[CellNodes] + + for j=1:num_cells(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_cells_per_node[node_id] += 1 + end + end + + + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += 
cumsum(number_cells_per_node) + number_cells_per_node .= 0 + + for j=1:num_cells(grid) + tmp = view(cn, :, j) + for node_id in tmp + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + cells = @view allcells[start[j]:start[j+1]-1] + l = length(cells) + for (i,id1) in enumerate(cells) + ce = view(cells, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + #SparseArrays.SparseMatrixCSC(A)) + + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + ctr_sepanodes_a = zeros(Int64, nt) + + cell_range = get_starts(num_cells(grid), nt) + Threads.@threads :static for tid=1:nt + for j in cell_range[tid]:cell_range[tid+1]-1 + rows = @view ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes_a[tid] += 1 + end + end + end + + ctr_sepanodes = sum(ctr_sepanodes_a) + + #= + time[10] = @elapsed for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + end + end + =# + RART = ACSC + for level=1:depth-1 + RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + end + + + return allcells, start, cellregs +end + + +function add_all_par!(As) + nt = length(As) + depth = Int(floor(log2(nt))) + ende = nt + for level=1:depth + + @threads :static for tid=1:2^(depth-level) + #@info "$level, $tid" + start = tid+2^(depth-level) + while start <= ende + As[tid] += As[start] + start += 2^(depth-level) + end + end + ende = 2^(depth-level) + end + As[1] + +end + + +""" +`function vvcons(Ti, lengths)` + +`lengths` is a vector of integers. +The function creates a vector of zero vectors of type `Ti` of length `lengths[i]`. +""" +function vvcons(Ti, lengths) + x::Vector{Vector{Ti}} = [zeros(Ti, i) for i in lengths] + return x +end + + +""" +`function bettercellsforpart(xx, upper)` + +`xx` are the CellRegions (i.e. the color/partition of each cell). +`upper` is the number of partitions (upper=depth*nt+1). +The function returns a vector e.g. [v1, v2, v3, v4, v5]. +The element v1 would be the list of cells that are in partition 1 etc. +The function is basically a faster findall. +""" +function bettercellsforpart(xx, upper) + ctr = zeros(Int64, upper) + for x in xx + ctr[x] += 1 + end + cfp = vvcons(Int64, ctr) + ctr .= 1 + for (i,x) in enumerate(xx) + cfp[x][ctr[x]] = i + ctr[x] += 1 + end + cfp +end + +""" +`function getgrid(nm)` + +Returns a simplexgrid with a given number of nodes in each dimension. +`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
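+
+For example (`x0`, `x1` are the interval bounds in each direction):
+
+    grid2d = getgrid((100, 100))                    # 100 x 100 nodes on the unit square
+    grid3d = getgrid((50, 50, 50); x0=0.0, x1=2.0)  # 50^3 nodes on [0,2]^3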
+""" +function getgrid(nm; x0=0.0, x1=1.0) + if length(nm) == 2 + n,m = nm + xx = collect(LinRange(x0, x1, n)) + yy = collect(LinRange(x0, x1, m)) + grid = simplexgrid(xx, yy) + else + n,m,l = nm + xx = collect(LinRange(x0, x1, n)) + yy = collect(LinRange(x0, x1, m)) + zz = collect(LinRange(x0, x1, l)) + grid = simplexgrid(xx, yy, zz) + end + grid +end + +function get_starts(n, nt) + ret = ones(Int64, nt+1) + ret[end] = n+1 + for i=nt:-1:2 + ret[i] = ret[i+1] - Int(round(ret[i+1]/i)) #Int(round(n/nt))-1 + end + ret +end + +function last_nz(x) + n = length(x) + for j=n:-1:1 + if x[j] != 0 + return (j, x[j]) + end + end +end + diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl new file mode 100644 index 0000000..c27aab0 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -0,0 +1,263 @@ +function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + + + if !do_dense + A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) + + else + if keep_zeros + A.cscmatrix = dense_flush_keepzeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + else + A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + end + end + + A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + +end + +""" +`CSC_RLNK_plusequals_less3_reordered_super!` from `plusequals.jl` +""" +function sparse_flush!(A::ExtendableSparseMatrixParallel; keep_zeros=true) + + #dropzeros!( + plus_remap(A.lnkmatrices, A.cscmatrix, A.globalindices; keep_zeros) + #) + +end + + + +""" +`CSC_RLNK_si_oc_ps_dz_less_reordered` from `conversion.jl` +""" +function dense_flush_keepzeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + #if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + #end + k = As[regmod].colptr[k] + end + else + while k>0 + #if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr 
-= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + #end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + +function dense_flush_removezeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + else + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr -= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + + + + + + diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl new file mode 100644 index 0000000..ae52f60 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -0,0 +1,788 @@ + +using SparseArrays +using ExtendableSparse + +mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + + Initial length is n, + it grows with each new entry. 
+ """ + rowval::Vector{Ti} + + """ + Nonzero entry values correspondin to each pair + (colptr[index],rowval[index]) + + Initial length is n, it grows with each new entry. + """ + nzval::Vector{Tv} + + + collnk::Vector{Ti} + + colctr::Ti +end + + +function SparseArrays.SparseMatrixCSC(A::SuperSparseMatrixLNK{Tv, Ti})::SparseArrays.SparseMatrixCSC where {Tv, Ti <: Integer} + SparseArrays.SparseMatrixCSC(SparseMatrixLNK{Tv, Ti}(A.m, A.n, A.nnz, A.nentries, A.colptr, A.rowval, A.nzval)) + +end + +function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SuperSparseMatrixLNK{Tv, Ti}(m, n, 0, n, zeros(Ti, n), zeros(Ti, n), zeros(Tv, n), zeros(Ti, n), 0) +end + + +function findindex(lnk::SuperSparseMatrixLNK, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = j + k0 = j + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +Return tuple containing size of the matrix. +""" +Base.size(lnk::SuperSparseMatrixLNK) = (lnk.m, lnk.n) + +""" +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SuperSparseMatrixLNK{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SuperSparseMatrixLNK, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + lnk.colptr[k0] = lnk.nentries + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SuperSparseMatrixLNK, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = v + lnk.nnz += 1 + return lnk + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +function rawupdateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. 
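+    # Unlike `updateindex!` above, the first entry of a column is recorded even
+    # when `v` is zero, so the sparsity pattern is created deterministically.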
+ if lnk.rowval[j] == 0 + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +#= +mutable struct ColEntry{Tv, Ti <: Integer} + rowval::Ti + nzval::Tv +end + +# Comparison method for sorting +Base.isless(x::ColEntry, y::ColEntry) = (x.rowval < y.rowval) +=# + +function get_column!(col::Vector{ColEntry{Tv, Ti}}, lnk::SuperSparseMatrixLNK{Tv, Ti}, j::Ti)::Ti where {Tv, Ti <: Integer} + k = j + ctr = zero(Ti) + while k>0 + if abs(lnk.nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr +end + + +function remove_doubles!(col, coll) + #input_ctr = 1 + last = 1 + for j=2:coll + if col[j].rowval == col[last].rowval + col[last].nzval += col[j].nzval + else + last += 1 + if last != j + col[last] = col[j] + end + end + end + last +end + +function get_column_removezeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function get_column_keepzeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + #if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + #end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function merge_into!(rowval::Vector{Ti}, nzval::Vector{Tv}, C::SparseArrays.SparseMatrixCSC{Tv, Ti}, col::Vector{ColEntry{Tv, Ti}}, J::Ti, coll::Ti, ptr1::Ti) where {Tv, Ti <: Integer} + j_min = 1 + numshifts = 0 + j_last = 0 + last_row = 0 + + #@warn "MERGING $J" + + #rowval0 = copy(C.rowval[C.colptr[J]:C.colptr[J+1]-1]) + #endptr = C.colptr[J+1] + + for (di,i) in enumerate(C.colptr[J]:C.colptr[J+1]-1) + for j=j_min:coll + #if col[j].rowval == last_row + # #@info "!! 
col j rowval == last row" + #end + if col[j].rowval < C.rowval[i] #ptr1+di+numshifts] #i+numshifts] + if col[j].rowval == last_row + #@info "$(ptr1+di+numshifts) : backwards EQUALITY: " + nzval[ptr1+di+numshifts] += col[j].nzval + else + #@info "$(ptr1+di+numshifts) : Insert from col: j=$j" + #shift_e!(C.rowval, C.nzval, 1, i+numshifts, C.colptr[end]-1) + rowval[ptr1+di+numshifts] = col[j].rowval + nzval[ptr1+di+numshifts] = col[j].nzval + numshifts += 1 + #endptr += 1 + end + j_last = j + elseif col[j].rowval > C.rowval[i] #if col[j].rowval + #@info "$(ptr1+di+numshifts) : Insert from C: i=$i" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i] + j_min = j + break + else + #@info "$(ptr1+di+numshifts) : normal EQUALITY: i=$i, j=$j" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i]+col[j].nzval + #numshifts += 1 + j_min = j+1 + j_last = j + + if j == coll + #@info "$(ptr1+di+numshifts+1) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i+1:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i+1:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + break + end + + if j == coll + #@info "$(ptr1+di+numshifts) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + last_row = col[j].rowval + end + end + endptr = ptr1 + numshifts + (C.colptr[J+1]-C.colptr[J]) + last_row = 0 + numshifts_old = numshifts + numshifts = 0 + #start_ptr = endptr - 1 #C.colptr[J+1]-1 + if j_last > 0 + last_row = col[j_last].rowval + end + + if j_last != coll + for j=j_last+1:coll + if col[j].rowval != last_row + numshifts += 1 + #shift_e!(C.rowval, C.nzval, 1, start_ptr+numshifts, C.colptr[end]-1) + #for k=start_ptr+numshifts: + #@info "$(endptr+numshifts) : after..." 
+ rowval[endptr+numshifts] = col[j].rowval + nzval[endptr+numshifts] = col[j].nzval + last_row = rowval[endptr+numshifts] + #colptr[J+1:end] .+= 1 + else + nzval[endptr+numshifts] += col[j].nzval + end + end + end + + return numshifts + numshifts_old + +end + + +function print_col(col, coll) + v = zeros((2, coll)) + for j=1:coll + v[1,j] = col[j].rowval + v[2,j] = col[j].nzval + end + @info v +end + +function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + if lnk.nnz == 0 + return csc + elseif length(csc.rowval) == 0 + return SparseMatrixCSC(lnk) + else + return lnk + csc + end +end + +function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + #@info ">>>>>>> $J <<<<<<<<<<<<<<<" + # insert new added column here / dummy + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + #print_col(col, coll) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + + numshifts += nns + #j = colptr[i] #sortedcolumns[J]] + #rowval[j] = J + #nzval[j] = J + # insertion end + + #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts + + #a = i+1 + #b = sortedcolumns[J+1] + #@info a, b + + + #colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + + #= + + @info csc.colptr[a:b] + + colptr[a:b] = csc.colptr[a:b].+numshifts + + #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts + @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] + @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] + rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + #rowval[colptrsortedcolumns[J+1]] + =# + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + +function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, 
Ti <: Integer} + nt = length(lnks) + + if keep_zeros + get_col! = get_column_keepzeros! + else + get_col! = get_column_removezeros! + end + lnkscols = vcat([lnks[i].collnk[1:lnks[i].colctr] for i=1:nt]...) + supersparsecolumns = vcat([gi[i][lnks[i].collnk[1:lnks[i].colctr]] for i=1:nt]...) + num_cols = sum([lnks[i].colctr for i=1:nt]) + tids = Vector{Ti}(undef, num_cols) + ctr = 0 + for i=1:nt + for j=1:lnks[i].colctr + ctr += 1 + tids[ctr] = i + end + end + + + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + coll = sum([lnks[i].nnz for i=1:nt]) + nnz_sum = length(csc.rowval) + coll + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + if csc.m < coll + coll = csc.m + end + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:coll] + numshifts = 0 + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + J = 1 + i0 = 0 + #lj_last = [] + #tid_last = [] + lj_last = Vector{Ti}(undef, nt) + tid_last = Vector{Ti}(undef, nt) + ctr_last = 1 + gj_last = 0 + for J=1:length(sortedcolumns)-1 + gj_now = sortedcolumns[J] + gj_next = sortedcolumns[J+1] + + lj_last[ctr_last] = lnkscols[sortedcolumnids[J]] + tid_last[ctr_last] = tids[sortedcolumnids[J]] + + if gj_now != gj_next + #@info typeof(lnks) + # do stuff from gj_last to gj_now / from last_lj to J + #@info lj_last, tid_last + coll = get_col!(col, lnks, lj_last, tid_last, ctr_last) + + nns = merge_into!(rowval, nzval, csc, col, gj_now, coll, colptr[gj_now]-one(Ti)) + numshifts += nns + + + colptr[gj_now+1:sortedcolumns[J+1]] = csc.colptr[gj_now+1:sortedcolumns[J+1]].+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + + rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.rowval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + #nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.nzval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + + + #for k=csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1 + # k2 = k+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + # rowval[k2] = csc.rowval[k] + # nzval[k2] = csc.nzval[k] + #end + + gj_last = gj_now + ctr_last = 0 #tids[sortedcolumnids[J]]] + end + + ctr_last += 1 + + + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + #for ... 
+ # take many columns together if necessary in `get_column` + #end + + + +end + + + +function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer} + + #@info lnk.collnk[1:lnk.colctr] + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + + #@info supersparsecolumns + #@info sortedcolumns + #@info lnk.collnk[1:length(sortedcolumns)-1] + #@info lnk.collnk[sortedcolumnids[1:length(sortedcolumns)-1]] + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + + coll = get_column!(col, lnk, lnk.collnk[sortedcolumnids[J]] ) + #@info typeof(i), typeof(coll), typeof(colptr), typeof(colptr[i]), typeof(colptr[i]-1) + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-one(Ti)) + numshifts += nns + + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #= + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + =# + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + +end + + + + +function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, 
colptr[i]-1) + numshifts += nns + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + + + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + + +function twodisjointsets(n, k) + A = rand(1:n, k) + B = zeros(Int64, k) + done = false + ctr = 0 + while ctr != k + v = rand(1:n) + if !(v in A) + ctr += 1 + B[ctr] = v + end + end + + A, B +end + +function distinct(x, n) + y = zeros(typeof(x[1]), n) + ctr = 0 + while ctr != n + v = rand(x) + if !(v in y[1:ctr]) + ctr += 1 + y[ctr] = v + end + end + y +end + + +function mean(x) + sum(x)/length(x) +end + +function form(x) + [minimum(x), mean(x), maximum(x)] +end + + + + + + + + + + + From b0953fe2183eb896998df1bc8ea104fb0cfd3689 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 17:58:45 +0100 Subject: [PATCH 05/44] add Mittal,Al-Kurdi ILU --- src/ExtendableSparse.jl | 2 + src/factorizations/factorizations.jl | 2 + src/factorizations/ilu_Al-Kurdi_Mittal.jl | 136 ++++++++++++++++++++++ src/factorizations/iluam.jl | 35 ++++++ 4 files changed, 175 insertions(+) create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal.jl create mode 100644 src/factorizations/iluam.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 372df82..2894027 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -39,6 +39,8 @@ include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light +include("factorizations/ilu_Al-Kurdi_Mittal.jl") +using .ILUAM include("factorizations/factorizations.jl") diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index d278d8b..ead23c5 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -157,6 +157,7 @@ end include("jacobi.jl") include("ilu0.jl") include("iluzero.jl") +include("iluam.jl") include("parallel_jacobi.jl") include("parallel_ilu0.jl") include("sparspak.jl") @@ -165,6 +166,7 @@ include("blockpreconditioner.jl") @eval begin @makefrommatrix ILU0Preconditioner @makefrommatrix ILUZeroPreconditioner + @makefrommatrix ILUAMPreconditioner @makefrommatrix PointBlockILUZeroPreconditioner @makefrommatrix JacobiPreconditioner @makefrommatrix ParallelJacobiPreconditioner diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..a47bd50 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,136 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + rowval::AbstractVector + colptr::AbstractVector + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) +end + +function iluAM!(LU::ILUAMPrecon{T,N}, 
A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + nzval = LU.nzval + diag = LU.diag + + colptr = LU.colptr + rowval = LU.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end +end + +function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + LU = ILUAMPrecon(A::SparseMatrixCSC{T,N}) + iluAM!(LU, A) + LU +end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl new file mode 100644 index 0000000..5d65e40 --- /dev/null +++ b/src/factorizations/iluam.jl @@ -0,0 +1,35 @@ +mutable struct ILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrix + factorization::ILUAM.ILUAMPrecon + phash::UInt64 + function ILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +ILUAMPreconditioner() +ILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). 
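+
+A minimal usage sketch (`A` and `b` are placeholder names for an assembled
+`ExtendableSparseMatrix` and a matching right hand side):
+```
+p = ILUAMPreconditioner(A)    # factorizes A on construction
+x = copy(b)
+ldiv!(x, p, b)                # apply the preconditioner to b
+update!(p)                    # refactorize after the values of A changed
+```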
+""" +function ILUAMPreconditioner end + +function update!(p::ILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = ILUAM.iluAM(p.A.cscmatrix) + p.phash=p.A.phash + else + ILUAM.ilu0!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::ILUAMPreconditioner)=true +allow_views(::Type{ILUAMPreconditioner})=true + From 5df873594a31584fda8d3cacbb352b6682a73b2a Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 23 Feb 2024 12:47:51 +0100 Subject: [PATCH 06/44] implement ILU (sequential and parallel) based on Mittal and Al-Kurdi --- Project.toml | 6 +- src/ExtendableSparse.jl | 33 ++- src/factorizations/factorizations.jl | 119 +++++--- src/factorizations/ilu_Al-Kurdi_Mittal.jl | 176 ++++++++---- src/factorizations/ilu_Al-Kurdi_Mittal_0.jl | 146 ++++++++++ src/factorizations/ilu_Al-Kurdi_Mittal_1.jl | 229 +++++++++++++++ src/factorizations/iluam.jl | 7 +- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 270 ++++++++++++++++++ src/factorizations/piluam.jl | 36 +++ .../ExtendableSparseParallel.jl | 34 ++- .../struct_flush.jl | 2 +- 11 files changed, 941 insertions(+), 117 deletions(-) create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_0.jl create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_1.jl create mode 100644 src/factorizations/pilu_Al-Kurdi_Mittal.jl create mode 100644 src/factorizations/piluam.jl diff --git a/Project.toml b/Project.toml index 46d6e61..8776054 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,15 @@ name = "ExtendableSparse" uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" authors = ["Juergen Fuhrmann "] -version = "1.4" +version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -15,8 +17,6 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" -ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 2894027..285d0c2 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -28,25 +28,39 @@ include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") include("matrix/extendable.jl") -export SparseMatrixLNK, - ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse +export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet +@warn "ESMP!" 
include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") -export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK -export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light include("factorizations/ilu_Al-Kurdi_Mittal.jl") -using .ILUAM - +#using .ILUAM +include("factorizations/pilu_Al-Kurdi_Mittal.jl") +#using .PILUAM include("factorizations/factorizations.jl") +include("factorizations/simple_iteration.jl") +export simple, simple! + +include("matrix/sprand.jl") +export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark + + + + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light + + export JacobiPreconditioner, ILU0Preconditioner, ILUZeroPreconditioner, + ILUAMPreconditioner, + PILUAMPreconditioner, PointBlockILUZeroPreconditioner, ParallelJacobiPreconditioner, ParallelILU0Preconditioner, @@ -57,13 +71,6 @@ export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver export factorize!, update! -include("factorizations/simple_iteration.jl") -export simple, simple! - -include("matrix/sprand.jl") -export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark - - @static if !isdefined(Base, :get_extension) function __init__() @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" begin diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index ead23c5..2d56fce 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -51,6 +51,52 @@ Determine if factorization is a solver or not issolver(::AbstractLUFactorization) = true issolver(::AbstractPreconditioner) = false + + +"""" + @makefrommatrix(fact) + +For an AbstractFactorization `MyFact`, provide methods +``` + MyFact(A::ExtendableSparseMatrix; kwargs...) + MyFact(A::SparseMatrixCSC; kwargs...) +``` +""" +macro makefrommatrix(fact) + return quote + function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) + factorize!($(esc(fact))(;kwargs...), A) + end + function $(esc(fact))(A::SparseMatrixCSC; kwargs...) + $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) 
+ end + end +end + +include("ilu0.jl") +include("iluzero.jl") +include("iluam.jl") +include("piluam.jl") +include("parallel_jacobi.jl") +include("parallel_ilu0.jl") +include("sparspak.jl") +include("blockpreconditioner.jl") +include("jacobi.jl") + +@eval begin + @makefrommatrix ILU0Preconditioner + @makefrommatrix ILUZeroPreconditioner + @makefrommatrix ILUAMPreconditioner + @makefrommatrix PILUAMPreconditioner + @makefrommatrix PointBlockILUZeroPreconditioner + @makefrommatrix JacobiPreconditioner + @makefrommatrix ParallelJacobiPreconditioner + @makefrommatrix ParallelILU0Preconditioner + @makefrommatrix SparspakLU + @makefrommatrix UpdateteableBlockpreconditioner + @makefrommatrix BlockPreconditioner +end + """ ``` factorize!(factorization, matrix) @@ -65,8 +111,40 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) p end +function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) + p.A = A + update!(p) + p +end + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) +# p.A = A +# update!(p) +# p +#end + +#factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) + +#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) + +#function factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel) +# factorize!(p, A) +#end + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) +# factorize!(p, A.cscmatrix) +#end + + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) +# factorize!(p, A.cscmatrix) +#end + + +#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,A) + """ ``` lu!(factorization, matrix) @@ -134,47 +212,6 @@ LinearAlgebra.ldiv!(fact::AbstractFactorization, v) = ldiv!(fact.factorization, -"""" - @makefrommatrix(fact) - -For an AbstractFactorization `MyFact`, provide methods -``` - MyFact(A::ExtendableSparseMatrix; kwargs...) - MyFact(A::SparseMatrixCSC; kwargs...) -``` -""" -macro makefrommatrix(fact) - return quote - function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) - factorize!($(esc(fact))(;kwargs...), A) - end - function $(esc(fact))(A::SparseMatrixCSC; kwargs...) - $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) 
- end - end -end - -include("jacobi.jl") -include("ilu0.jl") -include("iluzero.jl") -include("iluam.jl") -include("parallel_jacobi.jl") -include("parallel_ilu0.jl") -include("sparspak.jl") -include("blockpreconditioner.jl") - -@eval begin - @makefrommatrix ILU0Preconditioner - @makefrommatrix ILUZeroPreconditioner - @makefrommatrix ILUAMPreconditioner - @makefrommatrix PointBlockILUZeroPreconditioner - @makefrommatrix JacobiPreconditioner - @makefrommatrix ParallelJacobiPreconditioner - @makefrommatrix ParallelILU0Preconditioner - @makefrommatrix SparspakLU - @makefrommatrix UpdateteableBlockpreconditioner - @makefrommatrix BlockPreconditioner -end if USE_GPL_LIBS #requires SuiteSparse which is not available in non-GPL builds diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index a47bd50..97bb9a8 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -1,34 +1,27 @@ -module ILUAM -using LinearAlgebra, SparseArrays +#module ILUAM +#using LinearAlgebra, SparseArrays import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz +@info "ILUAM" mutable struct ILUAMPrecon{T,N} diag::AbstractVector nzval::AbstractVector - rowval::AbstractVector - colptr::AbstractVector + A::AbstractMatrix end -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) -end - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - nzval = LU.nzval - diag = LU.diag - - colptr = LU.colptr - rowval = LU.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + @info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) # find diagonal entries for j=1:n @@ -64,25 +57,23 @@ function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integ for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) + point[rowval[v]] = zero(Ti) end end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) end -function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - LU = ILUAMPrecon(A::SparseMatrixCSC{T,N}) - iluAM!(LU, A) - LU -end - +function forward_subst_old!(y, v, nzval, diag, A) + n = A.n + colptr = A.colptr + rowval = A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - y .= 0 + #y .= 0 @inbounds for j=1:n y[j] += v[j] for v=diag[j]+1:colptr[j+1]-1 @@ -93,44 +84,119 @@ function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} end -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - wrk = copy(y) +function backward_subst_old!(x, y, nzval, diag, A) + n = A.n + colptr = A.colptr + rowval = A.rowval @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] + y[rowval[i]] -= nzval[i]*x[j] end + end - x + x end -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where 
{T,N<:Integer} - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x +function ldiv!(x, ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + x end -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b +function ldiv!(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b end function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} x = copy(b) ldiv!(x, ilu, b) + x end function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} length(ilu.nzval) end +#= +function forward_subst!(y, v, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "fw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end + + #y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + +function backward_subst!(x, y, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "bw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + #wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + x +end + +function iluam_subst(ILU::ILUAMPrecon, b) + y = copy(b) + forward_subst!(y, b, ILU) + z = copy(b) + backward_subst!(z, y, ILU) + z +end + + + +function iluam_subst_old(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + z = copy(b) + backward_subst_old!(z, y, nzval, diag, A) + #backward_subst!(z, y, ILU) + z +end +=# + + -end \ No newline at end of file +#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl new file mode 100644 index 0000000..26f9788 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl @@ -0,0 +1,146 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + rowval::AbstractVector + colptr::AbstractVector + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + @info "ILUAMPrecon" + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) +end + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + @info "iluAM!" + nzval = LU.nzval + diag = LU.diag + + colptr = LU.colptr + rowval = LU.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + t = zeros(5) + + # find diagonal entries + t[1] = @elapsed for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
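+            # `point` records, for each stored row of column j, its position
+            # in the CSC arrays; the elimination loop below uses it for O(1)
+            # lookup of fill positions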
+ point[rowval[v]] = v + end + + t[3] += @elapsed for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end + t +end + +function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + t = zeros(6) + t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) + t[2:6] = iluAM!(LU, A) + @info t + LU +end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv1" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv2" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM bs " + x = copy(b) + ldiv!(x, ilu, b) +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl new file mode 100644 index 0000000..a599094 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl @@ -0,0 +1,229 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +#import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "ILUAM" + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + @info "ILUAMPrecon" + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), A) +end + + + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + @info "iluAM!" + nzval = LU.nzval + diag = LU.diag + + colptr = LU.A.colptr + rowval = LU.A.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + t = zeros(5) + + # find diagonal entries + t[1] = @elapsed for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
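+            # as in the other variants: `point` maps the stored row indices of
+            # column j to their positions for O(1) lookup below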
+ point[rowval[v]] = v + end + + t[3] += @elapsed for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end + t +end + + +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + @info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + +#function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} +# t = zeros(6) +# t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) +# t[2:6] = iluAM!(LU, A) +# @info t +# LU +#end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv_new!(x, ilu, v) + + n = ilu.A.n + y = Vector{Float64}(undef, n) + y .= 0 + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + #forward + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + + #backward + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + #@info "AM ldiv1" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv2" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM bs " + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl index 5d65e40..a4aed06 100644 --- a/src/factorizations/iluam.jl +++ b/src/factorizations/iluam.jl @@ -1,6 +1,6 @@ mutable struct ILUAMPreconditioner <: AbstractPreconditioner 
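+    # holds the matrix to be preconditioned, the Al-Kurdi/Mittal
+    # factorization, and the pattern hash which update! uses to choose
+    # between a fresh factorization and an in-place value update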
A::ExtendableSparseMatrix - factorization::ILUAM.ILUAMPrecon + factorization::ILUAMPrecon phash::UInt64 function ILUAMPreconditioner() p = new() @@ -22,10 +22,11 @@ function ILUAMPreconditioner end function update!(p::ILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash - p.factorization = ILUAM.iluAM(p.A.cscmatrix) + p.factorization = iluAM(p.A.cscmatrix) p.phash=p.A.phash else - ILUAM.ilu0!(p.factorization, p.A.cscmatrix) + @warn "fuck?" + ilu0!(p.factorization, p.A.cscmatrix) end p end diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..a1ef818 --- /dev/null +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,270 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + start::AbstractVector + nt::Integer + depth::Integer + +end + +function use_vector_par(n, nt, Ti) + point = [Vector{Ti}(undef, n) for tid=1:nt] + @threads for tid=1:nt + point[tid] = zeros(Ti, n) + end + point +end + +function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) + for j=j0:j1-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end +end + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + start = A.start + nt = A.nt + depth = A.depth + + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + nzval = Vector{Tv}(undef, length(rowval)) #copy(A.nzval) + n = A.cscmatrix.n # number of columns + diag = Vector{Ti}(undef, n) + point = use_vector_par(n, A.nt, Int32) + + # find diagonal entries + # + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + end + #elseif rowval[v] + end + end + end + + #= + @info "piluAM" + nzval = copy(A.cscmatrix.nzval) + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + #nzval = ILU.nzval + n = A.n # number of columns + diag = Vector{Ti}(undef, n) + start = A.start + nt = A.nt + depth = A.depth + point = use_vector_par(n, nt, Ti) + + # find diagonal entries + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + end + + # compute L and U + for level=1:depth + @threads for tid=1:nt + compute_lu!(nzval, point, start[(level-1)*nt+tid], start[(level-1)*nt+tid+1], tid, rowval, colptr, diag, Ti) + end + end + + compute_lu!(nzval, point, start[depth*nt+1], start[depth*nt+2], 1, rowval, colptr, diag, Ti) + =# + + for level=1:depth + @threads for tid=1:nt + for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + 
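+                        # reset the thread-local scratch vector, so point[tid]
+                        # is all zero again for the next column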
end + end + end + end + + #point = zeros(Ti, n) #Vector{Ti}(undef, n) + for j=start[depth*nt+1]:start[depth*nt+2]-1 + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[1][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = zero(Ti) + end + end + + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A.cscmatrix, start, nt, depth) +end + +function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A) + #@info "fwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + + y .= 0 + + for level=1:depth + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + end + end + + @inbounds for j=start[depth*nt+1]:start[depth*nt+2]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + +end + + +function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "bwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + #wrk = copy(y) + + + @inbounds for j=start[depth*nt+2]-1:-1:start[depth*nt+1] + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + + for level=depth:-1:1 + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid+1]-1:-1:start[(level-1)*nt+tid] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + end + end + end + +end + +function ldiv!(x, ILU::PILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + x +end + +function ldiv!(ILU::PILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(b, y, nzval, diag, start, nt, depth, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl new file mode 100644 index 0000000..4a5fcdc --- /dev/null +++ b/src/factorizations/piluam.jl @@ -0,0 +1,36 @@ +mutable struct PILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrixParallel + factorization::PILUAMPrecon + phash::UInt64 + function PILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +PILUAMPreconditioner() +PILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). +""" +function PILUAMPreconditioner end + +function update!(p::PILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = piluAM(p.A) + p.phash=p.A.phash + else + @warn "fuck?" 
+ ilu0!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::PILUAMPreconditioner)=true +allow_views(::Type{PILUAMPreconditioner})=true + diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 68dace8..9e63c37 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -36,6 +36,12 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar nt::Ti depth::Ti + + phash::UInt64 + + n::Ti + + m::Ti end @@ -46,7 +52,7 @@ function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) w grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] - ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth) + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) end @@ -253,6 +259,32 @@ function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParall end end +""" +`function entryexists2(CSC, i, j)` + +Find out if CSC already has an nonzero entry at i,j without any allocations +""" +function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry at i,j + #vals = + #ids = CSC.colptr[j]:(CSC.colptr[j+1]-1) + i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1)) +end + + +function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer} + p1 = CSC.colptr[j] + p2 = CSC.colptr[j+1]-1 + + searchk = searchsortedfirst(view(CSC.rowval, p1:p2), i) + p1 - 1 + + if (searchk <= p2) && (CSC.rowval[searchk] == i) + CSC.nzval[searchk] += v + return true + else + return false + end +end + Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) include("struct_flush.jl") diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index c27aab0..38608ad 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -11,7 +11,7 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=tr A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) end end - + A.phash = phash(A.cscmatrix) A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] end From 3ed83faafb08e428f47ab032bf7c08004c1b9690 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 23 Feb 2024 15:38:05 +0100 Subject: [PATCH 07/44] ColEntry from struct to mutable struct --- src/matrix/sparsematrixlnk.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl index 2976e88..b00c6fc 100644 --- a/src/matrix/sparsematrixlnk.jl +++ b/src/matrix/sparsematrixlnk.jl @@ -278,7 +278,7 @@ end # Struct holding pair of value and row # number, for sorting -struct ColEntry{Tv, Ti <: Integer} +mutable struct ColEntry{Tv, Ti <: Integer} rowval::Ti nzval::Tv end From f78ed1df028b0fb3f009f10e78c3abf3e5e8fdc9 
Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Mon, 26 Feb 2024 14:10:38 +0100 Subject: [PATCH 08/44] enable deeper partitioning / fixing some preparatory functions --- src/factorizations/ilu_Al-Kurdi_Mittal.jl | 4 +- src/factorizations/iluam.jl | 1 + src/factorizations/pilu_Al-Kurdi_Mittal.jl | 2 + src/factorizations/piluam.jl | 1 + .../ExtendableSparseParallel.jl | 2 +- .../preparatory.jl | 416 ++++++++++++++---- 6 files changed, 336 insertions(+), 90 deletions(-) diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index 97bb9a8..0b6b1b2 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -14,7 +14,7 @@ mutable struct ILUAMPrecon{T,N} end function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} - @info "iluAM" + #@info "iluAM" nzval = copy(A.nzval) colptr = A.colptr rowval = A.rowval @@ -100,6 +100,7 @@ function backward_subst_old!(x, y, nzval, diag, A) end function ldiv!(x, ILU::ILUAMPrecon, b) + #@info "iluam ldiv 1" nzval = ILU.nzval diag = ILU.diag A = ILU.A @@ -111,6 +112,7 @@ function ldiv!(x, ILU::ILUAMPrecon, b) end function ldiv!(ILU::ILUAMPrecon, b) + #@info "iluam ldiv 2" nzval = ILU.nzval diag = ILU.diag A = ILU.A diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl index a4aed06..6d061b0 100644 --- a/src/factorizations/iluam.jl +++ b/src/factorizations/iluam.jl @@ -22,6 +22,7 @@ function ILUAMPreconditioner end function update!(p::ILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash + @warn "p.A.phash != p.phash" p.factorization = iluAM(p.A.cscmatrix) p.phash=p.A.phash else diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl index a1ef818..15a8b23 100644 --- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -230,6 +230,7 @@ function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) end function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "piluam ldiv 1" nzval = ILU.nzval diag = ILU.diag A = ILU.A @@ -244,6 +245,7 @@ function ldiv!(x, ILU::PILUAMPrecon, b) end function ldiv!(ILU::PILUAMPrecon, b) + #@info "piluam ldiv 2" nzval = ILU.nzval diag = ILU.diag A = ILU.A diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl index 4a5fcdc..075f73f 100644 --- a/src/factorizations/piluam.jl +++ b/src/factorizations/piluam.jl @@ -22,6 +22,7 @@ function PILUAMPreconditioner end function update!(p::PILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash + @warn "p.A.phash != p.phash" p.factorization = piluAM(p.A) p.phash=p.A.phash else diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 9e63c37..b635a33 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -49,7 +49,7 @@ end function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} - grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, 
nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index e14a066..a29356c 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -6,23 +6,36 @@ `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. """ -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0) +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) grid = getgrid(nm; x0, x1) - + adepth = 0 if sequential - (allcells, start, cellparts) = grid_to_graph_ps_multi!(grid, nt, depth)#) + (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts) = grid_to_graph_ps_multi_par!(grid, nt, depth) + (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa, do_print) + end + + if (adepth != depth) && do_print + @info "The requested depth of partitioning is too high. The depth is set to $adepth." end + depth = adepth + cfp = bettercellsforpart(cellparts, depth*nt+1) + + if check_partition + validate_partition(grid, cellparts, start, allcells, nt, depth) + end + + @info length.(cfp) + @info minimum(cellparts), maximum(cellparts), nt, depth + (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( - cellparts, allcells, start, num_nodes(grid), Ti, nt + cellparts, allcells, start, num_nodes(grid), Ti, nt, depth ) - cfp = bettercellsforpart(cellparts, depth*nt+1) - return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts -end + return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, adepth +end """ `function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` @@ -35,10 +48,10 @@ Furthermore, `nnts` (number of nodes of the threads) is computed, which contain `Ti` is the type (Int64,...) of the elements in the created arrays. `nt` is the number of threads. 
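+Note: by construction, each node should be counted in `nnts` at most once per
+partition level, i.e. at most `depth+1` times in total; the `tmpctr > depth+1`
+check in the body reports violations of this invariant.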
""" -function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt) +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth) - num_matrices = maximum(cellregs) - depth = Int(floor((num_matrices-1)/nt)) + #num_matrices = maximum(cellregs) + #depth = Int(floor((num_matrices-1)/nt)) #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector #nnts = [zeros(Ti, nt+1) for i=1:depth+1] @@ -62,6 +75,11 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r nnts[crmod] += 1 #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] #push!(tmp, crmod) + if tmpctr > depth+1 + @info "Cellregs: ", sortedcellregs + @info "Levels : ", Int.(ceil.(sortedcellregs/nt)) + @info "PartsMod: ", ((sortedcellregs.-1).%nt).+1 + end tmp[tmpctr] = crmod tmpctr += 1 end @@ -127,9 +145,6 @@ end - - - """ `function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` @@ -141,47 +156,77 @@ This function partitons the separator, which is done if `depth`>1 (see `grid_to_ `level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... `preparatory_multi_ps` is the number of separator-cells. """ -function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes) - sepanodes = findall(x->x==nt+1, cellregs) +function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print) + # current number of cells treated + nc2 = size(ACSC, 1) - indptr = collect(1:nc+1) - indices = zeros(Int64, nc) - rowval = zeros(Int64, nc) + indptr = collect(1:nc2+1) + indices = zeros(Int64, nc2) + rowval = zeros(Int64, nc2) indptrT = collect(1:ctr_sepanodes+1) indicesT = zeros(Int64, ctr_sepanodes) rowvalT = zeros(Int64, ctr_sepanodes) - for (i,j) in enumerate(sepanodes) - indices[j] = i + for i=1:ctr_sepanodes + j = ri[i] + indices[j] = i indicesT[i] = j rowval[j] = 1 rowvalT[i] = 1 end - R = SparseMatrixCSC(ctr_sepanodes, nc, indptr, indices, rowval) - RT = SparseMatrixCSC(nc, ctr_sepanodes, indptrT, indicesT, rowvalT) - prod = ACSC*dropzeros(RT) + + + R = SparseMatrixCSC(ctr_sepanodes, nc2, indptr, indices, rowval) + RT = SparseMatrixCSC(nc2, ctr_sepanodes, indptrT, indicesT, rowvalT) + # current adjacency matrix, taken as a part of the given one ACSC RART = dropzeros(R)*ACSC*dropzeros(RT) - partition2 = Metis.partition(RART, nt) - cellregs2 = copy(partition2) - - ctr_sepanodes = 0 - for (i,j) in enumerate(sepanodes) - rows = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] - cellregs[j] = level0*nt + cellregs2[i] - if minimum(partition2[rows]) != maximum(partition2[rows]) - cellregs[j] = (level0+1)*nt+1 - ctr_sepanodes += 1 - end - end - - RART, ctr_sepanodes + cellregs2 = Metis.partition(RART, nt) + + + for i=1:ctr_sepanodes + if cellregs[gi[i]] < level0*nt+1 + @warn "cell treated in this iteration was not a separator-cell last iteration" + end + cellregs[gi[i]] = level0*nt + cellregs2[i] + end + + # how many cells are in the separator of the new partiton (which is only computed on the separator of the old partition) + new_ctr_sepanodes = 0 + ri2 = Vector{Int64}(undef, ctr_sepanodes) + gi2 = Vector{Int64}(undef, ctr_sepanodes) + + for tid=1:nt + for i=1:ctr_sepanodes + if cellregs2[i] == tid + neighbors = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + rows = 
gi[vcat(neighbors, [i])] + #counts how many different regions (besides) the separator are adjacent to the current cell + x = how_many_different_below(cellregs[rows], (level0+1)*nt+1) + if x > 1 + cellregs[gi[i]] = (level0+1)*nt+1 + new_ctr_sepanodes += 1 + gi2[new_ctr_sepanodes] = gi[i] + ri2[new_ctr_sepanodes] = i + end + end + end + end + + + ri2 = ri2[1:new_ctr_sepanodes] + gi2 = gi2[1:new_ctr_sepanodes] + + if do_print + @info "At level $(level0+1), we found $new_ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART, new_ctr_sepanodes, ri2, gi2 end - """ `function grid_to_graph_ps_multi!(grid, nt, depth)` @@ -190,7 +235,7 @@ The function assigns colors/partitons to each cell in the `grid`. First, the gri `nt` is the number of threads. `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... """ -function grid_to_graph_ps_multi!(grid, nt, depth) +function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=false) A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) number_cells_per_node = zeros(Int64, num_nodes(grid)) for j=1:num_cells(grid) @@ -224,27 +269,46 @@ function grid_to_graph_ps_multi!(grid, nt, depth) partition = Metis.partition(ACSC, nt) cellregs = copy(partition) + sn = Vector{Int64}(undef, num_cells(grid)) + gi = Vector{Int64}(undef, num_cells(grid)) ctr_sepanodes = 0 - for j=1:num_cells(grid) - rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes += 1 - end + + for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end end - RART = ACSC + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" 
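+        # (these are the cells whose neighborhood touches more than one of the
+        # nt partitions; they were reassigned to separator region nt+1 above)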
+ end + + RART = copy(ACSC) + actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end end - - - return allcells, start, cellregs + + return allcells, start, cellregs, actual_depth, ACSC end - -function grid_to_graph_ps_multi_par!(grid, nt, depth) - time = zeros(12) +function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print=false) As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] number_cells_per_node = zeros(Int64, num_nodes(grid)) @@ -288,54 +352,64 @@ function grid_to_graph_ps_multi_par!(grid, nt, depth) end ACSC = add_all_par!(As).cscmatrix - - #SparseArrays.SparseMatrixCSC(A)) - - - partition = Metis.partition(ACSC, nt) - cellregs = copy(partition) - ctr_sepanodes_a = zeros(Int64, nt) + cellregs = Metis.partition(ACSC, nt) - cell_range = get_starts(num_cells(grid), nt) - Threads.@threads :static for tid=1:nt - for j in cell_range[tid]:cell_range[tid+1]-1 - rows = @view ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes_a[tid] += 1 - end - end + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end end - - ctr_sepanodes = sum(ctr_sepanodes_a) - - #= - time[10] = @elapsed for j=1:num_cells(grid) - rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes += 1 - end - end - =# - RART = ACSC + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = ACSC + actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end end - - - return allcells, start, cellregs + + #grid[CellRegions] = cellregs + #grid + return allcells, start, cellregs, actual_depth end +""" +`function add_all_par!(As)` +Add LNK matrices (stored in a vector) parallely (tree structure). +The result is stored in the first LNK matrix. 
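+
+Illustration (for `nt = 4`, hence `depth = 2`, assuming the loop body adds
+`As[start]` into `As[tid]`): level 1 adds `As[3]` into `As[1]` and `As[4]`
+into `As[2]` in parallel; level 2 adds `As[2]` into `As[1]`, so the total
+ends up in `As[1]`.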
+""" function add_all_par!(As) nt = length(As) depth = Int(floor(log2(nt))) ende = nt for level=1:depth - @threads :static for tid=1:2^(depth-level) + @threads for tid=1:2^(depth-level) #@info "$level, $tid" start = tid+2^(depth-level) while start <= ende @@ -425,3 +499,169 @@ function last_nz(x) end end + +function how_many_different_below(x0, y; u=0) + x = copy(x0) + z = unique(x) + t = findall(w->ww>u,z[t]) + length(t) +end + + + +function lookat_grid_to_graph_ps_multi!(nm, nt, depth) + grid = getgrid(nm) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = [] + gi = [] + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + push!(sn, j) + push!(gi, j) + end + end + RART = ACSC + #sn = 1:num_cells(grid) + #gi = 1:num_cells(grid) + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate_careful!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi) + if ctr_sepanodes == 0 + return RART + end + end + + + #return allcells, start, cellregs + RART +end + + +function adjacencies(grid) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + allcells, start, SparseArrays.SparseMatrixCSC(A) +end + +function check_adjacencies(nm) + grid = getgrid(nm) + allcells, start, A = adjacencies(grid) + + i = 1 + cells1 = sort(vcat([i], A.rowval[A.colptr[i]:(A.colptr[i+1]-1)])) #adjacent cells + nodes2 = grid[CellNodes][:,i] + cells2 = sort(unique(vcat([allcells[start[j]:start[j+1]-1] for j in nodes2]...))) + + @info cells1 + @info cells2 + @info maximum(abs.(cells1-cells2)) + + +end + +#= +function check_partition(nm, nt, depth) + grid = getgrid(nm) + + (allcells, start, cellregs, adepth, ACSC) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=true)#) + + if (adepth != depth) + @info "The requested depth of partitioning is too high. The depth is set to $adepth." 
+ end + depth = adepth + + validate_partition(num_nodes(grid), num_cells(grid), grid, cellregs, start, allcells, nt, depth, ACSC) +end +=# + +function validate_partition(grid, cellregs, start, allcells, nt, depth) + @info "Node based validation" + violation_ctr = 0 + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + levels = Int.(ceil.(sortedcellregs/nt)) + + for i=1:depth+1 + ids_lev = findall(x->x==i, levels) + if length(ids_lev) > 1 + violation_ctr += 1 + + if violation_ctr == 1 + @info "Node Id : ", j + @info "Cellregs: ", sortedcellregs + @info "Levels : ", levels + + loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) + cells_at_level4 = allcells[loc.+(start[j]-1)] + @info cells_at_level4, cellregs[cells_at_level4] + @info grid[CellNodes][:,cells_at_level4[1]], grid[CellNodes][:,cells_at_level4[2]] + end + end + end + end + @info "We found $violation_ctr violation(s)" +end \ No newline at end of file From 9e6dd1db5607e2fdddb3b3663a65d41169decddf Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 17 Mar 2024 10:57:59 +0100 Subject: [PATCH 09/44] add parallel matrix vector product --- src/ExtendableSparse.jl | 1 + src/factorizations/filu_Al-Kurdi_Mittal.jl | 160 ++++++++++++++++++ src/factorizations/ilu_Al-Kurdi_Mittal.jl | 65 ++++++- src/factorizations/iluam.jl | 4 +- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 149 ++++++++++++---- src/factorizations/piluam.jl | 5 +- .../ExtendableSparseParallel.jl | 63 ++++++- .../preparatory.jl | 4 +- .../struct_flush.jl | 4 + 9 files changed, 410 insertions(+), 45 deletions(-) create mode 100644 src/factorizations/filu_Al-Kurdi_Mittal.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 285d0c2..bcf85e6 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -37,6 +37,7 @@ export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") + include("factorizations/ilu_Al-Kurdi_Mittal.jl") #using .ILUAM include("factorizations/pilu_Al-Kurdi_Mittal.jl") diff --git a/src/factorizations/filu_Al-Kurdi_Mittal.jl b/src/factorizations/filu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..2099208 --- /dev/null +++ b/src/factorizations/filu_Al-Kurdi_Mittal.jl @@ -0,0 +1,160 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + +function iluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <:Integer} + @info "filuAM!" + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.cscmatrix.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
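+        # `point` again maps the stored row indices of column j to positions
+        # in the CSC arrays for O(1) lookup during elimination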
+ point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + +end + + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + @info "filuAM, $(A[1,1])" + nzval = copy(A.cscmatrix.nzval) + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + + + +function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "iluam ldiv 1" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A.cscmatrix + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)) + #, maximum(abs.(b-A*x)), b[1], x[1], y[1] + x +end + + +function ldiv!(ILU::PILUAMPrecon, b) + #@info "iluam ldiv 2" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A.cscmatrix + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index 0b6b1b2..ad2207d 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -3,7 +3,7 @@ import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz -@info "ILUAM" +#@info "ILUAM" mutable struct ILUAMPrecon{T,N} @@ -13,6 +13,58 @@ mutable struct ILUAMPrecon{T,N} end + +function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer} + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.colptr + rowval = A.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
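+            # same column scatter as in iluAM! above; see the comment there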
+            point[rowval[v]] = v
+        end
+
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            #nzval[v] /= nzval[diag[i]]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+
+
+        for v=colptr[j]:colptr[j+1]-1
+            point[rowval[v]] = zero(Ti)
+        end
+    end
+    #nzval, diag
+    PILUAMPrecon{Tv,Ti}(diag, nzval, A)
+end
+
+
+
+function ldiv!(x, ILU::PILUAMPrecon, b)
+    #@info "iluam ldiv 1"
+    nzval = ILU.nzval
+    diag = ILU.diag
+    A = ILU.A.cscmatrix
+    y = copy(b)
+    #forward_subst!(y, b, ILU)
+    forward_subst_old!(y, b, nzval, diag, A)
+    backward_subst_old!(x, y, nzval, diag, A)
+    @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x))
+    #, maximum(abs.(b-A*x)), b[1], x[1], y[1]
+    x
+end
+
+
+function ldiv!(ILU::PILUAMPrecon, b)
+    #@info "iluam ldiv 2"
+    nzval = ILU.nzval
+    diag = ILU.diag
+    A = ILU.A.cscmatrix
+    y = copy(b)
+    #forward_subst!(y, b, ILU)
+    forward_subst_old!(y, b, nzval, diag, A)
+    backward_subst_old!(b, y, nzval, diag, A)
+    b
+end
+
+function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer}
+    x = copy(b)
+    ldiv!(x, ilu, b)
+    x
+end
+
+function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer}
+    length(ilu.nzval)
+end
+
+#end
\ No newline at end of file
diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl
index 0b6b1b2..ad2207d 100644
--- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl
+++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl
@@ -3,7 +3,7 @@
 import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz
 
-@info "ILUAM"
+#@info "ILUAM"
 
 mutable struct ILUAMPrecon{T,N}
 
@@ -13,6 +13,58 @@ mutable struct ILUAMPrecon{T,N}
 
 end
 
+
+function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer}
+    diag = ILU.diag
+    nzval = ILU.nzval
+    ILU.A = A
+    colptr = A.colptr
+    rowval = A.rowval
+    n = A.n
+    # refill the stored factorization in place: the sparsity pattern is assumed
+    # unchanged, so ILU.nzval and ILU.diag keep their lengths and are reused
+    nzval .= A.nzval
+    point = zeros(Ti, n)
+
+    for j=1:n
+        for v=colptr[j]:colptr[j+1]-1
+            if rowval[v] == j
+                diag[j] = v
+                break
+            end
+            #elseif rowval[v]
+        end
+    end
+
+    # compute L and U
+    for j=1:n
+        for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ??
+            point[rowval[v]] = v
+        end
+
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            #nzval[v] /= nzval[diag[i]]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+
+
+        for v=colptr[j]:colptr[j+1]-1
+            point[rowval[v]] = zero(Ti)
+        end
+    end
+
+end
+
 function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
     #@info "iluAM"
     nzval = copy(A.nzval)
@@ -33,6 +85,9 @@ function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
             #elseif rowval[v]
         end
     end
+
+    #@info diag[1:20]'
+    #@info diag[end-20:end]'
 
     # compute L and U
     for j=1:n
@@ -65,6 +120,7 @@ function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
 end
 
 function forward_subst_old!(y, v, nzval, diag, A)
+    #@info "fso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
     n = A.n
     colptr = A.colptr
     rowval = A.rowval
@@ -85,6 +141,7 @@ end
 
 
 function backward_subst_old!(x, y, nzval, diag, A)
+    #@info "bso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
     n = A.n
     colptr = A.colptr
     rowval = A.rowval
@@ -99,7 +156,9 @@ function backward_subst_old!(x, y, nzval, diag, A)
     x
 end
 
+
 function ldiv!(x, ILU::ILUAMPrecon, b)
+    #t = @elapsed begin
     #@info "iluam ldiv 1"
     nzval = ILU.nzval
     diag = ILU.diag
@@ -108,6 +167,10 @@ function ldiv!(x, ILU::ILUAMPrecon, b)
     #forward_subst!(y, b, ILU)
     forward_subst_old!(y, b, nzval, diag, A)
     backward_subst_old!(x, y, nzval, diag, A)
+    #@info "ILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10]
+    #, b[1], x[1], y[1]#maximum(abs.(b)), maximum(abs.(x))
+    #end
+    #println("$t") #@info t
     x
 end
 
diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl
index 6d061b0..24b75be 100644
--- a/src/factorizations/iluam.jl
+++ b/src/factorizations/iluam.jl
@@ -22,12 +22,10 @@ function ILUAMPreconditioner end
 function update!(p::ILUAMPreconditioner)
     flush!(p.A)
     if p.A.phash != p.phash
-        @warn "p.A.phash != p.phash"
         p.factorization = iluAM(p.A.cscmatrix)
         p.phash=p.A.phash
     else
-        @warn "fuck?"
-        ilu0!(p.factorization, p.A.cscmatrix)
+        iluAM!(p.factorization, p.A.cscmatrix)
     end
     p
 end
diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl
index 15a8b23..f2861ed 100644
--- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl
+++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl
@@ -4,7 +4,7 @@
 import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz
 
-@info "PILUAM"
+#@info "PILUAM"
 
 mutable struct PILUAMPrecon{T,N}
 
@@ -51,6 +51,95 @@ function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti)
 end
 
+function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
+    @info "piluAM!"
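+    # parallel in-place refactorization: the block structure recorded in ILU
+    # (start, nt, depth) lets all diagonal blocks of one level be factorized
+    # concurrently; the trailing separator block is processed sequentially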
+    diag = ILU.diag
+    nzval = ILU.nzval
+    ILU.A = A
+    start = ILU.start
+
+    ILU.nt = A.nt
+    nt = A.nt
+
+    ILU.depth = A.depth
+    depth = A.depth
+
+
+    colptr = A.cscmatrix.colptr
+    rowval = A.cscmatrix.rowval
+    n = A.cscmatrix.n # number of columns
+    # ILU.diag and ILU.nzval are refilled completely below; allocating fresh
+    # local vectors here would leave the stored factorization unchanged
+    point = use_vector_par(n, A.nt, Int32)
+
+
+    @threads for tid=1:depth*nt+1
+        for j=start[tid]:start[tid+1]-1
+            for v=colptr[j]:colptr[j+1]-1
+                nzval[v] = A.cscmatrix.nzval[v]
+                if rowval[v] == j
+                    diag[j] = v
+                end
+                #elseif rowval[v]
+            end
+        end
+    end
+
+    for level=1:depth
+        @threads for tid=1:nt
+            for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1
+                for v=colptr[j]:colptr[j+1]-1
+                    point[tid][rowval[v]] = v
+                end
+
+                for v=colptr[j]:diag[j]-1
+                    i = rowval[v]
+                    for w=diag[i]+1:colptr[i+1]-1
+                        k = point[tid][rowval[w]]
+                        if k>0
+                            nzval[k] -= nzval[v]*nzval[w]
+                        end
+                    end
+                end
+
+                for v=diag[j]+1:colptr[j+1]-1
+                    nzval[v] /= nzval[diag[j]]
+                end
+
+                for v=colptr[j]:colptr[j+1]-1
+                    point[tid][rowval[v]] = zero(Ti)
+                end
+            end
+        end
+    end
+
+    #point = zeros(Ti, n) #Vector{Ti}(undef, n)
+    for j=start[depth*nt+1]:start[depth*nt+2]-1
+        for v=colptr[j]:colptr[j+1]-1
+            point[1][rowval[v]] = v
+        end
+
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[1][rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+
+        for v=colptr[j]:colptr[j+1]-1
+            point[1][rowval[v]] = zero(Ti)
+        end
+    end
+
+end
+
 function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
     start = A.start
     nt = A.nt
@@ -65,6 +154,22 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege
 
     # find diagonal entries
     #
+
+    #=
+    for j=1:n
+        for v=colptr[j]:colptr[j+1]-1
+            nzval[v] = A.cscmatrix.nzval[v]
+            if rowval[v] == j
+                diag[j] = v
+                #break
+            end
+            #elseif rowval[v]
+        end
+    end
+    =#
+
+
+
     @threads for tid=1:depth*nt+1
         for j=start[tid]:start[tid+1]-1
             for v=colptr[j]:colptr[j+1]-1
@@ -77,41 +182,9 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege
         end
     end
 
-    #=
-    @info "piluAM"
-    nzval = copy(A.cscmatrix.nzval)
-    colptr = A.cscmatrix.colptr
-    rowval = A.cscmatrix.rowval
-    #nzval = ILU.nzval
-    n = A.n # number of columns
-    diag = Vector{Ti}(undef, n)
-    start = A.start
-    nt = A.nt
-    depth = A.depth
-    point = use_vector_par(n, nt, Ti)
-    # find diagonal entries
-    @threads for tid=1:depth*nt+1
-        for j=start[tid]:start[tid+1]-1
-            for v=colptr[j]:colptr[j+1]-1
-                if rowval[v] == j
-                    diag[j] = v
-                    break
-                end
-                #elseif rowval[v]
-            end
-        end
-    end
-
-    # compute L and U
-    for level=1:depth
-        @threads for tid=1:nt
-            compute_lu!(nzval, point, start[(level-1)*nt+tid], start[(level-1)*nt+tid+1], tid, rowval, colptr, diag, Ti)
-        end
-    end
-
-    compute_lu!(nzval, point, start[depth*nt+1], start[depth*nt+2], 1, rowval, colptr, diag, Ti)
-    =#
+
+    #@info diag[1:20]'
+    #@info diag[end-20:end]'
 
     for level=1:depth
         @threads for tid=1:nt
@@ -171,6 +244,7 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege
 end
 
 function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A)
+    #@info "pfso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
     #@info "fwo"
     n = A.n
     colptr = A.colptr
@@ -200,6 +274,8 @@ end
 
 
 function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A)
+    #@info "pbso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
+
     #@info "bwo"
     n = A.n
     colptr = A.colptr
@@ -229,6
+305,7 @@ function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) end + function ldiv!(x, ILU::PILUAMPrecon, b) #@info "piluam ldiv 1" nzval = ILU.nzval @@ -241,6 +318,8 @@ function ldiv!(x, ILU::PILUAMPrecon, b) #forward_subst!(y, b, ILU) forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "PILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10] + #@info "PILUAM:", maximum(abs.(b-A*x)), b[1], x[1], maximum(abs.(b)), maximum(abs.(x)) x end diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl index 075f73f..50a46fd 100644 --- a/src/factorizations/piluam.jl +++ b/src/factorizations/piluam.jl @@ -20,14 +20,13 @@ also calculates and stores updates to the off-diagonal entries and thus delivers function PILUAMPreconditioner end function update!(p::PILUAMPreconditioner) + #@warn "Should flush now", nnz_noflush(p.A) flush!(p.A) if p.A.phash != p.phash - @warn "p.A.phash != p.phash" p.factorization = piluAM(p.A) p.phash=p.A.phash else - @warn "fuck?" - ilu0!(p.factorization, p.A.cscmatrix) + piluAM!(p.factorization, p.A) end p end diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index b635a33..2c91a12 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -285,6 +285,67 @@ function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, end end -Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) + + +Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) include("struct_flush.jl") + + + +import LinearAlgebra.mul! + +""" +```function LinearAlgebra.mul!(y, A, x)``` + +This overwrites the mul! 
function for A::ExtendableSparseMatrixParallel +""" +function LinearAlgebra.mul!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv, Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #@info "my matvec" + _, nnzLNK = nnz_noflush(A) + @assert nnzLNK == 0 + #mul!(y, A.cscmatrix, x) + matvec!(y, A, x) +end + + +""" +```function matvec!(y, A, x)``` + +y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel +this computation is done in parallel, it has the same result as y = A.cscmatrix*x +""" +function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #a1 = @allocated begin + nt = A.nt + depth = A.depth + colptr = A.cscmatrix.colptr + nzv = A.cscmatrix.nzval + rv = A.cscmatrix.rowval + + LinearAlgebra._rmul_or_fill!(y, 0.0) + + #end + #a2 = @allocated + for level=1:depth + @threads for tid::Int64=1:nt + for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 # in nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + end + + @threads for tid=1:1 + #a3 = @allocated + for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + + #println(a1,a2,a3) + y +end diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index a29356c..7eeb3d3 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -26,8 +26,8 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, validate_partition(grid, cellparts, start, allcells, nt, depth) end - @info length.(cfp) - @info minimum(cellparts), maximum(cellparts), nt, depth + #@info length.(cfp) + #@info minimum(cellparts), maximum(cellparts), nt, depth (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( cellparts, allcells, start, num_nodes(grid), Ti, nt, depth diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 38608ad..73471dc 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -1,5 +1,9 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + _, nnzLNK = nnz_noflush(A) + if nnzLNK == 0 + return + end if !do_dense A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) From 0cc56e87cd29634014af8e53d33f284327b464e9 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 17 Mar 2024 11:03:16 +0100 Subject: [PATCH 10/44] remove old code --- src/ExtendableSparse.jl | 2 +- src/factorizations/filu_Al-Kurdi_Mittal.jl | 160 -------------- src/factorizations/ilu_Al-Kurdi_Mittal_0.jl | 146 ------------- src/factorizations/ilu_Al-Kurdi_Mittal_1.jl | 229 -------------------- 4 files changed, 1 insertion(+), 536 deletions(-) delete mode 100644 src/factorizations/filu_Al-Kurdi_Mittal.jl delete mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_0.jl delete mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_1.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index bcf85e6..9c490ca 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -33,7 +33,7 @@ export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, 
updateindex!, rawup export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet -@warn "ESMP!" +#@warn "ESMP!" include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") diff --git a/src/factorizations/filu_Al-Kurdi_Mittal.jl b/src/factorizations/filu_Al-Kurdi_Mittal.jl deleted file mode 100644 index 2099208..0000000 --- a/src/factorizations/filu_Al-Kurdi_Mittal.jl +++ /dev/null @@ -1,160 +0,0 @@ -#module PILUAM -#using Base.Threads -#using LinearAlgebra, SparseArrays - -import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - -@info "PILUAM" - -mutable struct PILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - A::AbstractMatrix - -end - -function iluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <:Integer} - @info "filuAM!" - diag = ILU.diag - nzval = ILU.nzval - - nzval = copy(A.cscmatrix.nzval) - diag = Vector{Ti}(undef, n) - ILU.A = A - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - n = A.n - point = zeros(Ti, n) - - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? - point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - -end - - -function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} - @info "filuAM, $(A[1,1])" - nzval = copy(A.cscmatrix.nzval) - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - #nzval = ILU.nzval - n = A.n # number of columns - point = zeros(Ti, n) #Vector{Ti}(undef, n) - diag = Vector{Ti}(undef, n) - - # find diagonal entries - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - #nzval, diag - PILUAMPrecon{Tv,Ti}(diag, nzval, A) -end - - - -function ldiv!(x, ILU::PILUAMPrecon, b) - #@info "iluam ldiv 1" - nzval = ILU.nzval - diag = ILU.diag - A = ILU.A.cscmatrix - y = copy(b) - #forward_subst!(y, b, ILU) - forward_subst_old!(y, b, nzval, diag, A) - backward_subst_old!(x, y, nzval, diag, A) - @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)) - #, maximum(abs.(b-A*x)), b[1], x[1], y[1] - x -end - - -function ldiv!(ILU::PILUAMPrecon, b) - #@info "iluam ldiv 2" - nzval = ILU.nzval - diag = ILU.diag - A = ILU.A.cscmatrix - y = copy(b) - #forward_subst!(y, b, ILU) - forward_subst_old!(y, b, nzval, diag, A) - backward_subst_old!(b, y, nzval, diag, A) - b -end - -function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} - x = copy(b) - ldiv!(x, ilu, b) - x -end - -function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - -#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl deleted file mode 100644 index 26f9788..0000000 --- a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl +++ /dev/null @@ -1,146 +0,0 @@ -module ILUAM -using LinearAlgebra, SparseArrays - -import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - - -mutable struct ILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - rowval::AbstractVector - colptr::AbstractVector - -end - -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - @info "ILUAMPrecon" - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) -end - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - @info "iluAM!" - nzval = LU.nzval - diag = LU.diag - - colptr = LU.colptr - rowval = LU.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) - - t = zeros(5) - - # find diagonal entries - t[1] = @elapsed for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - t[3] += @elapsed for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) - end - end - t -end - -function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - t = zeros(6) - t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) - t[2:6] = iluAM!(LU, A) - @info t - LU -end - - -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - y .= 0 - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - y -end - - -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv1" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x -end - -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv2" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b -end - -function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM bs " - x = copy(b) - ldiv!(x, ilu, b) -end - -function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - - -end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl deleted file mode 100644 index a599094..0000000 --- a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl +++ /dev/null @@ -1,229 +0,0 @@ -module ILUAM -using LinearAlgebra, SparseArrays - -#import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - -@info "ILUAM" - -mutable struct ILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - A::AbstractMatrix - -end - -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - @info "ILUAMPrecon" - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), A) -end - - - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - @info "iluAM!" - nzval = LU.nzval - diag = LU.diag - - colptr = LU.A.colptr - rowval = LU.A.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) - - t = zeros(5) - - # find diagonal entries - t[1] = @elapsed for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - t[3] += @elapsed for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) - end - end - t -end - - -function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} - @info "iluAM" - nzval = copy(A.nzval) - colptr = A.colptr - rowval = A.rowval - #nzval = ILU.nzval - n = A.n # number of columns - point = zeros(Ti, n) #Vector{Ti}(undef, n) - diag = Vector{Ti}(undef, n) - - # find diagonal entries - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? - point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - #nzval, diag - ILUAMPrecon{Tv,Ti}(diag, nzval, A) -end - -#function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} -# t = zeros(6) -# t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) -# t[2:6] = iluAM!(LU, A) -# @info t -# LU -#end - - -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - y .= 0 - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - y -end - - -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv_new!(x, ilu, v) - - n = ilu.A.n - y = Vector{Float64}(undef, n) - y .= 0 - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - #forward - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - - #backward - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - #@info "AM ldiv1" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x -end - -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv2" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b -end - -function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM bs " - x = copy(b) - ldiv!(x, ilu, b) - x -end - -function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - - -end \ No newline at end of file From 0f282c6f37b45146816a0243e5f6ff7f24e2d84e Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 24 Mar 2024 17:40:05 +0100 Subject: [PATCH 11/44] added preparation for edgewise assembly --- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 2 +- 
.../ExtendableSparseParallel.jl | 9 +- .../preparatory.jl | 234 ++++++++++++++++-- .../struct_flush.jl | 14 +- 4 files changed, 231 insertions(+), 28 deletions(-) diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl index f2861ed..ad9529b 100644 --- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -52,7 +52,7 @@ function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) end function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} - @info "piluAM!" + #@info "piluAM!" diag = ILU.diag nzval = ILU.nzval ILU.A = A diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 2c91a12..b413c5c 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -103,7 +103,7 @@ function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_t if updatentryCSC2!(A.cscmatrix, i, j, v) else - level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + _, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v end end @@ -316,7 +316,6 @@ y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel this computation is done in parallel, it has the same result as y = A.cscmatrix*x """ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} - #a1 = @allocated begin nt = A.nt depth = A.depth colptr = A.cscmatrix.colptr @@ -325,8 +324,6 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} LinearAlgebra._rmul_or_fill!(y, 0.0) - #end - #a2 = @allocated for level=1:depth @threads for tid::Int64=1:nt for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 @@ -337,8 +334,9 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} end end + + @threads for tid=1:1 - #a3 = @allocated for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) y[rv[row]] += nzv[row]*x[col] @@ -346,6 +344,5 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} end end - #println(a1,a2,a3) y end diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 7eeb3d3..033a2fa 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -6,24 +6,31 @@ `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. 
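
A sketch of a typical call (the grid size tuple, thread count and depth are example values only):

    preparatory_multi_ps_less_reverse((100,100), 4, 2, Int64)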
""" -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) grid = getgrid(nm; x0, x1) adepth = 0 if sequential - (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa, do_print)#) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa, do_print) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa, do_print) end if (adepth != depth) && do_print @info "The requested depth of partitioning is too high. The depth is set to $adepth." end - depth = adepth - cfp = bettercellsforpart(cellparts, depth*nt+1) + + if assembly == :cellwise + cfp = bettercellsforpart(cellparts, depth*nt+1) + + else + edgeparts = edgewise_partition_from_cellwise_partition(grid, cellparts) + cfp = bettercellsforpart(edgeparts, depth*nt+1) + end + if check_partition - validate_partition(grid, cellparts, start, allcells, nt, depth) + validate_partition(grid, cellparts, start, allcells, nt, depth, assembly) end #@info length.(cfp) @@ -126,7 +133,7 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r tmpctr = 1 for cr in sortedcellregs crmod = (cr-1)%nt+1 - level = Int(ceil(cr/nt)) + #level = Int(ceil(cr/nt)) if !(crmod in tmp[1:tmpctr-1]) gictrs[crmod] += 1 # , level] += 1 sortednodesperthread[crmod,nj] = gictrs[crmod] @@ -235,7 +242,7 @@ The function assigns colors/partitons to each cell in the `grid`. First, the gri `nt` is the number of threads. `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... 
""" -function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=false) +function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) number_cells_per_node = zeros(Int64, num_nodes(grid)) for j=1:num_cells(grid) @@ -304,11 +311,10 @@ function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=fals end end - return allcells, start, cellregs, actual_depth, ACSC + return allcells, start, cellregs, actual_depth end - -function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print=false) +function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] number_cells_per_node = zeros(Int64, num_nodes(grid)) @@ -397,6 +403,195 @@ function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print= return allcells, start, cellregs, actual_depth end +function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid)) + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + for i=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,i] + number_edges_per_node[node_id] += 1 + end + end + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,j] + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + edges = @view alledges[start[j]:start[j+1]-1] + for (i,id1) in enumerate(edges) + for id2 in edges[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + + sn = Vector{Int64}(undef, num_edges(grid)) + gi = Vector{Int64}(undef, num_edges(grid)) + ctr_sepanodes = 0 + + edgeregs = copy(partition) + for tid=1:nt + for j=1:num_edges(grid) + if edgeregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(edgeregs[rows], nt+1) > 1 + edgeregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" 
+ end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(edgeregs, num_edges(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return alledges, start, edgeregs, actual_depth +end + +function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + cn = grid[EdgeNodes] + + As = [ExtendableSparseMatrix{Int64, Int64}(num_edges(grid), num_edges(grid)) for tid=1:nt] + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_edges_per_node[node_id] += 1 + end + end + + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + edges = @view alledges[start[j]:start[j+1]-1] + l = length(edges) + for (i,id1) in enumerate(edges) + ce = view(edges, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + cellregs = Metis.partition(ACSC, nt) + + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_edges(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end + end + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes edges that have to be treated in the next iteration!" 
+ end + + RART = ACSC + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + #grid[CellRegions] = cellregs + #grid + return alledges, start, cellregs, actual_depth +end + + +function edgewise_partition_from_cellwise_partition(grid, cellregs) + ce = grid[CellEdges] + if num_edges(grid) == 0 + grid[EdgeNodes] + end + + edgeregs = maximum(cellregs)*ones(Int64, num_edges(grid)) + + for icell=1:num_cells(grid) + tmp = cellregs[icell] + for iedge in ce[:,icell] + if tmp < edgeregs[iedge] + edgeregs[iedge] = tmp + end + end + end + + edgeregs +end + """ `function add_all_par!(As)` @@ -636,10 +831,15 @@ function check_partition(nm, nt, depth) end =# -function validate_partition(grid, cellregs, start, allcells, nt, depth) - @info "Node based validation" +function validate_partition(grid, cellregs, start, allcells, nt, depth, assemblytype) violation_ctr = 0 + if assemblytype == :cellwise + key = CellNodes + else + key = EdgeNodes + end + for j=1:num_nodes(grid) cells = @view allcells[start[j]:start[j+1]-1] sortedcellregs = unique(sort(cellregs[cells])) @@ -651,14 +851,14 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth) violation_ctr += 1 if violation_ctr == 1 - @info "Node Id : ", j - @info "Cellregs: ", sortedcellregs - @info "Levels : ", levels + @info "Node Id : $j (we only show one violation)" + @info "Cellregs: $sortedcellregs" + @info "Levels : $levels" loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) cells_at_level4 = allcells[loc.+(start[j]-1)] @info cells_at_level4, cellregs[cells_at_level4] - @info grid[CellNodes][:,cells_at_level4[1]], grid[CellNodes][:,cells_at_level4[2]] + @info grid[key][:,cells_at_level4[1]], grid[key][:,cells_at_level4[2]] end end end diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 73471dc..1b8fc48 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -49,6 +49,9 @@ function dense_flush_keepzeros!( eqctr = 0 tmp = zeros(Ti, size(onr)[1]) + #@warn [As[i].nnz for i=1:nt], [As[i].n for i=1:nt], [As[i].m for i=1:nt] + #@info maximum.([As[i].colptr for i=1:nt]) + for nj=1:As[1].m indptr[nj] = ctr oj = rni[nj] @@ -62,7 +65,10 @@ function dense_flush_keepzeros!( k = s[regmod, nj] if regionctr == 1 while k>0 - #if As[regmod].nzval[k] != 0.0 + if As[regmod].rowval[k] != 0 + if ctr > nnz + @info "ctr > nnz, $nj, $oj" + end indices[ctr] = As[regmod].rowval[k] data[ctr] = As[regmod].nzval[k] @@ -82,12 +88,12 @@ function dense_flush_keepzeros!( ctr += 1 jc += 1 - #end + end k = As[regmod].colptr[k] end else while k>0 - #if As[regmod].nzval[k] != 0.0 + if As[regmod].rowval[k] != 0 indices[ctr] = As[regmod].rowval[k] data[ctr] = As[regmod].nzval[k] @@ -120,7 +126,7 @@ function dense_flush_keepzeros!( ctr += 1 jc += 1 - #end + end k = As[regmod].colptr[k] end From d5b9bacb79e9478d0792d5b847d4da71ccbd12a4 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 10 May 2024 15:58:05 +0200 Subject: [PATCH 12/44] added comment --- src/matrix/ExtendableSparseMatrixParallel/preparatory.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 
033a2fa..e73c7d1 100644
--- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
+++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
@@ -5,6 +5,8 @@
 `nt` is the number of threads.
 `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices...
 To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here.
+
+This should live somewhere else, long-term.
 """
 function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false)
     grid = getgrid(nm; x0, x1)
@@ -864,4 +866,4 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth, assembly
         end
     end
     @info "We found $violation_ctr violation(s)"
-end
\ No newline at end of file
+end

From d0b6c63d8b62dfc6ae03b8ad5e39c394a1e47fc3 Mon Sep 17 00:00:00 2001
From: Johannes Taraz
Date: Sat, 11 May 2024 19:31:01 +0200
Subject: [PATCH 13/44] removed grid dependency of ESMP

---
 .../ExtendableSparseParallel.jl               | 63 +++++++++++++++++--
 .../struct_flush.jl                           |  2 +-
 .../supersparse.jl                            |  7 ++-
 3 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
index b413c5c..5126f8d 100644
--- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
+++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
@@ -9,38 +9,89 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar
     cscmatrix::SparseMatrixCSC{Tv, Ti}
 
     """
-    Linked list structure holding data of extension
+    Linked list structures holding data of extension, one for each thread
     """
     lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}}
 
-    grid::ExtendableGrid
+    """
+    this is the grid on which the PDE lives
+    (We do not want this dependency)
+    """
+    #grid::ExtendableGrid
 
+    """
+    Number of nodes per thread
+    """
     nnts::Vector{Ti}
 
+    """
+    sortednodesperthread[i,j] = local index of the j-th global column in the i-th LNK matrix
+    (this is used e.g. when assembling the matrix)
+    """
     sortednodesperthread::Matrix{Ti}
 
+    """
+    depth+1 x nn matrix,
+    old_noderegions[i,j] = region in which node j is, in level i
+    old refers to the fact that j is the 'old index' (i.e. grid index, not matrix index, see 'new_indices')
+    """
     old_noderegions::Matrix{Ti}
 
+    """
+    cellsforpart[i] is a vector containing all cells in the i-th region
+    cellsforpart has length nt*depth + 1
+    """
     cellsforpart::Vector{Vector{Ti}}
 
+    """
+    globalindices[i][j] = index in the global (ESMP & CSC) matrix of the j-th column of the i-th LNK matrix
+    (this maps the local indices (in the LNKs) to the global indices (ESMP & CSC))
+    """
     globalindices::Vector{Vector{Ti}}
 
+    """
+    For some applications such as the parallel ILU preconditioner, a block form is necessary.
+    Thus, the columns are reordered and A[i,i] does not correspond to the i-th node of the grid,
+    but A[new_indices[i], new_indices[i]] does
+    """
     new_indices::Vector{Ti}
 
+    """
+    Reverse: rev_new_indices[new_indices[i]] = i, for all i
+    """
    rev_new_indices::Vector{Ti}
 
+    """
+    starts[i] gives the first column of the i-th region, i.e.
starts[1] = 1 + starts has length nt*depth + 1 + """ start::Vector{Ti} - + + """ + cellparts[i] = region of the i-th cell + """ cellparts::Vector{Ti} + """ + Number of threads + """ nt::Ti + """ + How often is the separator partitioned? (if never: depth = 1) + """ depth::Ti phash::UInt64 + """ + Number of rows / number of nodes in grid + """ n::Ti + """ + Number of columns / number of nodes in grid (only works for square matrices) + """ m::Ti @@ -52,7 +103,7 @@ function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) w grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] - ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) end @@ -206,8 +257,8 @@ end #------------------------------------ function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} - A.cscmatrix = spzeros(Tv, Ti, num_nodes(A.grid), num_nodes(A.grid)) - A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + A.cscmatrix = spzeros(Tv, Ti, A.n, A.m) + A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(A.n, A.nnts[tid]) for tid=1:A.nt] end function nnz_flush(ext::ExtendableSparseMatrixParallel) diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 1b8fc48..3169d4b 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -16,7 +16,7 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=tr end end A.phash = phash(A.cscmatrix) - A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(A.n, A.nnts[tid]) for tid=1:A.nt] end diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index ae52f60..00b397d 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -43,16 +43,17 @@ mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{T rowval::Vector{Ti} """ - Nonzero entry values correspondin to each pair + Nonzero entry values corresponding to each pair (colptr[index],rowval[index]) - Initial length is n, it grows with each new entry. + Initial length is n, it grows with each new entry. 
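+
+    (Entries of one column form a linked list: colptr[index] points to the
+    next stored entry of the same column, and a zero value ends the list, as
+    traversed in struct_flush.jl.)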
""" nzval::Vector{Tv} - + collnk::Vector{Ti} + # counts the number of columns in use colctr::Ti end From 6d182951d16b4e14b30012ce26a2e8692f9e431a Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 12 May 2024 12:21:45 +0200 Subject: [PATCH 14/44] minor change: no globalcounter ouutput in constructor anymore --- .../ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl | 2 +- src/matrix/ExtendableSparseMatrixParallel/preparatory.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 5126f8d..737660d 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -100,7 +100,7 @@ end function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} - grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index e73c7d1..6e9eee3 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -43,7 +43,7 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, ) - return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, adepth + return grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth end """ From 01cc1ee8037230c34a02b59cadc837cb18b428c2 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Mon, 13 May 2024 04:14:40 +0200 Subject: [PATCH 15/44] remove grid dependency of ExtendableSparse.jl completely + minor restructuring --- Project.toml | 1 - src/ExtendableSparse.jl | 3 +- .../ExtendableSparseParallel.jl | 123 +++++++--- .../preparatory.jl | 222 ++++++++++++------ .../supersparse.jl | 204 ++++++++-------- src/matrix/extendable.jl | 9 + test/rect.jl | 182 ++++++++++++++ 7 files changed, 540 insertions(+), 204 deletions(-) create mode 100644 test/rect.jl diff --git a/Project.toml b/Project.toml index 8776054..36d1e03 100644 --- a/Project.toml +++ b/Project.toml @@ -6,7 +6,6 @@ version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 9c490ca..f927622 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -6,7 +6,6 @@ using ILUZero using Metis using Base.Threads -using ExtendableGrids if !isdefined(Base, :get_extension) using Requires @@ -28,7 +27,7 @@ include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") include("matrix/extendable.jl") -export SparseMatrixLNK, ExtendableSparseMatrix, flush!, 
nnz, updateindex!, rawupdateindex!, colptrs, sparse
+export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset!
 
 export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet
 
diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
index 737660d..08268ee 100644
--- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
+++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
@@ -13,12 +13,6 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar
     """
     lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}}
 
-    """
-    this is the grid on which the PDE lives
-    (We do not want this dependency)
-    """
-    #grid::ExtendableGrid
-
     """
     Number of nodes per thread
     """
@@ -98,52 +92,57 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar
 end
 
+"""
+$(SIGNATURES)
 
-function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer}
-    grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1)
-    csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid))
-    lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt]
+`ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}`
+
+Create an ExtendableSparseMatrixParallel based on a grid.
+The grid is specified by nc (number of cells), nn (number of nodes) and the `mat_cell_node` (i.e. grid[CellNodes] if ExtendableGrids is used).
+Here, `mat_cell_node[k,i]` is the i-th node in the k-th cell.
+The matrix structure is made for parallel computations with `nt` threads.
+`depth` is the number of partition layers; for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again.
+`block_struct=true` means the matrix is reordered to have a block structure, which is necessary for the parallel ILU; for `false`, the matrix is not reordered.
+"""
+function ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}
+    nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; block_struct)
+    csc = spzeros(Tv, Ti, nn, nn)
+    lnk = [SuperSparseMatrixLNK{Tv, Ti}(nn, nnts[tid]) for tid=1:nt]
     ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m)
 end
 
+"""
+$(SIGNATURES)
 
-function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}
-    if known_that_unknown
-        A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
-        return
-    end
-
-    if updatentryCSC2!(A.cscmatrix, i, j, v)
-    else
-        A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
-    end
-end
-
+`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`
 
-#=
-function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}
+`A[i,j] += v`
+This function should be used if the thread in which the entry appears is known (`tid`).
+If the thread is not known, use `addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false)`, this function calculates `tid`. +If you know that the entry is not yet known to the CSC structure, set `known_that_unknown=true`. +""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer} if known_that_unknown - level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v return end if updatentryCSC2!(A.cscmatrix, i, j, v) else - level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v end end -=# """ -`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=true) where {Tv, Ti <: Integer}` +$(SIGNATURES) + +`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}` A[i,j] += v, using any partition. If the partition should be specified (for parallel use), use -`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=true) where {Tv, Ti <: Integer}`. +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`. """ function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} if known_that_unknown @@ -161,7 +160,13 @@ end #--------------------------------- +""" +$(SIGNATURES) +`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer` +Update element of the matrix with operation `op`. +Use this method if the 'thread of the element' is not known, otherwise use `updateindex!(ext, op, v, i, j, tid)`. +""" function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -178,6 +183,13 @@ function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer` + +Update element of the matrix with operation `op`. +Use this method if the 'thread of the element' is known, otherwise use `updateindex!(ext, op, v, i, j)`. +""" function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -194,6 +206,13 @@ function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer}` + +Like [`updateindex!`](@ref) but without checking if v is zero. +Use this method if the 'thread of the element' is not known. +""" function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -209,6 +228,13 @@ function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer}` + +Like [`updateindex!`](@ref) but without checking if v is zero. 
+Use this method if the 'thread of the element' is known +""" function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -224,6 +250,13 @@ function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +``Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer` + +Find index in CSC matrix and return value, if it exists. +Otherwise, return value from extension. +""" function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} @@ -237,6 +270,13 @@ function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, end +""" +$(SIGNATURES) +`Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti}` + +Find index in CSC matrix and set value if it exists. Otherwise, +set index in extension if `v` is nonzero. +""" function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, @@ -256,16 +296,31 @@ end #------------------------------------ +""" +$(SIGNATURES) + +Reset matrix, such that CSC and LNK have no non-zero entries. +""" function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} A.cscmatrix = spzeros(Tv, Ti, A.n, A.m) A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(A.n, A.nnts[tid]) for tid=1:A.nt] end +""" +$(SIGNATURES) + +Compute number of non-zero elements, after flush. +""" function nnz_flush(ext::ExtendableSparseMatrixParallel) flush!(ext) return nnz(ext.cscmatrix) end +""" +$(SIGNATURES) + +Compute number of non-zero elements, without flush. +""" function nnz_noflush(ext::ExtendableSparseMatrixParallel) return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt]) end @@ -279,7 +334,11 @@ function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, T end +""" +$(SIGNATURES) +Show matrix, without flushing +""" function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel) #flush!(ext) xnnzCSC, xnnzLNK = nnz_noflush(ext) @@ -321,7 +380,11 @@ function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry a i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1)) end +""" +$(SIGNATURES) +Find out if i,j is non-zero entry in CSC, if yes, update entry with += v and return `true`, if not return `false` +""" function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer} p1 = CSC.colptr[j] p2 = CSC.colptr[j+1]-1 @@ -347,6 +410,7 @@ include("struct_flush.jl") import LinearAlgebra.mul! """ +$(SIGNATURES) ```function LinearAlgebra.mul!(y, A, x)``` This overwrites the mul! function for A::ExtendableSparseMatrixParallel @@ -361,6 +425,7 @@ end """ +$(SIGNATURES) ```function matvec!(y, A, x)``` y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 6e9eee3..fe5686f 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -1,5 +1,5 @@ """ -`function preparatory_multi_ps_less_reverse(nm, nt, depth)` +`function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth)` `nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
`nt` is the number of threads. @@ -8,13 +8,15 @@ To assemble the system matrix parallely, things such as `cellsforpart` (= which This should be somewhere else, longterm """ -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) - grid = getgrid(nm; x0, x1) +function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; + sequential=false, assembly=:cellwise, + minsize_sepa=10, do_print=false, check_partition=false, ne=0, ce=[], mat_edge_node=[], block_struct=true) + #grid = getgrid(nm; x0, x1) adepth = 0 if sequential - (allcells, start, cellparts, adepth) = grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa, do_print)#) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa, do_print) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print) end if (adepth != depth) && do_print @@ -26,26 +28,32 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, cfp = bettercellsforpart(cellparts, depth*nt+1) else - edgeparts = edgewise_partition_from_cellwise_partition(grid, cellparts) + edgeparts = edgewise_partition_from_cellwise_partition(nc, ne, ce, cellparts) cfp = bettercellsforpart(edgeparts, depth*nt+1) end if check_partition - validate_partition(grid, cellparts, start, allcells, nt, depth, assembly) + if assembly == :cellwise + validate_partition(nn, mat_cell_node, cellparts, start, allcells, nt, depth, assembly) + else + validate_partition(nn, mat_edge_node, cellparts, start, allcells, nt, depth, assembly) + end end #@info length.(cfp) #@info minimum(cellparts), maximum(cellparts), nt, depth - (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( - cellparts, allcells, start, num_nodes(grid), Ti, nt, depth + (nnts, s, onr, gi, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( + cellparts, allcells, start, nn, Ti, nt, depth; block_struct ) - return grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth + return nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth end + + """ `function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` @@ -56,8 +64,9 @@ Furthermore, `nnts` (number of nodes of the threads) is computed, which contain `nn` is the number of nodes in the grid. `Ti` is the type (Int64,...) of the elements in the created arrays. `nt` is the number of threads. 
+`block_struct=true` means that the matrix is reordered to have a block structure; this is necessary for parallel ILU. With `block_struct=false`, the matrix is not reordered.
 """
-function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth)
+function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth; block_struct = true)
 
 	#num_matrices = maximum(cellregs)
 	#depth = Int(floor((num_matrices-1)/nt))
@@ -119,6 +128,12 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r
 	end
 	starts .+= 1
 
+	if !block_struct
+		new_indices = collect(1:nn)
+		rev_new_indices = collect(1:nn)
+		starts = []
+	end
+
 	# Build sortednodesperthread and globalindices array:
 	# They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j
 	# Note that j has to be a `new index`
@@ -147,7 +162,7 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r
 		end
 	end
 
-	nnts, sortednodesperthread, old_noderegions, globalindices, gictrs, new_indices, rev_new_indices, starts
+	nnts, sortednodesperthread, old_noderegions, globalindices, new_indices, rev_new_indices, starts
 end
 
@@ -165,7 +180,7 @@ This function partitons the separator, which is done if `depth`>1 (see `grid_to_
 `level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2...
 `preparatory_multi_ps` is the number of separator-cells.
 """
-function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print)
+function separate!(cellregs, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print)
 
 	# current number of cells treated
 	nc2 = size(ACSC, 1)
@@ -236,34 +251,36 @@ end
 
+
 """
-`function grid_to_graph_ps_multi!(grid, nt, depth)`
+`function grid_to_graph_cellwise_nogrid!(nc, nn, mat_cell_node, nt, depth)`
 
 The function assigns colors/partitons to each cell in the `grid`.
 First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again...
-`grid` is a simplexgrid.
+The grid is specified by `nc` (the number of cells), `nn` (the number of nodes) and `mat_cell_node` (i.e. `grid[CellNodes]` if ExtendableGrids is used).
+Here, `mat_cell_node[k,i]` is the k-th node of the i-th cell.
 `nt` is the number of threads.
 `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices...
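+For example, `nt=4` and `depth=2` yield 4 partitions, 4 separator parts and one remaining separator, i.e. 2*4+1 = 9 submatrices.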
""" -function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) - A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) - number_cells_per_node = zeros(Int64, num_nodes(grid)) - for j=1:num_cells(grid) - for node_id in grid[CellNodes][:,j] +function grid_to_graph_cellwise_nogrid!(nc, nn, mat_cell_node, nt, depth; minsize_sepa=10, do_print=false) + A = SparseMatrixLNK{Int64, Int64}(nc, nc) + number_cells_per_node = zeros(Int64, nn) + for j=1:nc + for node_id in mat_cell_node[:,j] number_cells_per_node[node_id] += 1 end end allcells = zeros(Int64, sum(number_cells_per_node)) - start = ones(Int64, num_nodes(grid)+1) + start = ones(Int64, nn+1) start[2:end] += cumsum(number_cells_per_node) number_cells_per_node .= 0 - for j=1:num_cells(grid) - for node_id in grid[CellNodes][:,j] + for j=1:nc + for node_id in mat_cell_node[:,j] allcells[start[node_id] + number_cells_per_node[node_id]] = j number_cells_per_node[node_id] += 1 end end - for j=1:num_nodes(grid) + for j=1:nn cells = @view allcells[start[j]:start[j+1]-1] for (i,id1) in enumerate(cells) for id2 in cells[i+1:end] @@ -278,12 +295,12 @@ function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=fals partition = Metis.partition(ACSC, nt) cellregs = copy(partition) - sn = Vector{Int64}(undef, num_cells(grid)) - gi = Vector{Int64}(undef, num_cells(grid)) + sn = Vector{Int64}(undef, nc) + gi = Vector{Int64}(undef, nc) ctr_sepanodes = 0 for tid=1:nt - for j=1:num_cells(grid) + for j=1:nc if cellregs[j] == tid rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) if how_many_different_below(cellregs[rows], nt+1) > 1 @@ -306,7 +323,7 @@ function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=fals RART = copy(ACSC) actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -316,13 +333,18 @@ function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=fals return allcells, start, cellregs, actual_depth end -function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) - As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] - number_cells_per_node = zeros(Int64, num_nodes(grid)) + +""" +`function grid_to_graph_ps_multi_par_nogrid!(nc, nn, mat_cell_node, nt, depth)` + +Same result as `grid_to_graph_ps_multi_nogrid!`, but computed on multiple threads. 
+""" +function grid_to_graph_cellwise_par_nogrid!(cn, nc, nn, nt, depth; minsize_sepa=10, do_print=false) + As = [ExtendableSparseMatrix{Int64, Int64}(nc, nc) for tid=1:nt] + number_cells_per_node = zeros(Int64, nn) - cn = grid[CellNodes] - for j=1:num_cells(grid) + for j=1:nc tmp = view(cn, :, j) for node_id in tmp number_cells_per_node[node_id] += 1 @@ -331,11 +353,11 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= allcells = zeros(Int64, sum(number_cells_per_node)) - start = ones(Int64, num_nodes(grid)+1) + start = ones(Int64, nn+1) start[2:end] += cumsum(number_cells_per_node) number_cells_per_node .= 0 - for j=1:num_cells(grid) + for j=1:nc tmp = view(cn, :, j) for node_id in tmp allcells[start[node_id] + number_cells_per_node[node_id]] = j @@ -343,7 +365,7 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= end end - node_range = get_starts(num_nodes(grid), nt) + node_range = get_starts(nn, nt) Threads.@threads for tid=1:nt for j in node_range[tid]:node_range[tid+1]-1 cells = @view allcells[start[j]:start[j+1]-1] @@ -363,11 +385,11 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= cellregs = Metis.partition(ACSC, nt) - sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + sn = [Vector{Int64}(undef, Int(ceil(nc/nt))) for tid=1:nt] ctr_sepanodess = zeros(Int64, nt) @threads for tid=1:nt - for j=1:num_cells(grid) + for j=1:nc if cellregs[j] == tid rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) if how_many_different_below(cellregs[rows], nt+1) > 1 @@ -393,7 +415,7 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= RART = ACSC actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -405,6 +427,80 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= return allcells, start, cellregs, actual_depth end +""" +function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = Vector{Int64}(undef, num_cells(grid)) + gi = Vector{Int64}(undef, num_cells(grid)) + ctr_sepanodes = 0 + + for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + 
sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return allcells, start, cellregs, actual_depth +end + + function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false) ce = grid[CellEdges] A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid)) @@ -465,13 +561,13 @@ function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=fals gi = gi[1:ctr_sepanodes] if do_print - @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!" end RART = copy(ACSC) actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(edgeregs, num_edges(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(edgeregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -555,13 +651,13 @@ function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print= gi = copy(sn) if do_print - @info "At level $(1), we found $ctr_sepanodes edges that have to be treated in the next iteration!" + @info "At level (1), we found ctr_sepanodes edges that have to be treated in the next iteration!" end RART = ACSC actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -572,17 +668,13 @@ function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print= #grid return alledges, start, cellregs, actual_depth end +""" +function edgewise_partition_from_cellwise_partition(nc, ne, ce, cellregs) + #ce = grid[CellEdges] + edgeregs = maximum(cellregs)*ones(Int64, ne) -function edgewise_partition_from_cellwise_partition(grid, cellregs) - ce = grid[CellEdges] - if num_edges(grid) == 0 - grid[EdgeNodes] - end - - edgeregs = maximum(cellregs)*ones(Int64, num_edges(grid)) - - for icell=1:num_cells(grid) + for icell=1:nc tmp = cellregs[icell] for iedge in ce[:,icell] if tmp < edgeregs[iedge] @@ -656,27 +748,7 @@ function bettercellsforpart(xx, upper) cfp end -""" -`function getgrid(nm)` -Returns a simplexgrid with a given number of nodes in each dimension. -`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
-""" -function getgrid(nm; x0=0.0, x1=1.0) - if length(nm) == 2 - n,m = nm - xx = collect(LinRange(x0, x1, n)) - yy = collect(LinRange(x0, x1, m)) - grid = simplexgrid(xx, yy) - else - n,m,l = nm - xx = collect(LinRange(x0, x1, n)) - yy = collect(LinRange(x0, x1, m)) - zz = collect(LinRange(x0, x1, l)) - grid = simplexgrid(xx, yy, zz) - end - grid -end function get_starts(n, nt) ret = ones(Int64, nt+1) @@ -833,7 +905,7 @@ function check_partition(nm, nt, depth) end =# -function validate_partition(grid, cellregs, start, allcells, nt, depth, assemblytype) +function validate_partition(nn, mat, cellregs, start, allcells, nt, depth, assemblytype) violation_ctr = 0 if assemblytype == :cellwise @@ -842,7 +914,7 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth, assembly key = EdgeNodes end - for j=1:num_nodes(grid) + for j=1:nn cells = @view allcells[start[j]:start[j+1]-1] sortedcellregs = unique(sort(cellregs[cells])) levels = Int.(ceil.(sortedcellregs/nt)) @@ -860,7 +932,7 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth, assembly loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) cells_at_level4 = allcells[loc.+(start[j]-1)] @info cells_at_level4, cellregs[cells_at_level4] - @info grid[key][:,cells_at_level4[1]], grid[key][:,cells_at_level4[2]] + @info mat[:,cells_at_level4[1]], mat[:,cells_at_level4[2]] end end end diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index 00b397d..4004b0d 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -399,103 +399,13 @@ function print_col(col, coll) @info v end -function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} - if lnk.nnz == 0 - return csc - elseif length(csc.rowval) == 0 - return SparseMatrixCSC(lnk) - else - return lnk + csc - end -end - -function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} - gi = collect(1:csc.n) - - - supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] - sortedcolumnids = sortperm(supersparsecolumns) - sortedcolumns = supersparsecolumns[sortedcolumnids] - #sortedcolumns = vcat([1], sortedcolumns) - sortedcolumns = vcat(sortedcolumns, [csc.n+1]) - - col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] - - #@info sortedcolumnids - - nnz_sum = length(csc.rowval) + lnk.nnz - colptr = Vector{Ti}(undef, csc.n+1) - rowval = Vector{Ti}(undef, nnz_sum) - nzval = Vector{Tv}(undef, nnz_sum) - colptr[1] = one(Ti) - - #first part: columns between 1 and first column of lnk - - colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) - rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) - nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) - - numshifts = 0 - - for J=1:length(sortedcolumns)-1 - #@info ">>>>>>> $J <<<<<<<<<<<<<<<" - # insert new added column here / dummy - i = sortedcolumns[J] - coll = get_column!(col, lnk, i) - #print_col(col, coll) - - nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) - - numshifts += nns - #j = colptr[i] #sortedcolumns[J]] - #rowval[j] = J - #nzval[j] = J - # insertion end - - #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts - - #a = i+1 - #b = sortedcolumns[J+1] - #@info a, b - - - #colptr[i+1:sortedcolumns[J+1]] = 
(csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) - - colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) - - - rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) - nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) - - - #= - - @info csc.colptr[a:b] - - colptr[a:b] = csc.colptr[a:b].+numshifts - - #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts - @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] - @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] - rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) - nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) - #rowval[colptrsortedcolumns[J+1]] - =# - end - - #@info colptr - - resize!(rowval, length(csc.rowval)+numshifts) - resize!(nzval, length(csc.rowval)+numshifts) - - - SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) - - - -end +""" +$(SIGNATURES) +Add the matrices `lnks` of type SuperSparseMatrixLNK onto the SparseMatrixCSC `csc`. +`gi[i]` maps the indices in `lnks[i]` to the indices of `csc`. +""" function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, Ti <: Integer} nt = length(lnks) @@ -605,7 +515,12 @@ function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArray end +""" +$(SIGNATURES) +Add the SuperSparseMatrixLNK `lnk` onto the SparseMatrixCSC `csc`. +`gi` maps the indices in `lnk` to the indices of `csc`. 
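+This is the single-matrix analogue of the vector variant above, which applies one index map per thread.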
+""" function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer} #@info lnk.collnk[1:lnk.colctr] @@ -677,7 +592,103 @@ function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseM end +""" + +function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + if lnk.nnz == 0 + return csc + elseif length(csc.rowval) == 0 + return SparseMatrixCSC(lnk) + else + return lnk + csc + end +end + +function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + #@info ">>>>>>> J <<<<<<<<<<<<<<<" + # insert new added column here / dummy + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + #print_col(col, coll) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + + numshifts += nns + #j = colptr[i] #sortedcolumns[J]] + #rowval[j] = J + #nzval[j] = J + # insertion end + + #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts + + #a = i+1 + #b = sortedcolumns[J+1] + #@info a, b + + + #colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + + #= + + @info csc.colptr[a:b] + + colptr[a:b] = csc.colptr[a:b].+numshifts + + #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts + @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] + @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] + rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + #rowval[colptrsortedcolumns[J+1]] + =# + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + +end function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} gi = collect(1:csc.n) @@ -738,7 +749,6 @@ function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMa end - function twodisjointsets(n, k) A = 
rand(1:n, k)
 	B = zeros(Int64, k)
@@ -767,7 +777,7 @@ function distinct(x, n)
 	end
 	y
 end
-
+"""
 
 function mean(x)
 	sum(x)/length(x)
diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl
index 37cc015..abcd04a 100644
--- a/src/matrix/extendable.jl
+++ b/src/matrix/extendable.jl
@@ -313,6 +313,15 @@ function flush!(ext::ExtendableSparseMatrix)
 	end
 	return ext
 end
+"""
+$(SIGNATURES)
+
+Reset ExtendableSparseMatrix into a state similar to that after creation.
+"""
+function reset!(A::ExtendableSparseMatrix)
+	A.cscmatrix=spzeros(size(A)...)
+	A.lnkmatrix=nothing
+end
 
 """
 $(SIGNATURES)
diff --git a/test/rect.jl b/test/rect.jl
new file mode 100644
index 0000000..c502fc6
--- /dev/null
+++ b/test/rect.jl
@@ -0,0 +1,182 @@
+"""
+`test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)`
+
+Measure and output build and update times for a rectangular grid with `n * n` nodes.
+Calculations are done on `nt` threads (`nt` >= 1).
+Returns the assembled matrix.
+"""
+function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)
+	m = n
+	lindexes = LinearIndices((1:n,1:m))
+	mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti)
+	if nt > 1
+		A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false)
+	else
+		A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m)
+	end
+
+	X = collect(1:n) #LinRange(0,1,n)
+	Y = collect(1:n) #LinRange(0,1,m)
+
+	#Build
+	times_build = zeros(k)
+	for i=1:k
+		ExtendableSparse.reset!(A)
+		times_build[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false)
+	end
+
+
+
+	#update
+	times_update = zeros(k)
+	for i=1:k
+		times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true)
+	end
+
+	@info "TIMES: MIN, AVG, MAX"
+	info_minmax(times_build, "build ")
+	info_minmax(times_update, "update")
+
+	A
+end
+
+"""
+`generate_rectangle_grid(lindexes, Ti)`
+
+Generate a rectangular grid (i.e. a CellNodes matrix) based on `LinearIndices`.
+"""
+function generate_rectangle_grid(lindexes, Ti)
+	n,m = size(lindexes)
+	nn = n*m # num nodes
+	nc = (n-1)*(m-1)
+	#lindexes=LinearIndices((1:n,1:m))
+
+	mat_cell_node = zeros(Ti, 4, nc)
+
+	# node order per cell: upper left, upper right, lower right, lower left
+	cell_id = 1
+	for ir in 1:n-1
+		for jr in 1:m-1
+			mat_cell_node[1,cell_id] = lindexes[ir,jr]
+			mat_cell_node[2,cell_id] = lindexes[ir,jr+1]
+			mat_cell_node[3,cell_id] = lindexes[ir+1,jr+1]
+			mat_cell_node[4,cell_id] = lindexes[ir+1,jr]
+			cell_id += 1
+		end
+	end
+
+
+	mat_cell_node, nc, nn
+
+end
+
+function info_minmax(x, name; digits=3)
+	n = length(x)
+	@info name*" $(round(minimum(x),digits=digits)), $(round(sum(x)/n,digits=digits)), $(round(maximum(x),digits=digits))"
+end
+
+"""
+Assembly functions for ExtendableSparseMatrixParallel
+"""
+function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer}
+	if set_CSC_zero
+		A.cscmatrix.nzval .= 0
+	end
+
+	for level=1:A.depth
+		Threads.@threads for tid=1:A.nt
+			for cell in A.cellsforpart[(level-1)*A.nt+tid]
+				assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, tid)
+			end
+		end
+	end
+
+	for cell in A.cellsforpart[A.depth*A.nt+1]
+		assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1)
+	end
+
+	nnzCSC, nnzLNK = ExtendableSparse.nnz_noflush(A)
+	if nnzCSC > 0 && nnzLNK > 0
+		flush!(A; do_dense=false)
+		#sparse flush
+	elseif nnzCSC == 0 && nnzLNK > 0
+		flush!(A; do_dense=true)
+		#dense flush
+	end
+end
+
+function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer}
+	ExtendableSparse.addtoentry!(A, k, k, tid, +v)
+	ExtendableSparse.addtoentry!(A, k, l, tid, -v)
+	ExtendableSparse.addtoentry!(A, l, k, tid, -v)
+	ExtendableSparse.addtoentry!(A, l, l, tid, +v)
+end
+
+function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,cell,tid) where {Tv, Ti <: Integer}
+	ij00=mcn[1,cell]
+	ij10=mcn[2,cell]
+	ij11=mcn[3,cell]
+	ij01=mcn[4,cell]
+
+	ix = (cell-1)%n+1
+	iy = Int64(ceil(cell/n))
+
+	hx=X[ix+1]-X[ix]
+	hy=Y[iy+1]-Y[iy]
+
+	assembleedge!(A,0.5*hx/hy,ij00,ij01,tid)
+	assembleedge!(A,0.5*hx/hy,ij10,ij11,tid)
+	assembleedge!(A,0.5*hy/hx,ij00,ij10,tid)
+	assembleedge!(A,0.5*hy/hx,ij01,ij11,tid)
+	v=0.25*hx*hy
+	ExtendableSparse.addtoentry!(A, ij00, ij00, tid, v*d)
+	ExtendableSparse.addtoentry!(A, ij01, ij01, tid, v*d)
+	ExtendableSparse.addtoentry!(A, ij10, ij10, tid, v*d)
+	ExtendableSparse.addtoentry!(A, ij11, ij11, tid, v*d)
+end
+
+
+
+"""
+Assembly functions for ExtendableSparseMatrix
+"""
+function assemble_ESMP(A::ExtendableSparseMatrix{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer}
+	if set_CSC_zero
+		A.cscmatrix.nzval .= 0
+	end
+	nc = size(mat_cell_node,2)
+	for cell=1:nc
+		assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell)
+	end
+	ExtendableSparse.flush!(A)
+end
+
+function assembleedge!(A::ExtendableSparseMatrix{Tv, Ti},v,k,l) where {Tv, Ti <: Integer}
+	A[k,k]+=v
+	A[k,l]-=v
+	A[l,k]-=v
+	A[l,l]+=v
+end
+
+function assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) where {Tv, Ti <: Integer}
+	ij00=mcn[1,cell]
+	ij10=mcn[2,cell]
+	ij11=mcn[3,cell]
+	ij01=mcn[4,cell]
+
+	ix = (cell-1)%n+1
+	iy = Int64(ceil(cell/n))
+
+	hx=X[ix+1]-X[ix]
+	hy=Y[iy+1]-Y[iy]
+
+	assembleedge!(A,0.5*hx/hy,ij00,ij01)
+	assembleedge!(A,0.5*hx/hy,ij10,ij11)
+	assembleedge!(A,0.5*hy/hx,ij00,ij10)
+	assembleedge!(A,0.5*hy/hx,ij01,ij11)
+	v=0.25*hx*hy
+
A[ij00,ij00]+=v*d + A[ij01,ij01]+=v*d + A[ij10,ij10]+=v*d + A[ij11,ij11]+=v*d +end \ No newline at end of file From 8d56f995ab51ad41043878d1bf716196be3cc773 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 14 May 2024 19:12:47 +0200 Subject: [PATCH 16/44] add comment on collnk --- src/matrix/ExtendableSparseMatrixParallel/supersparse.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index 4004b0d..ece23bd 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -50,7 +50,9 @@ mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{T """ nzval::Vector{Tv} - + """ + (Unsorted) list of all columns with non-zero entries + """ collnk::Vector{Ti} # counts the number of columns in use From 1fa752d739a18a629a67f2fd70d30c1c8a0a3fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 13 May 2024 21:44:24 +0200 Subject: [PATCH 17/44] some better explanation of parallel test tools --- test/parallel_testtools.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/parallel_testtools.jl b/test/parallel_testtools.jl index 2a92521..126edf0 100644 --- a/test/parallel_testtools.jl +++ b/test/parallel_testtools.jl @@ -1,4 +1,4 @@ -using ChunkSplitters +import ChunkSplitters # Methods to test parallel assembly # Will eventually become part of the package. @@ -9,12 +9,12 @@ Return colored partitioing of grid made up by `X` and `Y` for work with `max(nt as a vector `p` of a vector pairs of index ranges such that `p[i]` containes partions of color i which can be assembled independently. -The current algorithm +The current algorithm creates `nt^2` partitions with `nt` colors. """ function part2d(X,Y, nt) nt=max(4,nt) - XP=collect(chunks(1:length(X)-1,n=nt)) - YP=collect(chunks(1:length(Y)-1,n=nt)) + XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt)) + YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt)) partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt] ipart=1 col=1 @@ -28,7 +28,12 @@ function part2d(X,Y, nt) partitions end +""" + showgrid(Makie, ColorSchemes, X,Y,nt) +Show grid partitioned according to [`part2d`](@ref). Needs a makie variant and ColorSchemes +to be passed as modules. 
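+For example (hypothetical session): `using GLMakie, ColorSchemes; showgrid(GLMakie, ColorSchemes, 0:0.1:1, 0:0.1:1, 4)`.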
+""" function showgrid(Makie, ColorSchemes, X,Y,nt) f = Makie.Figure() ax = Makie.Axis(f[1, 1]; aspect = 1) @@ -109,7 +114,7 @@ function assemblepartition!(A,lindexes,X,Y,xp,yp,d) end """ - partassemble!(A,N,np=1;xrange=(0,1),yrange=(0,1), d=0.1) + partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) Partitioned, cellwise, multithreaded assembly of finite difference matrix for ` -Δu + d*u=f` with homogeneous Neumann bc on grid set up by coordinate vectors From 493bec2fd1d987402f538f5bd7dc5083968dbe56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 16 May 2024 22:52:13 +0200 Subject: [PATCH 18/44] reorganized: put developing stuff into Experimental --- Project.toml | 1 + src/ExtendableSparse.jl | 23 ++----- src/experimental/Experimental.jl | 41 ++++++++++++ .../experimental}/parallel_testtools.jl | 16 ++--- src/factorizations/factorizations.jl | 36 ----------- src/matrix/extendable.jl | 41 +++++++++--- ...t_parallel.jl => experimental_parallel.jl} | 5 +- test/{rect.jl => experimental_rect.jl} | 62 ++++++++++++++++--- 8 files changed, 140 insertions(+), 85 deletions(-) create mode 100644 src/experimental/Experimental.jl rename {test => src/experimental}/parallel_testtools.jl (92%) rename test/{test_parallel.jl => experimental_parallel.jl} (97%) rename test/{rect.jl => experimental_rect.jl} (74%) diff --git a/Project.toml b/Project.toml index 36d1e03..f3d1ef7 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" +ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index f927622..5bdc442 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -4,8 +4,6 @@ using LinearAlgebra using Sparspak using ILUZero -using Metis -using Base.Threads if !isdefined(Base, :get_extension) using Requires @@ -31,41 +29,28 @@ export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawup export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet - -#@warn "ESMP!" -include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") - - - -include("factorizations/ilu_Al-Kurdi_Mittal.jl") -#using .ILUAM -include("factorizations/pilu_Al-Kurdi_Mittal.jl") -#using .PILUAM include("factorizations/factorizations.jl") +include("experimental/Experimental.jl") + include("factorizations/simple_iteration.jl") export simple, simple! include("matrix/sprand.jl") export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark +export rawupdateindex!, updateindex! 
-export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK -export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light - export JacobiPreconditioner, ILU0Preconditioner, ILUZeroPreconditioner, - ILUAMPreconditioner, - PILUAMPreconditioner, PointBlockILUZeroPreconditioner, ParallelJacobiPreconditioner, ParallelILU0Preconditioner, - BlockPreconditioner,allow_views, - reorderlinsys + BlockPreconditioner,allow_views export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl new file mode 100644 index 0000000..7108c25 --- /dev/null +++ b/src/experimental/Experimental.jl @@ -0,0 +1,41 @@ +module Experimental +using ExtendableSparse, SparseArrays +import ExtendableSparse: flush!, reset!, rawupdateindex! +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using DocStringExtensions +using Metis +using Base.Threads +using LinearAlgebra + +include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) + +include(joinpath(@__DIR__, "..", "factorizations","ilu_Al-Kurdi_Mittal.jl")) +#using .ILUAM +include(joinpath(@__DIR__, "..", "factorizations","pilu_Al-Kurdi_Mittal.jl")) +#using .PILUAM + +include(joinpath(@__DIR__, "..", "factorizations","iluam.jl")) +include(joinpath(@__DIR__, "..", "factorizations","piluam.jl")) + +@eval begin + @makefrommatrix ILUAMPreconditioner + @makefrommatrix PILUAMPreconditioner +end + +function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) + p.A = A + update!(p) + p +end + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, compare_matrices_light +export ILUAMPreconditioner, PILUAMPreconditioner +export reorderlinsys, nnz_noflush + + +include("parallel_testtools.jl") +export part2d, showgrid, partassemble! + +end + diff --git a/test/parallel_testtools.jl b/src/experimental/parallel_testtools.jl similarity index 92% rename from test/parallel_testtools.jl rename to src/experimental/parallel_testtools.jl index 126edf0..a007ff2 100644 --- a/test/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -66,10 +66,10 @@ Assemble edge for finite volume laplacian. Used by [`partassemble!`](@ref). 
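+Adds `v` to the diagonal entries `(k,k)` and `(l,l)` and subtracts it from the off-diagonal entries `(k,l)` and `(l,k)`.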
""" function assembleedge!(A,v,k,l) - A[k,k]+=v - A[k,l]-=v - A[l,k]-=v - A[l,l]+=v + rawupdateindex!(A,+,v,k,k) + rawupdateindex!(A,+,-v,k,l) + rawupdateindex!(A,+,-v,l,k) + rawupdateindex!(A,+,v,l,l) end """ @@ -92,10 +92,10 @@ function assemblecell!(A,lindexes,X,Y,i,j,d) assembleedge!(A,0.5*hy/hx,ij00,ij10) assembleedge!(A,0.5*hy/hx,ij01,ij11) v=0.25*hx*hy - A[ij00,ij00]+=v*d - A[ij01,ij01]+=v*d - A[ij10,ij10]+=v*d - A[ij11,ij11]+=v*d + rawupdateindex!(A,+,v*d,ij00,ij00) + rawupdateindex!(A,+,v*d,ij01,ij01) + rawupdateindex!(A,+,v*d,ij10,ij10) + rawupdateindex!(A,+,v*d,ij11,ij11) end """ diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 2d56fce..c9809d3 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -75,8 +75,6 @@ end include("ilu0.jl") include("iluzero.jl") -include("iluam.jl") -include("piluam.jl") include("parallel_jacobi.jl") include("parallel_ilu0.jl") include("sparspak.jl") @@ -86,8 +84,6 @@ include("jacobi.jl") @eval begin @makefrommatrix ILU0Preconditioner @makefrommatrix ILUZeroPreconditioner - @makefrommatrix ILUAMPreconditioner - @makefrommatrix PILUAMPreconditioner @makefrommatrix PointBlockILUZeroPreconditioner @makefrommatrix JacobiPreconditioner @makefrommatrix ParallelJacobiPreconditioner @@ -110,40 +106,8 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) update!(p) p end - -function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) - p.A = A - update!(p) - p -end - -#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) -# p.A = A -# update!(p) -# p -#end - -#factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) - -#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) - factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) -#function factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel) -# factorize!(p, A) -#end - -#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) -# factorize!(p, A.cscmatrix) -#end - - -#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) -# factorize!(p, A.cscmatrix) -#end - - -#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,A) """ ``` diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index abcd04a..df67dc7 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -26,6 +26,29 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrix phash::UInt64 end +mutable struct Locking + locking::Bool +end + +const locking=Locking(true) + +function with_locking!(l::Bool) + global locking + locking.locking=l +end + +function with_locking() + global locking + locking.locking +end + +mylock(x)=with_locking() ? Base.lock(x) : nothing +myunlock(x)=with_locking() ? 
Base.unlock(x) : nothing + + +#mylock(x)=nothing +#myunlock(x)=nothing + """ ``` ExtendableSparseMatrix(Tv,Ti,m,n) @@ -57,7 +80,7 @@ ExtendableSparseMatrix(m, n) = ExtendableSparseMatrix{Float64, Int}(m, n) """ $(SIGNATURES) - Create ExtendableSparseMatrix from SparseMatrixCSC +Create ExtendableSparseMatrix from SparseMatrixCSC """ function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} @@ -171,14 +194,14 @@ function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - lock(ext.lock) + mylock(ext.lock) try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end updateindex!(ext.lnkmatrix, op, v, i, j) finally - unlock(ext.lock) + myunlock(ext.lock) end end ext @@ -198,14 +221,14 @@ function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - lock(ext.lock) + mylock(ext.lock) try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end rawupdateindex!(ext.lnkmatrix, op, v, i, j) finally - unlock(ext.lock) + myunlock(ext.lock) end end ext @@ -225,14 +248,14 @@ function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - lock(ext.lock) + mylock(ext.lock) try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end ext.lnkmatrix[i, j] = v finally - unlock(ext.lock) + myunlock(ext.lock) end end end @@ -253,11 +276,11 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, return zero(Tv) else v=zero(Tv) - lock(ext.lock) + mylock(ext.lock) try v=ext.lnkmatrix[i, j] finally - unlock(ext.lock) + myunlock(ext.lock) end end end diff --git a/test/test_parallel.jl b/test/experimental_parallel.jl similarity index 97% rename from test/test_parallel.jl rename to test/experimental_parallel.jl index 1fe3f1d..fa42d1d 100644 --- a/test/test_parallel.jl +++ b/test/experimental_parallel.jl @@ -1,10 +1,9 @@ using ExtendableSparse,SparseArrays +using ExtendableSparse.Experimental using DocStringExtensions using BenchmarkTools using Test -include("parallel_testtools.jl") - """ test_correctness_update(N) @@ -99,7 +98,7 @@ end Reset ExtenableSparseMatrix into state similar to that after creation. """ -function reset!(A) +function ExtendableSparse.reset!(A::ExtendableSparseMatrix) A.cscmatrix=spzeros(size(A)...) 
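+    # the LNK part is dropped; it is recreated lazily on the next insertion of a new entry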
A.lnkmatrix=nothing end diff --git a/test/rect.jl b/test/experimental_rect.jl similarity index 74% rename from test/rect.jl rename to test/experimental_rect.jl index c502fc6..367e1ac 100644 --- a/test/rect.jl +++ b/test/experimental_rect.jl @@ -1,3 +1,10 @@ +using ExtendableSparse,SparseArrays +using ExtendableSparse.Experimental +using DocStringExtensions +using BenchmarkTools +using Test + + """ `test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)` @@ -40,6 +47,41 @@ function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) A end + +function speedup_build_ESMP(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + + ExtendableSparse.with_locking!(false) + A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + ExtendableSparse.with_locking!(true) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + result=[] + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(ExtendableSparse.reset!($A)) seconds=1 + push!(result,(nt,round(t0/t,digits=2))) + end + + # #update + # times_update = zeros(k) + # for i=1:k + # times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true) + # end + + # @info "TIMES: MIN, AVG, MAX" + # info_minmax(times_build, "build ") + # info_minmax(times_update, "update") + result + +end + """ `generate_rectangle_grid(lindexes, Ti)` @@ -95,7 +137,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1) end - nnzCSC, nnzLNK = ExtendableSparse.nnz_noflush(A) + nnzCSC, nnzLNK = nnz_noflush(A) if nnzCSC > 0 && nnzLNK > 0 flush!(A; do_dense=false) #sparse flush @@ -106,10 +148,10 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell end function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer} - ExtendableSparse.addtoentry!(A, k, k, tid, +v) - ExtendableSparse.addtoentry!(A, k, l, tid, -v) - ExtendableSparse.addtoentry!(A, l, k, tid, -v) - ExtendableSparse.addtoentry!(A, l, l, tid, +v) + addtoentry!(A, k, k, tid, +v) + addtoentry!(A, k, l, tid, -v) + addtoentry!(A, l, k, tid, -v) + addtoentry!(A, l, l, tid, +v) end function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,cell,tid) where {Tv, Ti <: Integer} @@ -129,10 +171,10 @@ function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,c assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) v=0.25*hx*hy - ExtendableSparse.addtoentry!(A, ij00, ij00, tid, v*d) - ExtendableSparse.addtoentry!(A, ij01, ij01, tid, v*d) - ExtendableSparse.addtoentry!(A, ij10, ij10, tid, v*d) - ExtendableSparse.addtoentry!(A, ij11, ij11, tid, v*d) + addtoentry!(A, ij00, ij00, tid, v*d) + addtoentry!(A, ij01, ij01, tid, v*d) + addtoentry!(A, ij10, ij10, tid, v*d) + addtoentry!(A, ij11, ij11, tid, v*d) end @@ -179,4 +221,4 @@ function assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) whe A[ij01,ij01]+=v*d A[ij10,ij10]+=v*d A[ij11,ij11]+=v*d -end \ No newline at end of file +end From 954819c349df7d24cb73fce1f50a27f2ec313c06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sat, 18 May 2024 23:15:11 
+0200 Subject: [PATCH 19/44] ExtendableSparseMatrixParallelDict & tests Steps to AbstractExtendableSparse --- Project.toml | 1 + src/experimental/Experimental.jl | 18 +- src/experimental/abstractextendable.jl | 34 ++++ .../extendablesparsematrixdict.jl | 191 ++++++++++++++++++ src/experimental/parallel_testtools.jl | 81 +++++++- src/experimental/sparsematrixdict.jl | 64 ++++++ .../supersparse.jl | 5 +- test/experimental_dict.jl | 130 ++++++++++++ 8 files changed, 516 insertions(+), 8 deletions(-) create mode 100644 src/experimental/abstractextendable.jl create mode 100644 src/experimental/extendablesparsematrixdict.jl create mode 100644 src/experimental/sparsematrixdict.jl create mode 100644 test/experimental_dict.jl diff --git a/Project.toml b/Project.toml index f3d1ef7..1d10c82 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 7108c25..f7aa518 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -1,11 +1,15 @@ module Experimental using ExtendableSparse, SparseArrays -import ExtendableSparse: flush!, reset!, rawupdateindex! +using LinearAlgebra +using SparseArrays: AbstractSparseMatrixCSC +import SparseArrays: nonzeros, getcolptr,nzrange +import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash using DocStringExtensions using Metis using Base.Threads -using LinearAlgebra +using OhMyThreads: @tasks + include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) @@ -34,8 +38,16 @@ export ILUAMPreconditioner, PILUAMPreconditioner export reorderlinsys, nnz_noflush +include("abstractextendable.jl") + +include("sparsematrixdict.jl") +export SparseMatrixDict + +include("extendablesparsematrixdict.jl") +export ExtendableSparseMatrixParallelDict, partcolors! + include("parallel_testtools.jl") -export part2d, showgrid, partassemble! +export part2d, showgrid, partassemble!, assemblepartition! end diff --git a/src/experimental/abstractextendable.jl b/src/experimental/abstractextendable.jl new file mode 100644 index 0000000..d44b4d1 --- /dev/null +++ b/src/experimental/abstractextendable.jl @@ -0,0 +1,34 @@ +abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end + +SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) + +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) + +Base.size(ext::AbstractExtendableSparseMatrix)=size(sparse(ext)) + +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) + A=sparse(ext) + xnnz = nnz(A) + m, n = size(A) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnz, + " stored ", + xnnz == 1 ? 
"entry" : "entries") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnz == 0) + print(io, ":\n") + Base.print_array(IOContext(io), A) + end +end + + diff --git a/src/experimental/extendablesparsematrixdict.jl b/src/experimental/extendablesparsematrixdict.jl new file mode 100644 index 0000000..5ef63da --- /dev/null +++ b/src/experimental/extendablesparsematrixdict.jl @@ -0,0 +1,191 @@ +mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Linked list structure holding data of extension + """ + dictmatrices::Vector{SparseMatrixDict{Tv,Ti}} + + nodeparts::Vector{Ti} + partnodes::Vector{Vector{Ti}} + colparts::Vector{Vector{Ti}} +end + + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti} + ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n), + [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p], + zeros(Ti,n), + Vector{Ti}[], + Vector{Ti}[] + ) +end + +function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti} + ncol=maximum(partcolors) + colparts=[Ti[] for i=1:ncol] + for i=1:length(partcolors) + push!(colparts[partcolors[i]],i) + end + ext.colparts=colparts + ext +end + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti} + ext=ExtendableSparseMatrixParallelDict(m,n,length(pc)) + partcolors!(ext,pc) +end + + +ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p) + + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p] + ext.nodeparts.=zero(Ti) + ext +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti} + reset!(ext,length(ext.dictmatrices)) +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti} + reset!(ext,length(pc)) + partcolors!(ext,pc) +end + + +function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} + lnew=sumlength(ext.dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=ext.cscmatrix + l=lnew+nnz(ext.cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + ip=1 + for m in ext.dictmatrices + for (p,v) in m.values + ext.nodeparts[first(p)]=ip + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + np=length(ext.dictmatrices) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np] + ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) + + n,m=size(ext) + pn=zeros(Int,np) + for i=1:n + if ext.nodeparts[i]>0 + pn[ext.nodeparts[i]]+=1 + end + end + partnodes=[zeros(Int,pn[i]) for i=1:np] + pn.=1 + for i=1:n + if ext.nodeparts[i]>0 + ip=ext.nodeparts[i] + partnodes[ip][pn[ip]]=i + pn[ip]+=1 + end + end + ext.partnodes=partnodes + end + ext +end + +function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) + flush!(ext) + ext.cscmatrix +end + + + +function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! 
for new entries into ExtendableSparseMatrixParallelDict") + end +end + + +function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sumlength(ext.dictmatrices) == 0 + return zero(Tv) + else + error("flush! ExtendableSparseMatrixParallelDict before using getindex") + end +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) + end +end + +function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} + A=ext.cscmatrix + colparts=ext.colparts + partnodes=ext.partnodes + rows = rowvals(A) + vals = nonzeros(A) + + r.=zero(Tv) + m,n=size(A) + for icol=1:length(colparts) + part=colparts[icol] + @tasks for ip=1:length(part) + @inbounds begin + for j in partnodes[part[ip]] + for i in nzrange(A,j) + row = rows[i] + val = vals[i] + r[row]+=val*x[j] + end + end + end + end + end + r +end + diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl index a007ff2..4f99283 100644 --- a/src/experimental/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -28,6 +28,22 @@ function part2d(X,Y, nt) partitions end +function colpart2d(X,Y,nt) + Nx=length(X) + Ny=length(Y) + p=part2d(X,Y,nt) + pc=zeros(Int,sum(length,p)) + jp=1 + for icol=1:length(p) + for ip=1:length(p[icol]) + pc[jp]=icol + jp+=1 + end + end + p,pc +end + + """ showgrid(Makie, ColorSchemes, X,Y,nt) @@ -72,6 +88,13 @@ function assembleedge!(A,v,k,l) rawupdateindex!(A,+,v,l,l) end +function assembleedge!(A,v,k,l,tid) + rawupdateindex!(A,+,v,k,k,tid) + rawupdateindex!(A,+,-v,k,l,tid) + rawupdateindex!(A,+,-v,l,k,tid) + rawupdateindex!(A,+,v,l,l,tid) +end + """ $(SIGNATURES) @@ -98,6 +121,25 @@ function assemblecell!(A,lindexes,X,Y,i,j,d) rawupdateindex!(A,+,v*d,ij11,ij11) end +function assemblecell!(A,lindexes,X,Y,i,j,d,tid) + hx=X[i+1]-X[i] + hy=Y[j+1]-Y[j] + ij00=lindexes[i,j] + ij10=lindexes[i+1,j] + ij11=lindexes[i+1,j+1] + ij01=lindexes[i,j+1] + + assembleedge!(A,0.5*hx/hy,ij00,ij01,tid) + assembleedge!(A,0.5*hx/hy,ij10,ij11,tid) + assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) + assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) + v=0.25*hx*hy + rawupdateindex!(A,+,v*d,ij00,ij00,tid) + rawupdateindex!(A,+,v*d,ij01,ij01,tid) + rawupdateindex!(A,+,v*d,ij10,ij10,tid) + rawupdateindex!(A,+,v*d,ij11,ij11,tid) +end + """ $(SIGNATURES) @@ -113,6 +155,14 @@ function assemblepartition!(A,lindexes,X,Y,xp,yp,d) end end +function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid) + for j in yp + for i in xp + assemblecell!(A,lindexes,X,Y,i,j,d,tid) + end + end +end + """ partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) @@ -133,10 +183,39 @@ function partassemble!(A,X,Y,nt=1;d=0.1) else p=part2d(X,Y,nt) for icol=1:length(p) - Threads.@threads for (xp, yp) in p[icol] + @tasks for (xp, yp) in p[icol] assemblepartition!(A,lindexes,X,Y,xp,yp,d) end end end flush!(A) end + + +function partassemble!(A::ExtendableSparseMatrixParallelDict,X,Y,nt=1;d=0.1, reset=true) + Nx=length(X) + Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") + size(A,2)==Nx*Ny || error("incompatible size of A") + + lindexes=LinearIndices((1:Nx,1:Ny)) + if nt==1 + reset!(A,1) + 
assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1) + else + p,pc=colpart2d(X,Y,nt) + if reset + reset!(A,pc) + end + jp0=0 + for icol=1:length(p) + npc=length(p[icol]) + @tasks for ip=1:npc + (xp, yp)=p[icol][ip] + assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip) + end + jp0+=npc + end + end + flush!(A) +end diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl new file mode 100644 index 0000000..112f949 --- /dev/null +++ b/src/experimental/sparsematrixdict.jl @@ -0,0 +1,64 @@ +mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrix{Tv,Ti} + m::Ti + n::Ti + values::Dict{Pair{Ti,Ti}, Tv} + SparseMatrixDict{Tv,Ti}(m,n) where {Tv,Ti} = new(m,n,Dict{Pair{Ti,Ti}, Tv}()) +end + +function reset!(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + m.values=Dict{Pair{Ti,Ti}, Tv}() +end + +function Base.setindex!(m::SparseMatrixDict,v,i,j) + m.values[Pair(i,j)]=v +end + +function rawupdateindex!(m::SparseMatrixDict{Tv,Ti},op,v,i,j) where {Tv,Ti} + p=Pair(i,j) + haskey(m.values,p) ? vnew=op(m.values[p],v) : vnew=op(zero(Tv),v) + m.values[p]=vnew +end + +function Base.getindex(m::SparseMatrixDict{Tv},i,j) where Tv + haskey(m.values,Pair(i,j)) ? m.values[Pair(i,j)] : zero(Tv) +end + +Base.size(m::SparseMatrixDict)=(m.m,m.n) + +flush!(m::SparseMatrixDict)=nothing + +function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(m.values) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + SparseArrays.sparse!(I,J,V,size(m)...,+) +end + +sumlength(mv::Vector{SparseMatrixDict{Tv,Ti}}) where{Tv,Ti}=sum(m->length(m.values),mv) + +function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} + l=sumlength(mv) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for m in mv + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + end + SparseArrays.sparse!(I,J,V,size(mv[1])...,+) +end + + diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index ece23bd..691e158 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -1,7 +1,4 @@ -using SparseArrays -using ExtendableSparse - mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} """ Number of rows @@ -70,7 +67,7 @@ function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer} end -function findindex(lnk::SuperSparseMatrixLNK, i, j) +function ExtendableSparse.findindex(lnk::SuperSparseMatrixLNK, i, j) if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) throw(BoundsError(lnk, (i, j))) end diff --git a/test/experimental_dict.jl b/test/experimental_dict.jl new file mode 100644 index 0000000..f88715a --- /dev/null +++ b/test/experimental_dict.jl @@ -0,0 +1,130 @@ +using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental +using DocStringExtensions +using BenchmarkTools +using Test + + +function test_correctness_update(N) + X=1:N + Y=1:N + A=ExtendableSparseMatrixParallelDict{Float64,Int}(N^2,N^2,1) + allnp=[4,5,6,7,8] + + # Assembele without partitioning + # this gives the "base truth" to compare with + partassemble!(A,X,Y) + + # Save the nonzeros + nz=copy(nonzeros(A)) + for np in allnp + # Reset the nonzeros, keeping the structure intact + nonzeros(A).=0 + # Parallel assembly whith np threads + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz 
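+        # comparing nonzeros is valid here since both assemblies produce the same sparsity pattern after flush!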
+ end +end + +""" + test_correctness_build(N) + +Test correctness of parallel assembly on NxN grid during +build phase, assuming that no structure has been assembled. +""" +function test_correctness_build(N) + X=1:N + Y=1:N + allnp=[4,5,6,7,8] + # Get the "ground truth" + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + for np in allnp + # Make a new matrix and assemble parallel. + # this should result in the same nonzeros + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz + end +end + +function test_correctness_mul(N; nps=5) + X=1:N + Y=1:N + allnp=[4,5,6,7,8] + A0=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A0,X,Y) + + for np in allnp + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y,np) + b=rand(N^2) + @test A*b ≈ A0*b + end +end + +function speedup_update(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + # Get the base timing + # During setup, set matrix entries to zero while keeping the structure + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) + result=[] + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + for np in allnp + # Get the parallel timing + # During setup, set matrix entries to zero while keeping the structure + partassemble!(A,X,Y,np) + t=@belapsed partassemble!($A,$X,$Y,$np,reset=false) seconds=1 setup=(nonzeros($A).=0) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end + +function speedup_build(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + reset!(A) + partassemble!(A,X,Y) + @assert nonzeros(A)≈(nz) + + # Get the base timing + # During setup, reset matrix to empty state. + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + + result=[] + for np in allnp + # Get the parallel timing + # During setup, reset matrix to empty state. 
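+        # (setup= runs before each sample, so every timed run performs
+        # the full structure build starting from the empty state)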
+ t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end + +function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + + A0=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A0,X,Y) + b=rand(N^2) + t0=@belapsed $A0*$b seconds=1 + + result=[] + for np in allnp + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y,np) + t=@belapsed $A*$b seconds=1 + push!(result,(np,round(t0/t,digits=2))) + end + result +end From 96f015d2f4e782d023ae69ba6e359eac6103a1e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Wed, 22 May 2024 22:51:27 +0200 Subject: [PATCH 20/44] Parallelism with Dict based matrices --- src/experimental/Experimental.jl | 4 + src/experimental/abstractextendable.jl | 5 +- .../extendablesparsematrixdict.jl | 150 +++----------- .../extendablesparsematrixparalleldict.jl | 194 ++++++++++++++++++ src/experimental/sparsematrixdict.jl | 21 +- test/ExperimentalDict.jl | 36 ++++ ...al_parallel.jl => ExperimentalParallel.jl} | 0 ...al_dict.jl => ExperimentalParallelDict.jl} | 19 +- 8 files changed, 285 insertions(+), 144 deletions(-) create mode 100644 src/experimental/extendablesparsematrixparalleldict.jl create mode 100644 test/ExperimentalDict.jl rename test/{experimental_parallel.jl => ExperimentalParallel.jl} (100%) rename test/{experimental_dict.jl => ExperimentalParallelDict.jl} (90%) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index f7aa518..9fdf1e4 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -44,8 +44,12 @@ include("sparsematrixdict.jl") export SparseMatrixDict include("extendablesparsematrixdict.jl") +export ExtendableSparseMatrixDict + +include("extendablesparsematrixparalleldict.jl") export ExtendableSparseMatrixParallelDict, partcolors! + include("parallel_testtools.jl") export part2d, showgrid, partassemble!, assemblepartition! 
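
A minimal usage sketch of the dict-based parallel type introduced in this
patch (illustrative only, not part of the patch itself; the row split and
the use of plain Threads.@threads are assumptions, since the package drives
assembly through partassemble! and its @tasks loops):

    using ExtendableSparse, ExtendableSparse.Experimental

    n = 4
    # one SparseMatrixDict per task collects entries not yet in the CSC part
    A = ExtendableSparseMatrixParallelDict(n, n, 2)
    Threads.@threads for tid in 1:2
        rows = tid == 1 ? (1:2) : (3:4)   # disjoint rows: no write conflicts
        for i in rows
            # new entries land in the dict matrix with number tid
            rawupdateindex!(A, +, 1.0, i, i, tid)
        end
    end
    flush!(A)    # merge all dicts into the CSC part
    @assert nnz(A) == n

Besides merging, flush! records the partition owning each node; the
color-partitioned mul! added below relies on this information.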
diff --git a/src/experimental/abstractextendable.jl b/src/experimental/abstractextendable.jl index d44b4d1..2cd72c8 100644 --- a/src/experimental/abstractextendable.jl +++ b/src/experimental/abstractextendable.jl @@ -4,7 +4,7 @@ SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) -Base.size(ext::AbstractExtendableSparseMatrix)=size(sparse(ext)) +Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) A=sparse(ext) @@ -31,4 +31,5 @@ function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatr end end - + +LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix{Tv,Ti}, x) where {Tv,Ti} = mul!(r,sparse(ext),x) diff --git a/src/experimental/extendablesparsematrixdict.jl b/src/experimental/extendablesparsematrixdict.jl index 5ef63da..6641e5a 100644 --- a/src/experimental/extendablesparsematrixdict.jl +++ b/src/experimental/extendablesparsematrixdict.jl @@ -1,68 +1,34 @@ -mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ cscmatrix::SparseMatrixCSC{Tv, Ti} """ - Linked list structure holding data of extension + Vector of dictionaries for new entries """ - dictmatrices::Vector{SparseMatrixDict{Tv,Ti}} - - nodeparts::Vector{Ti} - partnodes::Vector{Vector{Ti}} - colparts::Vector{Vector{Ti}} -end - - -function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti} - ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n), - [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p], - zeros(Ti,n), - Vector{Ti}[], - Vector{Ti}[] - ) + dictmatrix::SparseMatrixDict{Tv,Ti} end -function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti} - ncol=maximum(partcolors) - colparts=[Ti[] for i=1:ncol] - for i=1:length(partcolors) - push!(colparts[partcolors[i]],i) - end - ext.colparts=colparts - ext -end -function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti} - ext=ExtendableSparseMatrixParallelDict(m,n,length(pc)) - partcolors!(ext,pc) +function ExtendableSparseMatrixDict{Tv, Ti}(n::Integer,m::Integer) where{Tv, Ti<:Integer} + ExtendableSparseMatrixDict(spzeros(Tv, Ti, m, n), + SparseMatrixDict{Tv,Ti}(m,n) + ) end +ExtendableSparseMatrixDict(n::Integer,m::Integer)=ExtendableSparseMatrixDict{Float64,Int}(n,m) -ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p) - - -function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti} +function reset!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where {Tv,Ti} m,n=size(ext.cscmatrix) ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p] - ext.nodeparts.=zero(Ti) + ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) ext end -function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti} - reset!(ext,length(ext.dictmatrices)) -end - -function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti} - reset!(ext,length(pc)) - partcolors!(ext,pc) -end - -function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} - lnew=sumlength(ext.dictmatrices) +function flush!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where{Tv,Ti} + lnew=length(ext.dictmatrix.values) if 
lnew>0 (;colptr,nzval,rowval,m,n)=ext.cscmatrix l=lnew+nnz(ext.cscmatrix) @@ -70,18 +36,6 @@ function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} J=Vector{Ti}(undef,l) V=Vector{Tv}(undef,l) i=1 - ip=1 - for m in ext.dictmatrices - for (p,v) in m.values - ext.nodeparts[first(p)]=ip - I[i]=first(p) - J[i]=last(p) - V[i]=v - i=i+1 - end - ip=ip+1 - end - for icsc=1:length(colptr)-1 for j=colptr[icsc]:colptr[icsc+1]-1 I[i]=icsc @@ -91,39 +45,25 @@ function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} end end - np=length(ext.dictmatrices) - ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np] - ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) - - n,m=size(ext) - pn=zeros(Int,np) - for i=1:n - if ext.nodeparts[i]>0 - pn[ext.nodeparts[i]]+=1 - end + for (p,v) in ext.dictmatrix.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 end - partnodes=[zeros(Int,pn[i]) for i=1:np] - pn.=1 - for i=1:n - if ext.nodeparts[i]>0 - ip=ext.nodeparts[i] - partnodes[ip][pn[ip]]=i - pn[ip]+=1 - end - end - ext.partnodes=partnodes + + ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) + ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) end ext end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) + +function SparseArrays.sparse(ext::ExtendableSparseMatrixDict) flush!(ext) ext.cscmatrix end - - -function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, +function Base.setindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti} @@ -131,61 +71,31 @@ function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - error("use rawupdateindex! for new entries into ExtendableSparseMatrixParallelDict") + setindex!(ext.dictmatrix,v,i,j) end end -function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, +function Base.getindex(ext::ExtendableSparseMatrixDict{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 - return ext.cscmatrix.nzval[k] - elseif sumlength(ext.dictmatrices) == 0 - return zero(Tv) + ext.cscmatrix.nzval[k] else - error("flush! 
ExtendableSparseMatrixParallelDict before using getindex") + getindex(ext.dictmatrix,i,j) end end -function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, +function rawupdateindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, op, v, i, - j, - tid) where {Tv, Ti <: Integer} + j) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) + rawupdateindex!(ext.dictmatrix,op,v,i,j) end end - -function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} - A=ext.cscmatrix - colparts=ext.colparts - partnodes=ext.partnodes - rows = rowvals(A) - vals = nonzeros(A) - - r.=zero(Tv) - m,n=size(A) - for icol=1:length(colparts) - part=colparts[icol] - @tasks for ip=1:length(part) - @inbounds begin - for j in partnodes[part[ip]] - for i in nzrange(A,j) - row = rows[i] - val = vals[i] - r[row]+=val*x[j] - end - end - end - end - end - r -end - diff --git a/src/experimental/extendablesparsematrixparalleldict.jl b/src/experimental/extendablesparsematrixparalleldict.jl new file mode 100644 index 0000000..241964d --- /dev/null +++ b/src/experimental/extendablesparsematrixparalleldict.jl @@ -0,0 +1,194 @@ +mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + dictmatrices::Vector{SparseMatrixDict{Tv,Ti}} + + nodeparts::Vector{Ti} + partnodes::Vector{Vector{Ti}} + colparts::Vector{Vector{Ti}} +end + + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti} + ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n), + [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p], + zeros(Ti,n), + Vector{Ti}[], + Vector{Ti}[] + ) +end + +function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti} + ncol=maximum(partcolors) + colparts=[Ti[] for i=1:ncol] + for i=1:length(partcolors) + push!(colparts[partcolors[i]],i) + end + ext.colparts=colparts + ext +end + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti} + ext=ExtendableSparseMatrixParallelDict(m,n,length(pc)) + partcolors!(ext,pc) +end + + +ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p) + + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p] + ext.nodeparts.=zero(Ti) + ext +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti} + reset!(ext,length(ext.dictmatrices)) +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti} + reset!(ext,length(pc)) + partcolors!(ext,pc) +end + + +function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} + lnew=sumlength(ext.dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=ext.cscmatrix + l=lnew+nnz(ext.cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + ip=1 + for m in ext.dictmatrices + for (p,v) in m.values + ext.nodeparts[last(p)]=ip + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + + + 
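# Invert the nodeparts map assembled above: count the nodes owned by
+        # each partition, then collect their indices into partnodes.
+        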
np=length(ext.dictmatrices) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np] + ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) + + npts::Vector{Ti}=ext.nodeparts + pn=zeros(Ti,np) + for i=1:n + npi=npts[i] + if npi>0 + pn[npi]+=1 + end + end + partnodes=[zeros(Int,pn[i]) for i=1:np] + pn.=1 + for i=1:n + npi=ext.nodeparts[i] + if npi>0 + partnodes[npi][pn[npi]]=i + pn[npi]+=1 + end + end + ext.partnodes=partnodes + end + ext +end + +function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) + flush!(ext) + ext.cscmatrix +end + + + +function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! for new entries into ExtendableSparseMatrixParallelDict") + end +end + + +function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sumlength(ext.dictmatrices) == 0 + return zero(Tv) + else + error("flush! ExtendableSparseMatrixParallelDict before using getindex") + end +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) + end +end + +function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} + A=ext.cscmatrix + colparts=ext.colparts + partnodes=ext.partnodes + rows = rowvals(A) + vals = nonzeros(A) + + r.=zero(Tv) + m,n=size(A) + for icol=1:length(colparts) + part=colparts[icol] + @tasks for ip=1:length(part) + @inbounds begin + for j in partnodes[part[ip]] + for i in nzrange(A,j) + row = rows[i] + val = vals[i] + r[row]+=val*x[j] + end + end + end + end + end + r +end + diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 112f949..2288c89 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -15,33 +15,17 @@ end function rawupdateindex!(m::SparseMatrixDict{Tv,Ti},op,v,i,j) where {Tv,Ti} p=Pair(i,j) - haskey(m.values,p) ? vnew=op(m.values[p],v) : vnew=op(zero(Tv),v) - m.values[p]=vnew + m.values[p]=op(get(m.values, p, zero(Tv)),v) end function Base.getindex(m::SparseMatrixDict{Tv},i,j) where Tv - haskey(m.values,Pair(i,j)) ? 
m.values[Pair(i,j)] : zero(Tv) + get(m.values,Pair(i,j),zero(Tv)) end Base.size(m::SparseMatrixDict)=(m.m,m.n) flush!(m::SparseMatrixDict)=nothing -function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} - l=length(m.values) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - for (p,v) in m.values - I[i]=first(p) - J[i]=last(p) - V[i]=v - i=i+1 - end - SparseArrays.sparse!(I,J,V,size(m)...,+) -end - sumlength(mv::Vector{SparseMatrixDict{Tv,Ti}}) where{Tv,Ti}=sum(m->length(m.values),mv) function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} @@ -62,3 +46,4 @@ function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} end +SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} = sparse([m]) diff --git a/test/ExperimentalDict.jl b/test/ExperimentalDict.jl new file mode 100644 index 0000000..a8b0375 --- /dev/null +++ b/test/ExperimentalDict.jl @@ -0,0 +1,36 @@ +module ExperimentalDict + +using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental +using DocStringExtensions +using BenchmarkTools +using Test + + +function ExtendableSparse.reset!(A::ExtendableSparseMatrix) + A.cscmatrix=spzeros(size(A)...) + A.lnkmatrix=nothing +end + + +function test_correctness_build(N) + X=1:N + Y=1:N + A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) + A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + partassemble!(A0,X,Y) + partassemble!(A,X,Y) + @test sparse(A0)≈sparse(A) +end + +function speed_build(N) + X=1:N + Y=1:N + A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) + A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + + tlnk= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) + tdict= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + tdict/tlnk +end + +end diff --git a/test/experimental_parallel.jl b/test/ExperimentalParallel.jl similarity index 100% rename from test/experimental_parallel.jl rename to test/ExperimentalParallel.jl diff --git a/test/experimental_dict.jl b/test/ExperimentalParallelDict.jl similarity index 90% rename from test/experimental_dict.jl rename to test/ExperimentalParallelDict.jl index f88715a..96d2d95 100644 --- a/test/experimental_dict.jl +++ b/test/ExperimentalParallelDict.jl @@ -1,3 +1,5 @@ +module ExperimentalParallelDict + using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using DocStringExtensions using BenchmarkTools @@ -48,10 +50,9 @@ function test_correctness_build(N) end end -function test_correctness_mul(N; nps=5) +function test_correctness_mul(N; allnp=[4,5,6,7,8]) X=1:N Y=1:N - allnp=[4,5,6,7,8] A0=ExtendableSparseMatrix(N^2,N^2) partassemble!(A0,X,Y) @@ -88,16 +89,23 @@ end function speedup_build(N; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N + A0=ExtendableSparseMatrixParallelDict(N^2,N^2,1) A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A0,X,Y) + nz=copy(nonzeros(A0)) + reset!(A0) + partassemble!(A0,X,Y) + @assert nonzeros(A0)≈(nz) + partassemble!(A,X,Y) nz=copy(nonzeros(A)) reset!(A) partassemble!(A,X,Y) @assert nonzeros(A)≈(nz) - + # Get the base timing # During setup, reset matrix to empty state. 
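+    # (A0 carries the single-threaded baseline, A the parallel runs.)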
- t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + t0=@belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) result=[] for np in allnp @@ -128,3 +136,6 @@ function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) end result end + +end + From 79432f336ae1c304197e6adebf3b63e1d91b170e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 23 May 2024 00:02:55 +0200 Subject: [PATCH 21/44] AbstractExtendableSparseMatrix for "old" code. --- src/ExtendableSparse.jl | 1 + src/experimental/Experimental.jl | 4 +- src/experimental/abstractextendable.jl | 35 --- src/matrix/abstractextendable.jl | 299 ++++++++++++++++++++++ src/matrix/extendable.jl | 337 ++----------------------- 5 files changed, 316 insertions(+), 360 deletions(-) delete mode 100644 src/experimental/abstractextendable.jl create mode 100644 src/matrix/abstractextendable.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 5bdc442..8ab64ba 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -23,6 +23,7 @@ import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") +include("matrix/abstractextendable.jl") include("matrix/extendable.jl") export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 9fdf1e4..e9e64e9 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -4,7 +4,7 @@ using LinearAlgebra using SparseArrays: AbstractSparseMatrixCSC import SparseArrays: nonzeros, getcolptr,nzrange import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex -using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash, AbstractExtendableSparseMatrix using DocStringExtensions using Metis using Base.Threads @@ -38,8 +38,6 @@ export ILUAMPreconditioner, PILUAMPreconditioner export reorderlinsys, nnz_noflush -include("abstractextendable.jl") - include("sparsematrixdict.jl") export SparseMatrixDict diff --git a/src/experimental/abstractextendable.jl b/src/experimental/abstractextendable.jl deleted file mode 100644 index 2cd72c8..0000000 --- a/src/experimental/abstractextendable.jl +++ /dev/null @@ -1,35 +0,0 @@ -abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end - -SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) - -SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) - -Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) - -function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) - A=sparse(ext) - xnnz = nnz(A) - m, n = size(A) - print(io, - m, - "×", - n, - " ", - typeof(ext), - " with ", - xnnz, - " stored ", - xnnz == 1 ? 
"entry" : "entries") - - if !haskey(io, :compact) - io = IOContext(io, :compact => true) - end - - if !(m == 0 || n == 0 || xnnz == 0) - print(io, ":\n") - Base.print_array(IOContext(io), A) - end -end - - -LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix{Tv,Ti}, x) where {Tv,Ti} = mul!(r,sparse(ext),x) diff --git a/src/matrix/abstractextendable.jl b/src/matrix/abstractextendable.jl new file mode 100644 index 0000000..589376c --- /dev/null +++ b/src/matrix/abstractextendable.jl @@ -0,0 +1,299 @@ +""" + +Must implement: +sparse +Constructor from SparseMatrixCSC +rawupdateindex! +""" + +abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. +""" +SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return nonzeros in ext.cscmatrix. +""" +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) + +Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) + + + +""" +$(SIGNATURES) + +Return element type. +""" +Base.eltype(::AbstractExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv + + + +""" +$(SIGNATURES) + + Create SparseMatrixCSC from ExtendableSparseMatrix +""" +SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrix)=sparse(A) + + + + +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) + A=sparse(ext) + xnnz = nnz(A) + m, n = size(A) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnz, + " stored ", + xnnz == 1 ? "entry" : "entries") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnz == 0) + print(io, ":\n") + Base.print_array(IOContext(io), A) + end +end + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return rowvals in ext.cscmatrix. +""" +SparseArrays.rowvals(ext::AbstractExtendableSparseMatrix)=rowvals(sparse(ext)) + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return colptr of in ext.cscmatrix. +""" +SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrix)=getcolptr(sparse(ext)) + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return findnz(ext.cscmatrix). +""" +SparseArrays.findnz(ext::AbstractExtendableSparseMatrix)=findnz(sparse(ext)) + + +@static if VERSION >= v"1.7" + SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrix)= SparseArrays._checkbuffers(sparse(ext)) +end + +""" + A\b + +[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components +are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. +In that case, Julias standard `\` is called, which is realized via UMFPACK. +""" +function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{Tv, Ti}, + b::AbstractVector) where {Tv, Ti} + SparspakLU(sparse(ext)) \ b +end + + +""" +$(SIGNATURES) + +[`\\`](@ref) for Symmetric{ExtendableSparse} +""" +function LinearAlgebra.:\(symm_ext::Symmetric{Tm, T}, + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + Symmetric(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... +end + +""" +$(SIGNATURES) + +[`\\`](@ref) for Hermitian{ExtendableSparse} +""" +function LinearAlgebra.:\(symm_ext::Hermitian{Tm, T}, + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + Hermitian(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... 
+end + +if USE_GPL_LIBS + for (Tv) in (:Float64, :ComplexF64) + @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{$Tv, Ti}, + B::AbstractVector) where {Ti} + sparse(ext) \ B + end end + + @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, + AbstractExtendableSparseMatrix{ + $Tv, + Ti + }}, + B::AbstractVector) where {Ti} + symm_csc = Symmetric(sparse(symm_ext.data), Symbol(symm_ext.uplo)) + symm_csc \ B + end end + + @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, + AbstractExtendableSparseMatrix{ + $Tv, + Ti + }}, + B::AbstractVector) where {Ti} + symm_csc = Hermitian(sparse(symm_ext.data), Symbol(symm_ext.uplo)) + symm_csc \ B + end end + end +end # USE_GPL_LIBS + +""" +$(SIGNATURES) + +[`flush!`](@ref) and ldiv with ext.cscmatrix +""" +function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrix, x) + LinearAlgebra.ldiv!(r, sparse(ext), x) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and multiply with ext.cscmatrix +""" +function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix, x) + LinearAlgebra.mul!(r, sparse(ext), x) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate norm from cscmatrix +""" +function LinearAlgebra.norm(A::AbstractExtendableSparseMatrix, p::Real = 2) + return LinearAlgebra.norm(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate opnorm from cscmatrix +""" +function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrix, p::Real = 2) + return LinearAlgebra.opnorm(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate cond from cscmatrix +""" +function LinearAlgebra.cond(A::AbstractExtendableSparseMatrix, p::Real = 2) + return LinearAlgebra.cond(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and check for symmetry of cscmatrix +""" +function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrix) + return LinearAlgebra.issymmetric(sparse(A)) +end + + + + + + +function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrix + T(sparse(A) + sparse(B)) +end + +function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrix + T(sparse(A) - sparse(B)) +end + +function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrix + T(sparse(A) * sparse(B)) +end + +""" +$(SIGNATURES) +""" +function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrix + return T(d * sparse(ext)) +end + +""" +$(SIGNATURES) +""" +function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrix + return T(sparse(ext) * d) +end + + +""" +$(SIGNATURES) + +Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. +""" +function Base.:+(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) + return sparse(ext) + csc +end + + +""" +$(SIGNATURES) + +Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. +""" +function Base.:-(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) + return sparse(ext) - csc +end + +""" +$(SIGNATURES) + +Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. 
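+The result is returned as a plain SparseMatrixCSC.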
+""" +function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrix) + return csc - sparse(ext) +end + +""" +$(SIGNATURES) +""" +function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrix) + dropzeros!(sparse(ext)) +end + + + +function mark_dirichlet(A::AbstractExtendableSparseMatrix;penalty=1.0e20) + mark_dirichlet(sparse(A);penalty) +end + +function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrix + T(eliminate_dirichlet(sparse(A),dirichlet)) +end + +function eliminate_dirichlet!(A::AbstractExtendableSparseMatrix,dirichlet) + eliminate_dirichlet!(sparse(A),dirichlet) + A +end + diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index df67dc7..a655c12 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -7,7 +7,7 @@ either in cscmatrix, or in lnkmatrix, never in both. $(TYPEDFIELDS) """ -mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrixCSC{Tv, Ti} +mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ @@ -82,9 +82,12 @@ $(SIGNATURES) Create ExtendableSparseMatrix from SparseMatrixCSC """ - function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) +end + +function ExtendableSparseMatrix{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) end """ @@ -111,12 +114,15 @@ ExtendableSparseMatrix(A::AbstractMatrix) = ExtendableSparseMatrix(sparse(A)) Create ExtendableSparseMatrix from triplet (COO) data. """ ExtendableSparseMatrix(I, J, V::AbstractVector) = ExtendableSparseMatrix(sparse(I, J, V)) + function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n) ExtendableSparseMatrix(sparse(I, J, V, m, n)) end + function ExtendableSparseMatrix(I, J, V::AbstractVector, combine::Function) ExtendableSparseMatrix(sparse(I, J, V, combine)) end + function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n, combine::Function) ExtendableSparseMatrix(sparse(I, J, V, m, n, combine)) end @@ -136,17 +142,6 @@ end -""" -$(SIGNATURES) - - Create SparseMatrixCSC from ExtendableSparseMatrix -""" -function SparseArrays.SparseMatrixCSC(A::ExtendableSparseMatrix) - flush!(A) - A.cscmatrix -end - - """ $(SIGNATURES) @@ -285,42 +280,7 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, end end -""" -$(SIGNATURES) -Size of ExtendableSparseMatrix. -""" -Base.size(ext::ExtendableSparseMatrix) = (ext.cscmatrix.m, ext.cscmatrix.n) - -""" -$(SIGNATURES) - -Show matrix -""" -function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrix) - flush!(ext) - xnnz = nnz(ext) - m, n = size(ext) - print(io, - m, - "×", - n, - " ", - typeof(ext), - " with ", - xnnz, - " stored ", - xnnz == 1 ? "entry" : "entries") - - if !haskey(io, :compact) - io = IOContext(io, :compact => true) - end - - if !(m == 0 || n == 0 || xnnz == 0) - print(io, ":\n") - Base.print_array(IOContext(io), ext.cscmatrix) - end -end """ $(SIGNATURES) @@ -336,276 +296,25 @@ function flush!(ext::ExtendableSparseMatrix) end return ext end -""" -$(SIGNATURES) - -Reset ExtenableSparseMatrix into state similar to that after creation. -""" -function reset!(A::ExtendableSparseMatrix) - A.cscmatrix=spzeros(size(A)...) 
- A.lnkmatrix=nothing -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. -""" -function SparseArrays.nnz(ext::ExtendableSparseMatrix) - flush!(ext) - return nnz(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return nonzeros in ext.cscmatrix. -""" -function SparseArrays.nonzeros(ext::ExtendableSparseMatrix) - flush!(ext) - return nonzeros(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -Return element type. -""" -Base.eltype(::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return rowvals in ext.cscmatrix. -""" -function SparseArrays.rowvals(ext::ExtendableSparseMatrix) - flush!(ext) - rowvals(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return colptr of in ext.cscmatrix. -""" -function SparseArrays.getcolptr(ext::ExtendableSparseMatrix) - flush!(ext) - return getcolptr(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return findnz(ext.cscmatrix). -""" -function SparseArrays.findnz(ext::ExtendableSparseMatrix) - flush!(ext) - return findnz(ext.cscmatrix) -end - -@static if VERSION >= v"1.7" - function SparseArrays._checkbuffers(ext::ExtendableSparseMatrix) - flush!(ext) - SparseArrays._checkbuffers(ext.cscmatrix) - end -end - -""" - A\b - -[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components -are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. -In that case, Julias standard `\` is called, which is realized via UMFPACK. -""" -function LinearAlgebra.:\(ext::ExtendableSparseMatrix{Tv, Ti}, - b::AbstractVector) where {Tv, Ti} - flush!(ext) - SparspakLU(ext) \ b -end - -""" -$(SIGNATURES) - -[`\\`](@ref) for Symmetric{ExtendableSparse} -""" -function LinearAlgebra.:\(symm_ext::Symmetric{Tm, ExtendableSparseMatrix{Tm, Ti}}, - b::AbstractVector) where {Tm, Ti} - symm_ext.data \ b # no ldlt yet ... -end - -""" -$(SIGNATURES) - -[`\\`](@ref) for Hermitian{ExtendableSparse} -""" -function LinearAlgebra.:\(symm_ext::Hermitian{Tm, ExtendableSparseMatrix{Tm, Ti}}, - b::AbstractVector) where {Tm, Ti} - symm_ext.data \ B # no ldlt yet ... 
-end - -if USE_GPL_LIBS - for (Tv) in (:Float64, :ComplexF64) - @eval begin function LinearAlgebra.:\(ext::ExtendableSparseMatrix{$Tv, Ti}, - B::AbstractVector) where {Ti} - flush!(ext) - ext.cscmatrix \ B - end end - - @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, - ExtendableSparseMatrix{ - $Tv, - Ti - }}, - B::AbstractVector) where {Ti} - flush!(symm_ext.data) - symm_csc = Symmetric(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo)) - symm_csc \ B - end end - - @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, - ExtendableSparseMatrix{ - $Tv, - Ti - }}, - B::AbstractVector) where {Ti} - flush!(symm_ext.data) - symm_csc = Hermitian(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo)) - symm_csc \ B - end end - end -end # USE_GPL_LIBS -""" -$(SIGNATURES) -[`flush!`](@ref) and ldiv with ext.cscmatrix -""" -function LinearAlgebra.ldiv!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x) +function SparseArrays.sparse(ext::ExtendableSparseMatrix) flush!(ext) - return LinearAlgebra.ldiv!(r, ext.cscmatrix, x) + ext.cscmatrix end -""" -$(SIGNATURES) - -[`flush!`](@ref) and multiply with ext.cscmatrix -""" -function LinearAlgebra.mul!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x) - flush!(ext) - return LinearAlgebra.mul!(r, ext.cscmatrix, x) -end """ $(SIGNATURES) -[`flush!`](@ref) and calculate norm from cscmatrix -""" -function LinearAlgebra.norm(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.norm(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate opnorm from cscmatrix -""" -function LinearAlgebra.opnorm(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.opnorm(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate cond from cscmatrix -""" -function LinearAlgebra.cond(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.cond(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and check for symmetry of cscmatrix -""" -function LinearAlgebra.issymmetric(A::ExtendableSparseMatrix) - flush!(A) - return LinearAlgebra.issymmetric(A.cscmatrix) -end - -""" -$(SIGNATURES) - -Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. -""" -function Base.:+(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC) - flush!(ext) - return ext.cscmatrix + csc -end - -function Base.:+(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix + B.cscmatrix) -end - -function Base.:-(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix - B.cscmatrix) -end - -function Base.:*(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix * B.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.:*(d::Diagonal, ext::ExtendableSparseMatrix) - flush!(ext) - return ExtendableSparseMatrix(d * ext.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.:*(ext::ExtendableSparseMatrix, d::Diagonal) - flush!(ext) - return ExtendableSparseMatrix(ext.cscmatrix * d) -end - -""" -$(SIGNATURES) - -Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. +Reset ExtenableSparseMatrix into state similar to that after creation. """ -function Base.:-(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC) - flush!(ext) - return ext.cscmatrix - csc +function reset!(A::ExtendableSparseMatrix) + A.cscmatrix=spzeros(size(A)...) 
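+    # drop both the assembled CSC part and any pending LNK entries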
+ A.lnkmatrix=nothing end -""" -$(SIGNATURES) - -Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. -""" -function Base.:-(csc::SparseMatrixCSC, ext::ExtendableSparseMatrix) - flush!(ext) - return csc - ext.cscmatrix -end -""" -$(SIGNATURES) -""" -function SparseArrays.dropzeros!(ext::ExtendableSparseMatrix) - flush!(ext) - dropzeros!(ext.cscmatrix) -end """ $(SIGNATURES) @@ -652,19 +361,3 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} end -function mark_dirichlet(A::ExtendableSparseMatrix;penalty=1.0e20) - flush!(A) - mark_dirichlet(A.cscmatrix;penalty) -end - -function eliminate_dirichlet(A::ExtendableSparseMatrix,dirichlet) - flush!(A) - ExtendableSparseMatrix(eliminate_dirichlet(A.cscmatrix,dirichlet)) -end - -function eliminate_dirichlet!(A::ExtendableSparseMatrix,dirichlet) - flush!(A) - eliminate_dirichlet!(A.cscmatrix,dirichlet) - A -end - From 94250d1ecd70ce027936eabebbdd21223f8a891f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 23 May 2024 11:11:52 +0200 Subject: [PATCH 22/44] add Experimental tests to CI --- test/ExperimentalDict.jl | 1 - test/ExperimentalParallelDict.jl | 1 - test/runtests.jl | 12 ++++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/ExperimentalDict.jl b/test/ExperimentalDict.jl index a8b0375..7bb8211 100644 --- a/test/ExperimentalDict.jl +++ b/test/ExperimentalDict.jl @@ -1,7 +1,6 @@ module ExperimentalDict using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools using Test diff --git a/test/ExperimentalParallelDict.jl b/test/ExperimentalParallelDict.jl index 96d2d95..6d817a0 100644 --- a/test/ExperimentalParallelDict.jl +++ b/test/ExperimentalParallelDict.jl @@ -1,7 +1,6 @@ module ExperimentalParallelDict using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools using Test diff --git a/test/runtests.jl b/test/runtests.jl index 4d7ae07..1c63f1c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,18 @@ using BenchmarkTools using MultiFloats using ForwardDiff +@testset "ExperimentalDict" begin + include("ExperimentalDict.jl") + ExperimentalDict.test_correctness_build(100) +end + +@testset "ExperimentalParallelDict" begin + include("ExperimentalParallelDict.jl") + ExperimentalParallelDict.test_correctness_update(200) + ExperimentalParallelDict.test_correctness_build(200) + ExperimentalParallelDict.test_correctness_mul(200) +end + @testset "Constructors" begin include("test_constructors.jl") end @testset "Copy-Methods" begin include("test_copymethods.jl") end From e6887fc5576b13b24800051b976225baee033b3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 23 May 2024 11:41:16 +0200 Subject: [PATCH 23/44] add ExperimentalParallelLocking to tests --- src/matrix/extendable.jl | 6 +++- ...llel.jl => ExperimentalParallelLocking.jl} | 33 +++++++------------ test/runtests.jl | 15 +++++++++ 3 files changed, 32 insertions(+), 22 deletions(-) rename test/{ExperimentalParallel.jl => ExperimentalParallelLocking.jl} (85%) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index a655c12..572227e 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -30,7 +30,11 @@ mutable struct Locking locking::Bool end -const locking=Locking(true) +# +# Locking functionality just for developing parallelization. +# To be removed before merging into main branch. 
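+# The locking tests enable it via with_locking!(true) around parallel
+# assembly and switch it off again afterwards.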
+# +const locking=Locking(false) function with_locking!(l::Bool) global locking diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallelLocking.jl similarity index 85% rename from test/ExperimentalParallel.jl rename to test/ExperimentalParallelLocking.jl index fa42d1d..4ad7ff2 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallelLocking.jl @@ -1,6 +1,8 @@ +module ExperimentalParallelLocking + using ExtendableSparse,SparseArrays +using ExtendableSparse: with_locking! using ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools using Test @@ -11,6 +13,7 @@ Test correctness of parallel assembly on NxN grid during update phase, assuming that the structure has been assembled. """ function test_correctness_update(N) + with_locking!(true) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -29,6 +32,7 @@ function test_correctness_update(N) partassemble!(A,X,Y, np) @test nonzeros(A)≈nz end + with_locking!(false) end """ @@ -38,6 +42,7 @@ Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ function test_correctness_build(N) + with_locking!(true) X=1:N Y=1:N allnp=[4,5,6,7,8] @@ -52,20 +57,10 @@ function test_correctness_build(N) partassemble!(A,X,Y, np) @test nonzeros(A)≈nz end + with_locking!(false) end -@testset "update correctness" begin - test_correctness_update(50) - test_correctness_update(100) - test_correctness_update(rand(30:200)) -end - -@testset "build correctness" begin - test_correctness_build(50) - test_correctness_build(100) - test_correctness_build(rand(30:200)) -end """ speedup_update(N) @@ -74,6 +69,7 @@ Benchmark parallel speedup of update phase of parallel assembly on NxN grid. Check for correctness as well. """ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) + with_locking!(true) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -90,18 +86,10 @@ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end + with_locking!(false) result end -""" - reset!(A) - -Reset ExtenableSparseMatrix into state similar to that after creation. -""" -function ExtendableSparse.reset!(A::ExtendableSparseMatrix) - A.cscmatrix=spzeros(size(A)...) - A.lnkmatrix=nothing -end """ speedup_build(N) @@ -112,6 +100,7 @@ Check for correctness as well. Works in the moment with locking. 
""" function speedup_build(N; allnp=[4,5,6,7,8,9,10]) + with_locking!(true) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -133,5 +122,7 @@ function speedup_build(N; allnp=[4,5,6,7,8,9,10]) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end + with_locking!(false) result end +end diff --git a/test/runtests.jl b/test/runtests.jl index 1c63f1c..e7dc9d7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,21 @@ using BenchmarkTools using MultiFloats using ForwardDiff + +@testset "ExperimentalParallelLocking" begin + include("ExperimentalParallelLocking.jl") + @testset "update correctness" begin + ExperimentalParallelLocking.test_correctness_update(50) + ExperimentalParallelLocking.test_correctness_update(100) + ExperimentalParallelLocking.test_correctness_update(rand(30:200)) + end + + @testset "build correctness" begin + ExperimentalParallelLocking.test_correctness_build(50) + ExperimentalParallelLocking.test_correctness_build(100) + ExperimentalParallelLocking.test_correctness_build(rand(30:200)) + end +end @testset "ExperimentalDict" begin include("ExperimentalDict.jl") ExperimentalDict.test_correctness_build(100) From 58fbb32d8ae5341bf7fdd5b9f4939adae0f854a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 26 May 2024 19:57:46 +0200 Subject: [PATCH 24/44] Generic extendable sparse matrix structs for parallel, scalar --- src/experimental/Experimental.jl | 25 +- .../extendablesparsematrixdict.jl | 101 ---- .../extendablesparsematrixparallel.jl | 159 ++++++ .../extendablesparsematrixparalleldict.jl | 194 -------- .../extendablesparsematrixscalar.jl | 75 +++ src/experimental/parallel_testtools.jl | 4 +- src/experimental/sparsematrixdict.jl | 82 +++- src/experimental/sparsematrixlnkdict.jl | 452 ++++++++++++++++++ ...mental_rect.jl => ExperimentalParallel.jl} | 0 9 files changed, 782 insertions(+), 310 deletions(-) delete mode 100644 src/experimental/extendablesparsematrixdict.jl create mode 100644 src/experimental/extendablesparsematrixparallel.jl delete mode 100644 src/experimental/extendablesparsematrixparalleldict.jl create mode 100644 src/experimental/extendablesparsematrixscalar.jl create mode 100644 src/experimental/sparsematrixlnkdict.jl rename test/{experimental_rect.jl => ExperimentalParallel.jl} (100%) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index e9e64e9..37a34d1 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -41,13 +41,34 @@ export reorderlinsys, nnz_noflush include("sparsematrixdict.jl") export SparseMatrixDict -include("extendablesparsematrixdict.jl") +include("sparsematrixlnkdict.jl") +export SparseMatrixLNKDict + +include("extendablesparsematrixscalar.jl") +export ExtendableSparseMatrixScalar + +const ExtendableSparseMatrixDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixDict{Tv,Ti},Tv,Ti} export ExtendableSparseMatrixDict -include("extendablesparsematrixparalleldict.jl") + +const ExtendableSparseMatrixLNKDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} +export ExtendableSparseMatrixLNKDict + +const ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNK{Tv,Ti},Tv,Ti} +export ExtendableSparseMatrixLNK + + +include("extendablesparsematrixparallel.jl") +const ExtendableSparseMatrixParallelDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixDict{Tv,Ti},Tv,Ti} +ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) export 
ExtendableSparseMatrixParallelDict, partcolors! +const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} +ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) +export ExtendableSparseMatrixParallelLNKDict, partcolors! + + include("parallel_testtools.jl") export part2d, showgrid, partassemble!, assemblepartition! diff --git a/src/experimental/extendablesparsematrixdict.jl b/src/experimental/extendablesparsematrixdict.jl deleted file mode 100644 index 6641e5a..0000000 --- a/src/experimental/extendablesparsematrixdict.jl +++ /dev/null @@ -1,101 +0,0 @@ -mutable struct ExtendableSparseMatrixDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} - """ - Final matrix data - """ - cscmatrix::SparseMatrixCSC{Tv, Ti} - - """ - Vector of dictionaries for new entries - """ - dictmatrix::SparseMatrixDict{Tv,Ti} -end - - -function ExtendableSparseMatrixDict{Tv, Ti}(n::Integer,m::Integer) where{Tv, Ti<:Integer} - ExtendableSparseMatrixDict(spzeros(Tv, Ti, m, n), - SparseMatrixDict{Tv,Ti}(m,n) - ) -end - -ExtendableSparseMatrixDict(n::Integer,m::Integer)=ExtendableSparseMatrixDict{Float64,Int}(n,m) - -function reset!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where {Tv,Ti} - m,n=size(ext.cscmatrix) - ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) - ext -end - - -function flush!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where{Tv,Ti} - lnew=length(ext.dictmatrix.values) - if lnew>0 - (;colptr,nzval,rowval,m,n)=ext.cscmatrix - l=lnew+nnz(ext.cscmatrix) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - - for (p,v) in ext.dictmatrix.values - I[i]=first(p) - J[i]=last(p) - V[i]=v - i=i+1 - end - - ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) - ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) - end - ext -end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixDict) - flush!(ext) - ext.cscmatrix -end - -function Base.setindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) where {Tv, Ti} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - setindex!(ext.dictmatrix,v,i,j) - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixDict{Tv, Ti}, - i::Integer, - j::Integer) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] - else - getindex(ext.dictmatrix,i,j) - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, - op, - v, - i, - j) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.dictmatrix,op,v,i,j) - end -end diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl new file mode 100644 index 0000000..49566b1 --- /dev/null +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -0,0 +1,159 @@ +mutable struct ExtendableSparseMatrixXParallel{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + xmatrices::Vector{Tm} + + nodeparts::Vector{Ti} + partnodes::Vector{Vector{Ti}} + colparts::Vector{Vector{Ti}} +end + + 
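+# Tm is the backing type that collects new entries (SparseMatrixDict or
+# SparseMatrixLNKDict via the aliases in Experimental.jl); one instance
+# per partition keeps concurrent insertion free of write conflicts.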
+function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm, Tv, Ti}
+    ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n),
+                                    [Tm(m,n) for i=1:p],
+                                    zeros(Ti,n),
+                                    Vector{Ti}[],
+                                    Vector{Ti}[]
+                                    )
+end
+
+function partcolors!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, partcolors) where {Tm, Tv, Ti}
+    ncol=maximum(partcolors)
+    colparts=[Ti[] for i=1:ncol]
+    for i=1:length(partcolors)
+        push!(colparts[partcolors[i]],i)
+    end
+    ext.colparts=colparts
+    ext
+end
+
+function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m, pc::Vector) where{Tm, Tv, Ti}
+    ext=ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,length(pc))
+    partcolors!(ext,pc)
+end
+
+
+function reset!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti}
+    m,n=size(ext.cscmatrix)
+    ext.cscmatrix=spzeros(Tv, Ti, m, n)
+    ext.xmatrices=[Tm(m,n) for i=1:p]
+    ext.nodeparts.=zero(Ti)
+    ext
+end
+
+function reset!(ext::ExtendableSparseMatrixXParallel)
+    reset!(ext,length(ext.xmatrices))
+end
+
+function reset!(ext::ExtendableSparseMatrixXParallel,pc::Vector)
+    reset!(ext,length(pc))
+    partcolors!(ext,pc)
+end
+
+function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti}
+    ext.cscmatrix=sum!(ext.nodeparts, ext.xmatrices, ext.cscmatrix)
+    np=length(ext.xmatrices)
+    (m,n)=size(ext.cscmatrix)
+    ext.xmatrices=[Tm(m,n) for i=1:np]
+
+    npts::Vector{Ti}=ext.nodeparts
+    pn=zeros(Ti,np)
+    for i=1:n
+        npi=npts[i]
+        if npi>0
+            pn[npi]+=1
+        end
+    end
+    partnodes=[zeros(Int,pn[i]) for i=1:np]
+    pn.=1
+    for i=1:n
+        npi=ext.nodeparts[i]
+        if npi>0
+            partnodes[npi][pn[npi]]=i
+            pn[npi]+=1
+        end
+    end
+    ext.partnodes=partnodes
+    ext
+end
+
+
+function SparseArrays.sparse(ext::ExtendableSparseMatrixXParallel)
+    flush!(ext)
+    ext.cscmatrix
+end
+
+
+
+function Base.setindex!(ext::ExtendableSparseMatrixXParallel,
+                        v::Union{Number,AbstractVecOrMat},
+                        i::Integer,
+                        j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = v
+    else
+        error("use rawupdateindex! for new entries into ExtendableSparseMatrixXParallel")
+    end
+end
+
+
+function Base.getindex(ext::ExtendableSparseMatrixXParallel,
+                       i::Integer,
+                       j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        return ext.cscmatrix.nzval[k]
+    elseif sum(nnz,ext.xmatrices) == 0
+        return zero(eltype(ext.cscmatrix))
+    else
+        error("flush! ExtendableSparseMatrixXParallel before using getindex")
+    end
+end
+
+function rawupdateindex!(ext::ExtendableSparseMatrixXParallel,
+                         op,
+                         v,
+                         i,
+                         j,
+                         tid)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.xmatrices[tid],op,v,i,j)
+    end
+end
+
+function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x)
+    A=ext.cscmatrix
+    colparts=ext.colparts
+    partnodes=ext.partnodes
+    rows = SparseArrays.rowvals(A)
+    vals = nonzeros(A)
+
+    r.=zero(eltype(r))
+    m,n=size(A)
+    for icol=1:length(colparts)
+        part=colparts[icol]
+        @tasks for ip=1:length(part)
+            @inbounds begin
+                for j in partnodes[part[ip]]
+                    for i in nzrange(A,j)
+                        row = rows[i]
+                        val = vals[i]
+                        r[row]+=val*x[j]
+                    end
+                end
+            end
+        end
+    end
+    r
+end
diff --git a/src/experimental/extendablesparsematrixparalleldict.jl b/src/experimental/extendablesparsematrixparalleldict.jl
deleted file mode 100644
index 241964d..0000000
--- a/src/experimental/extendablesparsematrixparalleldict.jl
+++ /dev/null
@@ -1,194 +0,0 @@
-mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti}
-    """
-    Final matrix data
-    """
-    cscmatrix::SparseMatrixCSC{Tv, Ti}
-
-    """
-    Vector of dictionaries for new entries
-    """
-    dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}
-
-    nodeparts::Vector{Ti}
-    partnodes::Vector{Vector{Ti}}
-    colparts::Vector{Vector{Ti}}
-end
-
-
-function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti}
-    ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n),
-                                       [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p],
-                                       zeros(Ti,n),
-                                       Vector{Ti}[],
-                                       Vector{Ti}[]
-                                       )
-end
-
-function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti}
-    ncol=maximum(partcolors)
-    colparts=[Ti[] for i=1:ncol]
-    for i=1:length(partcolors)
-        push!(colparts[partcolors[i]],i)
-    end
-    ext.colparts=colparts
-    ext
-end
-
-function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti}
-    ext=ExtendableSparseMatrixParallelDict(m,n,length(pc))
-    partcolors!(ext,pc)
-end
-
-
-ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p)
-
-
-function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti}
-    m,n=size(ext.cscmatrix)
-    ext.cscmatrix=spzeros(Tv, Ti, m, n)
-    ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p]
-    ext.nodeparts.=zero(Ti)
-    ext
-end
-
-function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti}
-    reset!(ext,length(ext.dictmatrices))
-end
-
-function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti}
-    reset!(ext,length(pc))
-    partcolors!(ext,pc)
-end
-
-
-function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti}
-    lnew=sumlength(ext.dictmatrices)
-    if lnew>0
-        (;colptr,nzval,rowval,m,n)=ext.cscmatrix
-        l=lnew+nnz(ext.cscmatrix)
-        I=Vector{Ti}(undef,l)
-        J=Vector{Ti}(undef,l)
-        V=Vector{Tv}(undef,l)
-        i=1
-
-        for icsc=1:length(colptr)-1
-            for j=colptr[icsc]:colptr[icsc+1]-1
-                I[i]=icsc
-                J[i]=rowval[j]
-                V[i]=nzval[j]
-                i=i+1
-            end
-        end
-
-        ip=1
-        for m in ext.dictmatrices
-            for (p,v) in m.values
-                ext.nodeparts[last(p)]=ip
-                I[i]=first(p)
-                J[i]=last(p)
-                V[i]=v
-                i=i+1
-            end
-            ip=ip+1
-        end
-
-
-        np=length(ext.dictmatrices)
-        ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np]
-        ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+)
-
-        npts::Vector{Ti}=ext.nodeparts
-        pn=zeros(Ti,np)
-        for i=1:n
-            npi=npts[i]
-            if 
npi>0 - pn[npi]+=1 - end - end - partnodes=[zeros(Int,pn[i]) for i=1:np] - pn.=1 - for i=1:n - npi=ext.nodeparts[i] - if npi>0 - partnodes[npi][pn[npi]]=i - pn[npi]+=1 - end - end - ext.partnodes=partnodes - end - ext -end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) - flush!(ext) - ext.cscmatrix -end - - - -function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) where {Tv, Ti} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - error("use rawupdateindex! for new entries into ExtendableSparseMatrixParallelDict") - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, - i::Integer, - j::Integer) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - return ext.cscmatrix.nzval[k] - elseif sumlength(ext.dictmatrices) == 0 - return zero(Tv) - else - error("flush! ExtendableSparseMatrixParallelDict before using getindex") - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, - op, - v, - i, - j, - tid) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) - end -end - -function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} - A=ext.cscmatrix - colparts=ext.colparts - partnodes=ext.partnodes - rows = rowvals(A) - vals = nonzeros(A) - - r.=zero(Tv) - m,n=size(A) - for icol=1:length(colparts) - part=colparts[icol] - @tasks for ip=1:length(part) - @inbounds begin - for j in partnodes[part[ip]] - for i in nzrange(A,j) - row = rows[i] - val = vals[i] - r[row]+=val*x[j] - end - end - end - end - end - r -end - diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl new file mode 100644 index 0000000..d7fdc67 --- /dev/null +++ b/src/experimental/extendablesparsematrixscalar.jl @@ -0,0 +1,75 @@ +mutable struct ExtendableSparseMatrixScalar{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Matrix for new entries + """ + xmatrix::Tm +end + + +function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm, Tv, Ti<:Integer} + ExtendableSparseMatrixScalar(spzeros(Tv, Ti, m, n), + Tm(m,n) + ) +end + + +function reset!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrix=Tm(m,n) + ext +end + + +function flush!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where{Tm,Tv,Ti} + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) 
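+    # all entries now live in cscmatrix; continue with a fresh extension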
+ ext +end + +function SparseArrays.sparse(ext::ExtendableSparseMatrixScalar) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::ExtendableSparseMatrixScalar, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + setindex!(ext.xmatrix,v,i,j) + end +end + + +function Base.getindex(ext::ExtendableSparseMatrixScalar, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] + else + getindex(ext.xmatrix,i,j) + end +end + +function rawupdateindex!(ext::ExtendableSparseMatrixScalar, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.xmatrix,op,v,i,j) + end +end diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl index 4f99283..fd261cb 100644 --- a/src/experimental/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -179,7 +179,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1) lindexes=LinearIndices((1:Nx,1:Ny)) if nt==1 - assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) + assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) else p=part2d(X,Y,nt) for icol=1:length(p) @@ -192,7 +192,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1) end -function partassemble!(A::ExtendableSparseMatrixParallelDict,X,Y,nt=1;d=0.1, reset=true) +function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict},X,Y,nt=1;d=0.1, reset=true) Nx=length(X) Ny=length(Y) size(A,1)==Nx*Ny || error("incompatible size of A") diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 2288c89..666aa10 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -1,3 +1,8 @@ +""" + $(TYPEDEF) + +Sparse matrix where entries are organized as dictionary. 
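+
+A sketch of the intended use, assuming the `rawupdateindex!` method defined
+for this type elsewhere in the package:
+
+    A = SparseMatrixDict{Float64,Int}(4, 4)
+    rawupdateindex!(A, +, 1.0, 2, 3)   # A[2,3] += 1.0, inserting the entry
+    S = sparse(A)                      # convert to SparseMatrixCSC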
+""" mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrix{Tv,Ti} m::Ti n::Ti @@ -24,26 +29,81 @@ end Base.size(m::SparseMatrixDict)=(m.m,m.n) -flush!(m::SparseMatrixDict)=nothing - -sumlength(mv::Vector{SparseMatrixDict{Tv,Ti}}) where{Tv,Ti}=sum(m->length(m.values),mv) - -function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} - l=sumlength(mv) +function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(m.values) I=Vector{Ti}(undef,l) J=Vector{Ti}(undef,l) V=Vector{Tv}(undef,l) i=1 - for m in mv - for (p,v) in m.values + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + SparseArrays.sparse!(I,J,V,size(mv[1])...,+) +end + +function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=length(dictmatrix.values) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + for (p,v) in dictmatrix.values I[i]=first(p) J[i]=last(p) V[i]=v i=i+1 end + return SparseArrays.sparse!(I,J,V,m,n,+) end - SparseArrays.sparse!(I,J,V,size(mv[1])...,+) + cscmatrix end - -SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} = sparse([m]) +function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=sum(m->length(m.values),dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + ip=1 + for m in dictmatrices + for (p,v) in m.values + nodeparts[last(p)]=ip + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + return SparseArrays.sparse!(I,J,V,m,n,+) + end + return cscmatrix +end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl new file mode 100644 index 0000000..07f25dc --- /dev/null +++ b/src/experimental/sparsematrixlnkdict.jl @@ -0,0 +1,452 @@ +""" + $(TYPEDEF) + +Modification of SparseMatrixLNK where the pointer to first index of +column j is stored in a dictionary. +""" +mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Dictionary to store start indices of columns + """ + colstart::Dict{Ti,Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + """ + rowval::Vector{Ti} + + """ + Nonzero entry values correspondin to each pair + (colptr[index],rowval[index]) + """ + nzval::Vector{Tv} +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. 
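+
+For example (a sketch with explicitly given value and index types):
+
+    lnk = SparseMatrixLNKDict{Float64,Int}(10, 10)
+    lnk[3, 4] = 1.5    # creates a new entry; nnz(lnk) is 1 afterwards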
+""" +function SparseMatrixLNKDict{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SparseMatrixLNKDict{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Ti,10)) +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +function SparseMatrixLNKDict(valuetype::Type{Tv}, indextype::Type{Ti}, m, + n) where {Tv, Ti <: Integer} + SparseMatrixLNKDict{Tv, Ti}(m, n) +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +SparseMatrixLNKDict(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKDict(Tv, Int, m, n) + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +SparseMatrixLNKDict(m, n) = SparseMatrixLNKDict(Float64, m, n) + +""" +$(SIGNATURES) + +Constructor from SparseMatrixCSC. + +""" +function SparseMatrixLNKDict(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: + Integer} + lnk = SparseMatrixLNKDict{Tv, Ti}(csc.m, csc.n) + for j = 1:(csc.n) + for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) + lnk[csc.rowval[k], j] = csc.nzval[k] + end + end + lnk +end + +function findindex(lnk::SparseMatrixLNKDict, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = get(lnk.colstart, j, 0) + if k==0 + return 0,0 + end + k0 = k + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +$(SIGNATURES) + +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SparseMatrixLNKDict{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SparseMatrixLNKDict, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + if k0==0 + lnk.colstart[j]=lnk.nentries + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + + if k0>0 + lnk.colptr[k0] = lnk.nentries + end + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +$(SIGNATURES) + +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SparseMatrixLNKDict, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. 
+""" +Base.size(lnk::SparseMatrixLNKDict) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixLNKDict) = lnk.nnz + +""" +$(SIGNATURES) + +Dummy flush! method for SparseMatrixLNKDict. Just +used in test methods +""" +function flush!(lnk::SparseMatrixLNKDict{Tv, Ti}) where {Tv, Ti} + return lnk +end + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. +""" +function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + for (j,k) in lnk.colstart + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + return SparseArrays.sparse!(I,J,V,m,n,+) +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +""" +function add_directly(lnk::SparseMatrixLNKDict{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for (j,k) in lnk.colstart + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = get(lnk.colstart, j, 0) + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? + + colptr[j] = inz + jlnk = one(Ti) # counts the entries in col + jcsc = csc.colptr[j] # counts entries in csc + + while true + if in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || + !in_lnk_col(jlnk, l_lnk_col)) + # Insert entries from csc into new structure + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + jcsc += 1 + inz += 1 + elseif in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) + # Add up entries from csc and lnk + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval + jcsc += 1 + inz += 1 + jlnk += 1 + elseif in_lnk_col(jlnk, l_lnk_col) + # Insert entries from lnk res. 
col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixLNKDict`](@ref) lnk, returning a SparseMatrixCSC
+"""
+Base.:+(lnk::SparseMatrixLNKDict, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+
+function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkdictmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
+
+        ip=1
+        for lnk in lnkdictmatrices
+            for (j,k) in lnk.colstart
+                nodeparts[j]=ip
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+            ip=ip+1
+        end
+        return SparseArrays.sparse!(I,J,V,m,n,+)
+    end
+    return cscmatrix
+end
+
+
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixLNKDict.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKDict)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixLNKDict)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixLNKDict)
+    SparseMatrixLNKDict(size(S, 1),
+                        size(S, 2),
+                        S.nnz,
+                        S.nentries,
+                        copy(S.colptr),
+                        copy(S.colstart),
+                        copy(S.rowval),
+                        copy(S.nzval))
+end
diff --git a/test/experimental_rect.jl b/test/ExperimentalParallel.jl
similarity index 100%
rename from test/experimental_rect.jl
rename to test/ExperimentalParallel.jl

From db51b63cedaf918171dd534bd8c0d1de972729a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?=
Date: Sun, 26 May 2024 20:11:24 +0200
Subject: [PATCH 25/44] tests using generic extendable code

---
 .../extendablesparsematrixparallel.jl         |  2 +-
 src/matrix/abstractextendable.jl              |  7 ++-
 test/ExperimentalParallel.jl                  | 58 +++++++++++++------
 ...erimentalDict.jl => ExperimentalScalar.jl} | 19 ++----
 ...rallelDict.jl => ExperimentalXParallel.jl} | 28 ++++-----
 test/runtests.jl                              | 37 +++++++-----
 6 files changed, 89 insertions(+), 62 deletions(-)
 rename test/{ExperimentalDict.jl => ExperimentalScalar.jl} (59%)
 rename test/{ExperimentalParallelDict.jl => ExperimentalXParallel.jl} (80%)

diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl
index 49566b1..2855855 100644
--- a/src/experimental/extendablesparsematrixparallel.jl
+++ b/src/experimental/extendablesparsematrixparallel.jl
@@ -139,7 +139,7 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x)
     rows = SparseArrays.rowvals(A)
     vals = nonzeros(A)
 
-    r.=zero(Tv)
+    r.=zero(eltype(ext))
     m,n=size(A)
     for icol=1:length(colparts)
         part=colparts[icol]
diff --git a/src/matrix/abstractextendable.jl b/src/matrix/abstractextendable.jl
index 589376c..dae94bb 100644
--- a/src/matrix/abstractextendable.jl
+++ b/src/matrix/abstractextendable.jl
@@ -1,9 +1,10 @@
 """
-Must implement:
-sparse
-Constructor from SparseMatrixCSC
+Subtypes must implement:
+- SparseArrays.sparse (maybe it should be sparse!?): flush + return SparseMatrixCSC
+- Constructor from SparseMatrixCSC
 rawupdateindex!
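+    (sets A[i,j]=op(A[i,j],v), inserting the entry if it is not yet present)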
+reset!: empty all internals, just keep size """ abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl index 367e1ac..4800281 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallel.jl @@ -1,7 +1,9 @@ +module ExperimentalParallel + using ExtendableSparse,SparseArrays using ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools +using OhMyThreads: @tasks using Test @@ -48,40 +50,60 @@ function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) end -function speedup_build_ESMP(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) +function speedup_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) m = n lindexes = LinearIndices((1:n,1:m)) X = collect(1:n) #LinRange(0,1,n) Y = collect(1:n) #LinRange(0,1,m) - - ExtendableSparse.with_locking!(false) - A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - ExtendableSparse.with_locking!(true) - mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(reset!($A0)) + result=[] for nt in allnp A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(ExtendableSparse.reset!($A)) seconds=1 + @assert A.cscmatrix≈A0.cscmatrix push!(result,(nt,round(t0/t,digits=2))) end - # #update - # times_update = zeros(k) - # for i=1:k - # times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true) - # end + result + +end + + +function speedup_update(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y) + t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(nonzeros($A0.cscmatrix).=0) + + + + result=[] + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(nonzeros($A.cscmatrix).=0) seconds=1 + @assert A.cscmatrix≈A0.cscmatrix + push!(result,(nt,round(t0/t,digits=2))) + end - # @info "TIMES: MIN, AVG, MAX" - # info_minmax(times_build, "build ") - # info_minmax(times_update, "update") result end + """ `generate_rectangle_grid(lindexes, Ti)` @@ -126,7 +148,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell end for level=1:A.depth - Threads.@threads for tid=1:A.nt + @tasks for tid=1:A.nt for cell in A.cellsforpart[(level-1)*A.nt+tid] assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, tid) end @@ -147,6 +169,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell end end + function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer} addtoentry!(A, k, k, tid, +v) addtoentry!(A, k, l, tid, -v) @@ -222,3 +245,4 @@ function 
assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) whe A[ij10,ij10]+=v*d A[ij11,ij11]+=v*d end +end diff --git a/test/ExperimentalDict.jl b/test/ExperimentalScalar.jl similarity index 59% rename from test/ExperimentalDict.jl rename to test/ExperimentalScalar.jl index 7bb8211..58de771 100644 --- a/test/ExperimentalDict.jl +++ b/test/ExperimentalScalar.jl @@ -1,35 +1,28 @@ -module ExperimentalDict - +module ExperimentalScalar using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using Test -function ExtendableSparse.reset!(A::ExtendableSparseMatrix) - A.cscmatrix=spzeros(size(A)...) - A.lnkmatrix=nothing -end - - -function test_correctness_build(N) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + A=Tm{Float64,Int}(N^2,N^2) partassemble!(A0,X,Y) partassemble!(A,X,Y) @test sparse(A0)≈sparse(A) end -function speed_build(N) +function speed_build(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + A=Tm{Float64,Int}(N^2,N^2) tlnk= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) tdict= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - tdict/tlnk + tlnk/tdict end end diff --git a/test/ExperimentalParallelDict.jl b/test/ExperimentalXParallel.jl similarity index 80% rename from test/ExperimentalParallelDict.jl rename to test/ExperimentalXParallel.jl index 6d817a0..c6b0122 100644 --- a/test/ExperimentalParallelDict.jl +++ b/test/ExperimentalXParallel.jl @@ -1,14 +1,14 @@ -module ExperimentalParallelDict +module ExperimentalXParallel using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using Test -function test_correctness_update(N) +function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N - A=ExtendableSparseMatrixParallelDict{Float64,Int}(N^2,N^2,1) + A=Tm{Float64,Int}(N^2,N^2,1) allnp=[4,5,6,7,8] # Assembele without partitioning @@ -32,7 +32,7 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N allnp=[4,5,6,7,8] @@ -43,27 +43,27 @@ function test_correctness_build(N) for np in allnp # Make a new matrix and assemble parallel. 
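         # (comparing the nonzeros vectors entrywise is valid here because the
         # same entry set always yields the same CSC pattern and ordering)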
# this should result in the same nonzeros - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) partassemble!(A,X,Y, np) @test nonzeros(A)≈nz end end -function test_correctness_mul(N; allnp=[4,5,6,7,8]) +function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8]) X=1:N Y=1:N A0=ExtendableSparseMatrix(N^2,N^2) partassemble!(A0,X,Y) for np in allnp - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) partassemble!(A,X,Y,np) b=rand(N^2) @test A*b ≈ A0*b end end -function speedup_update(N; allnp=[4,5,6,7,8,9,10]) +function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -73,7 +73,7 @@ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) # During setup, set matrix entries to zero while keeping the structure t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) result=[] - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure @@ -85,11 +85,11 @@ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) result end -function speedup_build(N; allnp=[4,5,6,7,8,9,10]) +function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N - A0=ExtendableSparseMatrixParallelDict(N^2,N^2,1) - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A0=ExtendableSparseMatrix(N^2,N^2) + A=Tm(N^2,N^2,1) partassemble!(A0,X,Y) nz=copy(nonzeros(A0)) reset!(A0) @@ -117,7 +117,7 @@ function speedup_build(N; allnp=[4,5,6,7,8,9,10]) result end -function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) +function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N @@ -128,7 +128,7 @@ function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) result=[] for np in allnp - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) partassemble!(A,X,Y,np) t=@belapsed $A*$b seconds=1 push!(result,(np,round(t0/t,digits=2))) diff --git a/test/runtests.jl b/test/runtests.jl index e7dc9d7..4be9f0f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using Test using LinearAlgebra using SparseArrays using ExtendableSparse +using ExtendableSparse.Experimental using Printf using BenchmarkTools @@ -12,27 +13,35 @@ using ForwardDiff @testset "ExperimentalParallelLocking" begin include("ExperimentalParallelLocking.jl") @testset "update correctness" begin - ExperimentalParallelLocking.test_correctness_update(50) - ExperimentalParallelLocking.test_correctness_update(100) - ExperimentalParallelLocking.test_correctness_update(rand(30:200)) + for N in [100,rand(30:200),500] + ExperimentalParallelLocking.test_correctness_update(N) + end end @testset "build correctness" begin - ExperimentalParallelLocking.test_correctness_build(50) - ExperimentalParallelLocking.test_correctness_build(100) - ExperimentalParallelLocking.test_correctness_build(rand(30:200)) + for N in [100,rand(30:200),500] + ExperimentalParallelLocking.test_correctness_build(N) + end end end -@testset "ExperimentalDict" begin - include("ExperimentalDict.jl") - ExperimentalDict.test_correctness_build(100) +@testset "ExperimentalScalar" begin + include("ExperimentalScalar.jl") + for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] + for N in [100,rand(30:200),500] + ExperimentalScalar.test_correctness_build(N,Tm) + end + end end -@testset "ExperimentalParallelDict" begin - include("ExperimentalParallelDict.jl") - 
ExperimentalParallelDict.test_correctness_update(200) - ExperimentalParallelDict.test_correctness_build(200) - ExperimentalParallelDict.test_correctness_mul(200) +@testset "ExperimentalXParallel" begin + include("ExperimentalXParallel.jl") + for Tm in [ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] + for N in [100,rand(30:200),500] + ExperimentalXParallel.test_correctness_update(N,Tm) + ExperimentalXParallel.test_correctness_build(N,Tm) + ExperimentalXParallel.test_correctness_mul(N,Tm) + end + end end @testset "Constructors" begin include("test_constructors.jl") end From adf95be89a9aa35639e3f00519fa5762338a92e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 15:52:05 +0200 Subject: [PATCH 26/44] Introduce AbstractSparseMatrixExtension - Remove parallel locking tests - Test ExperimentalSparseParallel --- src/ExtendableSparse.jl | 1 + src/experimental/Experimental.jl | 14 +- .../extendablesparsematrixparallel.jl | 4 +- .../extendablesparsematrixscalar.jl | 10 +- src/experimental/parallel_testtools.jl | 4 +- src/experimental/sparsematrixdict.jl | 4 +- src/experimental/sparsematrixlnkdict.jl | 27 +- src/experimental/sparsematrixlnkx.jl | 441 ++++++++++++++++++ src/matrix/abstractextension.jl | 28 ++ src/matrix/extendable.jl | 72 +-- src/matrix/sparsematrixlnk.jl | 2 +- test/ExperimentalParallel.jl | 22 +- test/ExperimentalParallelLocking.jl | 128 ----- test/ExperimentalScalar.jl | 6 +- test/ExperimentalXParallel.jl | 3 +- test/Project.toml | 1 + test/runtests.jl | 25 +- 17 files changed, 553 insertions(+), 239 deletions(-) create mode 100644 src/experimental/sparsematrixlnkx.jl create mode 100644 src/matrix/abstractextension.jl delete mode 100644 test/ExperimentalParallelLocking.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 8ab64ba..39a8d19 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -22,6 +22,7 @@ using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") +include("matrix/abstractextension.jl") include("matrix/sparsematrixlnk.jl") include("matrix/abstractextendable.jl") include("matrix/extendable.jl") diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 37a34d1..162ed2f 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -4,7 +4,8 @@ using LinearAlgebra using SparseArrays: AbstractSparseMatrixCSC import SparseArrays: nonzeros, getcolptr,nzrange import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex -using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash, AbstractExtendableSparseMatrix +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using ExtendableSparse: AbstractExtendableSparseMatrix, AbstractSparseMatrixExtension using DocStringExtensions using Metis using Base.Threads @@ -41,6 +42,9 @@ export reorderlinsys, nnz_noflush include("sparsematrixdict.jl") export SparseMatrixDict +include("sparsematrixlnkx.jl") +export SparseMatrixLNKX + include("sparsematrixlnkdict.jl") export SparseMatrixLNKDict @@ -54,6 +58,9 @@ export ExtendableSparseMatrixDict const ExtendableSparseMatrixLNKDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} export ExtendableSparseMatrixLNKDict +const ExtendableSparseMatrixLNKX{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKX{Tv,Ti},Tv,Ti} +export ExtendableSparseMatrixLNKX + const 
ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNK{Tv,Ti},Tv,Ti} export ExtendableSparseMatrixLNK @@ -63,10 +70,13 @@ const ExtendableSparseMatrixParallelDict{Tv,Ti}=ExtendableSparseMatrixXParallel{ ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelDict, partcolors! +const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKX{Tv,Ti},Tv,Ti} +ExtendableSparseMatrixParallelLNKX(m,n,p)= ExtendableSparseMatrixParallelLNKX{Float64,Int64}(m,n,p) +export ExtendableSparseMatrixParallelLNKX const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelLNKDict, partcolors! +export ExtendableSparseMatrixParallelLNKDict include("parallel_testtools.jl") diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 2855855..2f79985 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixXParallel{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ @@ -15,7 +15,7 @@ mutable struct ExtendableSparseMatrixXParallel{Tm, Tv, Ti <: Integer} <: Abstrac end -function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm, Tv, Ti} +function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n), [Tm(m,n) for i=1:p], zeros(Ti,n), diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl index d7fdc67..887d275 100644 --- a/src/experimental/extendablesparsematrixscalar.jl +++ b/src/experimental/extendablesparsematrixscalar.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixScalar{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ @@ -11,7 +11,7 @@ mutable struct ExtendableSparseMatrixScalar{Tm, Tv, Ti <: Integer} <: AbstractEx end -function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm, Tv, Ti<:Integer} +function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} ExtendableSparseMatrixScalar(spzeros(Tv, Ti, m, n), Tm(m,n) ) @@ -27,8 +27,10 @@ end function flush!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where{Tm,Tv,Ti} - ext.cscmatrix=ext.xmatrix+ext.cscmatrix - ext.xmatrix=Tm(size(ext.cscmatrix)...) + if nnz(ext.xmatrix)>0 + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) 
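+        # (reached only when the extension actually held entries; an empty
+        # extension leaves both the CSC part and the extension untouched)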
+    end
     ext
 end
diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl
index fd261cb..16a67c7 100644
--- a/src/experimental/parallel_testtools.jl
+++ b/src/experimental/parallel_testtools.jl
@@ -192,7 +192,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1)
 end
 
 
-function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict},X,Y,nt=1;d=0.1, reset=true)
+function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true)
     Nx=length(X)
     Ny=length(Y)
     size(A,1)==Nx*Ny || error("incompatible size of A")
@@ -201,7 +201,7 @@ function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSpa
     lindexes=LinearIndices((1:Nx,1:Ny))
     if nt==1
         reset!(A,1)
-       assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1)
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1)
     else
         p,pc=colpart2d(X,Y,nt)
         if reset
diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl
index 666aa10..0ffe8aa 100644
--- a/src/experimental/sparsematrixdict.jl
+++ b/src/experimental/sparsematrixdict.jl
@@ -3,7 +3,7 @@
 
 Sparse matrix where entries are organized as dictionary.
 """
-mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrix{Tv,Ti}
+mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrixExtension{Tv,Ti}
     m::Ti
     n::Ti
    values::Dict{Pair{Ti,Ti}, Tv}
@@ -29,6 +29,8 @@ end
 
 Base.size(m::SparseMatrixDict)=(m.m,m.n)
 
+SparseArrays.nnz(m::SparseMatrixDict)=length(m.values)
+
 function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti}
     l=length(m.values)
     I=Vector{Ti}(undef,l)
diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl
index 07f25dc..c096c69 100644
--- a/src/experimental/sparsematrixlnkdict.jl
+++ b/src/experimental/sparsematrixlnkdict.jl
@@ -4,7 +4,7 @@
 Modification of SparseMatrixLNK where the pointer to first index of
 column j is stored in a dictionary.
 """
-mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti}
+mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
     """
     Number of rows
     """
@@ -240,15 +240,6 @@ Return number of nonzero entries.
 """
 SparseArrays.nnz(lnk::SparseMatrixLNKDict) = lnk.nnz
 
-"""
-$(SIGNATURES)
-
-Dummy flush! method for SparseMatrixLNKDict. Just
-used in test methods
-"""
-function flush!(lnk::SparseMatrixLNKDict{Tv, Ti}) where {Tv, Ti}
-    return lnk
-end
 
 """
 $(SIGNATURES)
@@ -262,13 +253,15 @@ function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti},
     J=Vector{Ti}(undef,l)
     V=Vector{Tv}(undef,l)
     i=1
-    for icsc=1:length(colptr)-1
-        for j=colptr[icsc]:colptr[icsc+1]-1
-            I[i]=icsc
-            J[i]=rowval[j]
-            V[i]=nzval[j]
-            i=i+1
-        end
+    if nnz(csc)>0
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
     end
     for (j,k) in lnk.colstart
         while k>0
diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl
new file mode 100644
index 0000000..6646e12
--- /dev/null
+++ b/src/experimental/sparsematrixlnkx.jl
@@ -0,0 +1,441 @@
+"""
+    $(TYPEDEF)
+
+Modification of SparseMatrixLNK where the pointer to first index of
+column j is stored in a vector.
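+
+In contrast to [`SparseMatrixLNKDict`](@ref), the column start indices are
+kept in a vector of length n, avoiding hash lookups at the price of O(n)
+storage. A usage sketch:
+
+    lnk = SparseMatrixLNKX{Float64,Int}(100, 100)
+    rawupdateindex!(lnk, +, 1.0, 3, 5)   # lnk[3,5] += 1.0, inserting the entry
+    S = sparse(lnk)                      # merge into a SparseMatrixCSC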
+"""
+mutable struct SparseMatrixLNKX{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
+    """
+    Number of rows
+    """
+    m::Ti
+
+    """
+    Number of columns
+    """
+    n::Ti
+
+    """
+    Number of nonzeros
+    """
+    nnz::Ti
+
+    """
+    Length of arrays
+    """
+    nentries::Ti
+
+    """
+    Linked list of column entries. Initial length is 10,
+    it grows with each new entry.
+
+    colptr[index] contains the next
+    index in the list or zero, in the latter case terminating the list which
+    starts at index 1<=j<=n for each column j.
+    """
+    colptr::Vector{Ti}
+
+    """
+    Start indices of columns
+    """
+    colstart::Vector{Ti}
+
+    """
+    Row numbers. For each index it contains the zero (initial state)
+    or the row numbers corresponding to the column entry list in colptr.
+    """
+    rowval::Vector{Ti}
+
+    """
+    Nonzero entry values corresponding to each pair
+    (colptr[index],rowval[index])
+    """
+    nzval::Vector{Tv}
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixLNKX{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    SparseMatrixLNKX{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), zeros(Ti,n), zeros(Ti,10), zeros(Tv,10))
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixLNKX(valuetype::Type{Tv}, indextype::Type{Ti}, m,
+                          n) where {Tv, Ti <: Integer}
+    SparseMatrixLNKX{Tv, Ti}(m, n)
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixLNKX(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKX(Tv, Int, m, n)
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixLNKX(m, n) = SparseMatrixLNKX(Float64, m, n)
+
+
+function findindex(lnk::SparseMatrixLNKX, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k = lnk.colstart[j]
+    if k==0
+        return 0,0
+    end
+    k0 = k
+    while k > 0
+        if lnk.rowval[k] == i
+            return k, 0
+        end
+        k0 = k
+        k = lnk.colptr[k]
+    end
+    return 0, k0
+end
+
+"""
+$(SIGNATURES)
+
+Return value stored for entry or zero if not found
+"""
+function Base.getindex(lnk::SparseMatrixLNKX{Tv, Ti}, i, j) where {Tv, Ti}
+    k, k0 = findindex(lnk, i, j)
+    if k == 0
+        return zero(Tv)
+    else
+        return lnk.nzval[k]
+    end
+end
+
+function addentry!(lnk::SparseMatrixLNKX, i, j, k, k0)
+    # increase number of entries
+    lnk.nentries += 1
+    if length(lnk.nzval) < lnk.nentries
+        newsize = Int(ceil(5.0 * lnk.nentries / 4.0))
+        resize!(lnk.nzval, newsize)
+        resize!(lnk.rowval, newsize)
+        resize!(lnk.colptr, newsize)
+    end
+
+    if k0==0
+        lnk.colstart[j]=lnk.nentries
+    end
+
+    # Append entry if not found
+    lnk.rowval[lnk.nentries] = i
+
+    # Shift the end of the list
+    lnk.colptr[lnk.nentries] = 0
+
+    if k0>0
+        lnk.colptr[k0] = lnk.nentries
+    end
+
+    # Update number of nonzero entries
+    lnk.nnz += 1
+    return lnk.nentries
+end
+
+"""
+$(SIGNATURES)
+
+Update value of existing entry, otherwise extend matrix if v is nonzero.
+"""
+function Base.setindex!(lnk::SparseMatrixLNKX, v, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = v
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = v
+    end
+    return lnk
+end
+
+"""
+$(SIGNATURES)
+
+Update element of the matrix with operation `op`.
+It assumes that `op(0,0)==0`. If `v` is zero, no new
+entry is created.
+""" +function updateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. +""" +Base.size(lnk::SparseMatrixLNKX) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixLNKX) = lnk.nnz + +""" +$(SIGNATURES) + +Dummy flush! method for SparseMatrixLNKX. Just +used in test methods +""" +function flush!(lnk::SparseMatrixLNKX{Tv, Ti}) where {Tv, Ti} + return lnk +end + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. +""" +function add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + if nnz(csc)>0 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + end + for j=1:n + k=lnk.colstart[j] + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + return SparseArrays.sparse!(I,J,V,m,n,+) +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +""" +function add_directly(lnk::SparseMatrixLNKX{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for j=1:lnk.n + k=lnk.colstart[j] + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = lnk.colstart[j] + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? 
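+    # (what follows is a two-cursor merge: the sorted csc column and the
+    # sorted col buffer are traversed jointly, values with coinciding row
+    # indices are added up, and all other entries are copied in order)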
+
+        colptr[j] = inz
+        jlnk = one(Ti) # counts the entries in col
+        jcsc = csc.colptr[j]  # counts entries in csc
+
+        while true
+            if in_csc_col(jcsc, j) &&
+               (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval ||
+                !in_lnk_col(jlnk, l_lnk_col))
+                # Insert entries from csc into new structure
+                rowval[inz] = csc.rowval[jcsc]
+                nzval[inz] = csc.nzval[jcsc]
+                jcsc += 1
+                inz += 1
+            elseif in_csc_col(jcsc, j) &&
+                   (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval)
+                # Add up entries from csc and lnk
+                rowval[inz] = csc.rowval[jcsc]
+                nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval
+                jcsc += 1
+                inz += 1
+                jlnk += 1
+            elseif in_lnk_col(jlnk, l_lnk_col)
+                # Insert entries from lnk resp. col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixLNKX`](@ref) lnk, returning a SparseMatrixCSC
+"""
+Base.:+(lnk::SparseMatrixLNKX, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+
+function sum!(nodeparts, lnkmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
+
+        ip=1
+        for lnk in lnkmatrices
+            for j=1:n
+                k=lnk.colstart[j]
+                k>0 && (nodeparts[j]=ip)
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+            ip=ip+1
+        end
+        return SparseArrays.sparse!(I,J,V,m,n,+)
+    end
+    return cscmatrix
+end
+
+
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixLNKX.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKX)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixLNKX)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixLNKX)
+    SparseMatrixLNKX(size(S, 1),
+                     size(S, 2),
+                     S.nnz,
+                     S.nentries,
+                     copy(S.colptr),
+                     copy(S.colstart),
+                     copy(S.rowval),
+                     copy(S.nzval))
+end
diff --git a/src/matrix/abstractextension.jl b/src/matrix/abstractextension.jl
new file mode 100644
index 0000000..378e54a
--- /dev/null
+++ b/src/matrix/abstractextension.jl
@@ -0,0 +1,28 @@
+"""
+    $(TYPEDEF)
+
+Abstract type for sparse matrix extension.
+
+Subtypes T_ext must implement:
+
+
+Constructor T_ext(m,n)
+SparseArrays.nnz(ext::T_ext)
+Base.size(ext::T_ext)
+
+Base.+(ext::T_ext, csc)
+  - Add extension matrix and csc matrix, return csc matrix
+
+sum!(nodeparts::Vector{Ti}, extmatrices::Vector{T_ext}, cscmatrix)
+  - Add csc matrix and extension matrices (one per partition) and return csc matrix
+  - Fill nodeparts (already initialized at input) with the information which partition was used to assemble a node,
+    i.e. if entry [i,j] comes from extmatrices[p], set nodeparts[j]=p.
+
+    This information may be used by matrix-vector multiplication and preconditioners
+
+rawupdateindex!(ext::T_ext, op, v, i, j)
+  - Set ext[i,j]=op(ext[i,j],v), possibly insert entry into matrix.
+ + +""" +abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index 572227e..2d8a908 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -17,8 +17,6 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSp Linked list structure holding data of extension """ lnkmatrix::Union{SparseMatrixLNK{Tv, Ti}, Nothing} - - lock::Base.ReentrantLock """ Pattern hash @@ -26,32 +24,6 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSp phash::UInt64 end -mutable struct Locking - locking::Bool -end - -# -# Locking functionality just for developing parallelization. -# To be removed before merging into main branch. -# -const locking=Locking(false) - -function with_locking!(l::Bool) - global locking - locking.locking=l -end - -function with_locking() - global locking - locking.locking -end - -mylock(x)=with_locking() ? Base.lock(x) : nothing -myunlock(x)=with_locking() ? Base.unlock(x) : nothing - - -#mylock(x)=nothing -#myunlock(x)=nothing """ ``` @@ -65,7 +37,7 @@ Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for """ function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing,Base.ReentrantLock(), 0) + ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) end function ExtendableSparseMatrix(valuetype::Type{Tv}, @@ -87,11 +59,11 @@ $(SIGNATURES) Create ExtendableSparseMatrix from SparseMatrixCSC """ function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) end function ExtendableSparseMatrix{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) end """ @@ -193,15 +165,10 @@ function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - mylock(ext.lock) - try - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) - end - updateindex!(ext.lnkmatrix, op, v, i, j) - finally - myunlock(ext.lock) + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end + updateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -220,15 +187,10 @@ function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - mylock(ext.lock) - try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end rawupdateindex!(ext.lnkmatrix, op, v, i, j) - finally - myunlock(ext.lock) - end end ext end @@ -247,15 +209,10 @@ function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - mylock(ext.lock) - try - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) - end - ext.lnkmatrix[i, j] = v - finally - myunlock(ext.lock) + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end + ext.lnkmatrix[i, j] = v end end @@ -275,12 +232,7 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, return zero(Tv) else v=zero(Tv) - 
mylock(ext.lock) - try - v=ext.lnkmatrix[i, j] - finally - myunlock(ext.lock) - end + v=ext.lnkmatrix[i, j] end end @@ -325,9 +277,9 @@ $(SIGNATURES) """ function Base.copy(S::ExtendableSparseMatrix) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing, Base.ReentrantLock(),S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), nothing,S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), Base.ReentrantLock(), S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) end end diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl index b00c6fc..b69c863 100644 --- a/src/matrix/sparsematrixlnk.jl +++ b/src/matrix/sparsematrixlnk.jl @@ -18,7 +18,7 @@ can be conveniently updated via `push!`. No copying of existing data is necessa $(TYPEDFIELDS) """ -mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} +mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} """ Number of rows """ diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl index 4800281..936f566 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallel.jl @@ -50,6 +50,25 @@ function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) end +function test_correctness_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + @assert A.cscmatrix≈A0.cscmatrix + end +end + + function speedup_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) m = n lindexes = LinearIndices((1:n,1:m)) @@ -158,7 +177,6 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell for cell in A.cellsforpart[A.depth*A.nt+1] assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1) end - nnzCSC, nnzLNK = nnz_noflush(A) if nnzCSC > 0 && nnzLNK > 0 flush!(A; do_dense=false) @@ -166,7 +184,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell elseif nnzCSC == 0 && nnzLNK > 0 flush!(A; do_dense=true) #dense flush - end + end end diff --git a/test/ExperimentalParallelLocking.jl b/test/ExperimentalParallelLocking.jl deleted file mode 100644 index 4ad7ff2..0000000 --- a/test/ExperimentalParallelLocking.jl +++ /dev/null @@ -1,128 +0,0 @@ -module ExperimentalParallelLocking - -using ExtendableSparse,SparseArrays -using ExtendableSparse: with_locking! -using ExtendableSparse.Experimental -using BenchmarkTools -using Test - -""" - test_correctness_update(N) - -Test correctness of parallel assembly on NxN grid during -update phase, assuming that the structure has been assembled. 
-""" -function test_correctness_update(N) - with_locking!(true) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - allnp=[4,5,6,7,8] - - # Assembele without partitioning - # this gives the "base truth" to compare with - partassemble!(A,X,Y) - - # Save the nonzeros - nz=copy(nonzeros(A)) - for np in allnp - # Reset the nonzeros, keeping the structure intact - nonzeros(A).=0 - # Parallel assembly whith np threads - partassemble!(A,X,Y, np) - @test nonzeros(A)≈nz - end - with_locking!(false) -end - -""" - test_correctness_build(N) - -Test correctness of parallel assembly on NxN grid during -build phase, assuming that no structure has been assembled. -""" -function test_correctness_build(N) - with_locking!(true) - X=1:N - Y=1:N - allnp=[4,5,6,7,8] - # Get the "ground truth" - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - for np in allnp - # Make a new matrix and assemble parallel. - # this should result in the same nonzeros - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y, np) - @test nonzeros(A)≈nz - end - with_locking!(false) -end - - - -""" - speedup_update(N) - -Benchmark parallel speedup of update phase of parallel assembly on NxN grid. -Check for correctness as well. -""" -function speedup_update(N; allnp=[4,5,6,7,8,9,10]) - with_locking!(true) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - # Get the base timing - # During setup, set matrix entries to zero while keeping the structure - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) - result=[] - for np in allnp - # Get the parallel timing - # During setup, set matrix entries to zero while keeping the structure - t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(nonzeros($A).=0) - @assert nonzeros(A)≈nz - push!(result,(np,round(t0/t,digits=2))) - end - with_locking!(false) - result -end - - -""" - speedup_build(N) - -Benchmark parallel speedup of structure build phase of parallel assembly on NxN grid. -Check for correctness as well. - -Works in the moment with locking. -""" -function speedup_build(N; allnp=[4,5,6,7,8,9,10]) - with_locking!(true) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - reset!(A) - partassemble!(A,X,Y) - @assert nonzeros(A)≈(nz) - - # Get the base timing - # During setup, reset matrix to empty state. - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - - result=[] - for np in allnp - # Get the parallel timing - # During setup, reset matrix to empty state. 
- t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) - @assert nonzeros(A)≈nz - push!(result,(np,round(t0/t,digits=2))) - end - with_locking!(false) - result -end -end diff --git a/test/ExperimentalScalar.jl b/test/ExperimentalScalar.jl index 58de771..11040cf 100644 --- a/test/ExperimentalScalar.jl +++ b/test/ExperimentalScalar.jl @@ -20,9 +20,9 @@ function speed_build(N,Tm::Type{<:AbstractSparseMatrix}) A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) A=Tm{Float64,Int}(N^2,N^2) - tlnk= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) - tdict= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - tlnk/tdict + tbase= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) + tx= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + tbase/tx end end diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index c6b0122..e45767a 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -32,10 +32,9 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}, allnp=[4,5,6,7,8]) X=1:N Y=1:N - allnp=[4,5,6,7,8] # Get the "ground truth" A=ExtendableSparseMatrix(N^2,N^2) partassemble!(A,X,Y) diff --git a/test/Project.toml b/test/Project.toml index e195dd2..8aa666d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -9,6 +9,7 @@ IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/test/runtests.jl b/test/runtests.jl index 4be9f0f..244dbb5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,21 +9,6 @@ using BenchmarkTools using MultiFloats using ForwardDiff - -@testset "ExperimentalParallelLocking" begin - include("ExperimentalParallelLocking.jl") - @testset "update correctness" begin - for N in [100,rand(30:200),500] - ExperimentalParallelLocking.test_correctness_update(N) - end - end - - @testset "build correctness" begin - for N in [100,rand(30:200),500] - ExperimentalParallelLocking.test_correctness_build(N) - end - end -end @testset "ExperimentalScalar" begin include("ExperimentalScalar.jl") for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] @@ -44,6 +29,16 @@ end end end +@testset "ExperimentalParallel" begin + include("ExperimentalParallel.jl") + for d=[1,2,3] + for N in [100,rand(30:200),500] + ExperimentalParallel.test_correctness_build(N,d) + end + end +end + + @testset "Constructors" begin include("test_constructors.jl") end @testset "Copy-Methods" begin include("test_copymethods.jl") end From 4fb318e29420030f0d314a35082a55413a561c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 15:58:11 +0200 Subject: [PATCH 27/44] ci: new ci matrix, multithreading --- .github/workflows/ci.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ff5995..882d933 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-15,15 +15,26 @@ jobs: fail-fast: false matrix: version: - - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. + - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - 'nightly' os: - ubuntu-latest - - macos-latest - windows-latest + - macos-latest # arm + - macOS-13 # intel arch: - x64 + - aarch64 + exclude: + - os: ubuntu-latest + arch: aarch64 + - os: windows-latest + arch: aarch64 + - os: macOS-13 + arch: aarch64 + - os: macos-latest + arch: x64 steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 @@ -42,6 +53,8 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + env: + JULIA_NUM_THREADS: 4 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 docs: From 99f98517c2887f08046cbeafaa9a094a84204c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 16:26:09 +0200 Subject: [PATCH 28/44] fix sparse!, replace Threads.@threads by tasks --- src/ExtendableSparse.jl | 2 +- src/experimental/sparsematrixdict.jl | 12 ++++++++++-- src/experimental/sparsematrixlnkdict.jl | 6 +++++- src/experimental/sparsematrixlnkx.jl | 12 ++++++++++-- src/factorizations/blockpreconditioner.jl | 10 +++++----- 5 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 39a8d19..0d7bd5c 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -3,7 +3,7 @@ using SparseArrays,StaticArrays using LinearAlgebra using Sparspak using ILUZero - +using OhMyThreads: @tasks if !isdefined(Base, :get_extension) using Requires diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 0ffe8aa..9c8cea8 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -43,7 +43,11 @@ function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} V[i]=v i=i+1 end - SparseArrays.sparse!(I,J,V,size(mv[1])...,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} @@ -70,7 +74,11 @@ function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC V[i]=v i=i+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end cscmatrix end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index c096c69..f1f6ab5 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -272,7 +272,11 @@ function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, i=i+1 end end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index 6646e12..01fbbd0 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -266,7 +266,11 @@ function 
add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, i=i+1 end end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end @@ -407,7 +411,11 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscma end ip=ip+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end return cscmatrix end diff --git a/src/factorizations/blockpreconditioner.jl b/src/factorizations/blockpreconditioner.jl index 7c97eca..bf0d5de 100644 --- a/src/factorizations/blockpreconditioner.jl +++ b/src/factorizations/blockpreconditioner.jl @@ -49,7 +49,7 @@ function update!(precon::BlockPreconditioner) np=length(precon.partitioning) precon.facts=Vector{Any}(undef,np) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np factorization=deepcopy(precon.factorization) AP=precon.A[precon.partitioning[ipart],precon.partitioning[ipart]] FP=factorization(AP) @@ -66,11 +66,11 @@ function LinearAlgebra.ldiv!(p::BlockPreconditioner,v) np=length(partitioning) if allow_views(p.factorization) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np ldiv!(facts[ipart],view(v,partitioning[ipart])) end else - Threads.@threads for ipart=1:np + @tasks for ipart=1:np vv=v[partitioning[ipart]] ldiv!(facts[ipart],vv) view(v,partitioning[ipart]).=vv @@ -85,11 +85,11 @@ function LinearAlgebra.ldiv!(u,p::BlockPreconditioner,v) np=length(partitioning) if allow_views(p.factorization) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np ldiv!(view(u,partitioning[ipart]),facts[ipart],view(v,partitioning[ipart])) end else - Threads.@threads for ipart=1:np + @tasks for ipart=1:np uu=u[partitioning[ipart]] ldiv!(uu,facts[ipart],v[partitioning[ipart]]) view(u,partitioning[ipart]).=uu From a78d8ecbc5fa3ef9f0d1eb7c873693884c619c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 16:35:34 +0200 Subject: [PATCH 29/44] fix sparse! 
calls --- src/experimental/sparsematrixdict.jl | 10 +++++++--- src/experimental/sparsematrixlnkdict.jl | 8 ++++++-- src/experimental/sparsematrixlnkx.jl | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 9c8cea8..2e58238 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -46,7 +46,7 @@ function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end @@ -77,7 +77,7 @@ function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end cscmatrix @@ -113,7 +113,11 @@ function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatri end ip=ip+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end end return cscmatrix end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index f1f6ab5..a53df25 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -275,7 +275,7 @@ function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end @@ -415,7 +415,11 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cs end ip=ip+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end end return cscmatrix end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index 01fbbd0..f7a322a 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -269,7 +269,7 @@ function add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end @@ -414,7 +414,7 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscma @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end return cscmatrix From f183d93d4ce78b15d902d378ea187c4f17ef6fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 18:26:59 +0200 Subject: [PATCH 30/44] fix Base.: dispatch for 1.9 --- Project.toml | 1 + src/experimental/extendablesparsematrixparallel.jl | 8 +++++++- test/ExperimentalXParallel.jl | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 1d10c82..e1bcecd 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b" [weakdeps] AMGCLWrap = 
"4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 2f79985..c8125bf 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -62,7 +62,6 @@ function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti} np=length(ext.xmatrices) (m,n)=size(ext.cscmatrix) ext.xmatrices=[Tm(m,n) for i=1:np] - npts::Vector{Ti}=ext.nodeparts pn=zeros(Ti,np) for i=1:n @@ -132,7 +131,14 @@ function rawupdateindex!(ext::ExtendableSparseMatrixXParallel, end end + +# Needed in 1.9 +function Base.:*(ext::ExtendableSparse.Experimental.ExtendableSparseMatrixXParallel{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA + mul!(similar(x),ext,x) +end + function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) + flush!(ext) A=ext.cscmatrix colparts=ext.colparts partnodes=ext.partnodes diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index e45767a..d6bb113 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -53,11 +53,12 @@ function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5 Y=1:N A0=ExtendableSparseMatrix(N^2,N^2) partassemble!(A0,X,Y) - for np in allnp A=Tm(N^2,N^2,1) partassemble!(A,X,Y,np) b=rand(N^2) + flush!(A) + A*b @test A*b ≈ A0*b end end From c3b78c9486143e9b0c185cd4b7138901f05f3f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 28 May 2024 11:57:07 +0200 Subject: [PATCH 31/44] working example with VoronoiFVM --- src/experimental/Experimental.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 162ed2f..4346122 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -10,6 +10,7 @@ using DocStringExtensions using Metis using Base.Threads using OhMyThreads: @tasks +import ExtendableSparse: factorize!, update! include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) From dea60a4cf374dad794144bf3e12057eb298a6086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 17 Jun 2024 14:52:38 +0200 Subject: [PATCH 32/44] Tests with partitioned grids. 
Need for grid induced node partitioning --- .../extendablesparsematrixparallel.jl | 11 +- src/experimental/parallel_testtools.jl | 3 + src/experimental/sparsematrixdict.jl | 10 +- test/ExperimentalScalar.jl | 30 +-- test/ExperimentalXParallel.jl | 188 ++++++++++++------ test/Project.toml | 1 + 6 files changed, 156 insertions(+), 87 deletions(-) diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index c8125bf..97b4423 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -150,13 +150,10 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) for icol=1:length(colparts) part=colparts[icol] @tasks for ip=1:length(part) - @inbounds begin - for j in partnodes[part[ip]] - for i in nzrange(A,j) - row = rows[i] - val = vals[i] - r[row]+=val*x[j] - end + pnodes=partnodes[part[ip]] + for j in pnodes + @inbounds for i in nzrange(A,j) + r[rows[i]]+=vals[i]*x[j] end end end diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl index 16a67c7..83f75cd 100644 --- a/src/experimental/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -195,6 +195,7 @@ end function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true) Nx=length(X) Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") size(A,2)==Nx*Ny || error("incompatible size of A") @@ -219,3 +220,5 @@ function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSpa end flush!(A) end + + diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 2e58238..2bd6295 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -31,22 +31,22 @@ Base.size(m::SparseMatrixDict)=(m.m,m.n) SparseArrays.nnz(m::SparseMatrixDict)=length(m.values) -function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} - l=length(m.values) +function SparseArrays.sparse(mat::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(mat.values) I=Vector{Ti}(undef,l) J=Vector{Ti}(undef,l) V=Vector{Tv}(undef,l) i=1 - for (p,v) in m.values + for (p,v) in mat.values I[i]=first(p) J[i]=last(p) V[i]=v i=i+1 end @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse!(I,J,V,size(mat)...,+) else - return SparseArrays.sparse(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,size(mat)...,+) end end diff --git a/test/ExperimentalScalar.jl b/test/ExperimentalScalar.jl index 11040cf..1f1ee84 100644 --- a/test/ExperimentalScalar.jl +++ b/test/ExperimentalScalar.jl @@ -3,25 +3,25 @@ using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using Test +include("test_parallel.jl") -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - A=Tm{Float64,Int}(N^2,N^2) - partassemble!(A0,X,Y) - partassemble!(A,X,Y) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + testassemble!(A,grid) @test sparse(A0)≈sparse(A) end -function speed_build(N,Tm::Type{<:AbstractSparseMatrix}) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - 
A=Tm{Float64,Int}(N^2,N^2) - - tbase= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) - tx= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) +function speed_build(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + tbase= @belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + tx= @belapsed testassemble!($A,$grid) seconds=1 setup=(reset!($A)) tbase/tx end diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index d6bb113..4daf8e9 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -2,18 +2,23 @@ module ExperimentalXParallel using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools +using ExtendableGrids +#using MKLSparse +using SparseMatricesCSR using Test +using OhMyThreads +include("test_parallel.jl") -function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}) - X=1:N - Y=1:N - A=Tm{Float64,Int}(N^2,N^2,1) - allnp=[4,5,6,7,8] +function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A=Tm{Float64,Int}(nnodes,nnodes,1) + allnp=[10,15,20] # Assembele without partitioning # this gives the "base truth" to compare with - partassemble!(A,X,Y) + testassemble_parallel!(A,grid) # Save the nonzeros nz=copy(nonzeros(A)) @@ -21,7 +26,9 @@ function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}) # Reset the nonzeros, keeping the structure intact nonzeros(A).=0 # Parallel assembly whith np threads - partassemble!(A,X,Y, np) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @show num_partitions_per_color(pgrid) + testassemble_parallel!(A,pgrid) @test nonzeros(A)≈nz end end @@ -32,109 +39,170 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}, allnp=[4,5,6,7,8]) - X=1:N - Y=1:N +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) # Get the "ground truth" - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + nz=copy(nonzeros(A0)) for np in allnp # Make a new matrix and assemble parallel. 
# this should result in the same nonzeros - A=Tm(N^2,N^2,1) - partassemble!(A,X,Y, np) - @test nonzeros(A)≈nz + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + A=Tm(nnodes,nnodes, num_partitions(pgrid)) + @show num_partitions_per_color(pgrid) + @test checkpartitioning(pgrid) + testassemble_parallel!(A,pgrid) + @test nonzeros(A) ≈ nz end end -function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8]) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A0,X,Y) +function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) for np in allnp - A=Tm(N^2,N^2,1) - partassemble!(A,X,Y,np) - b=rand(N^2) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + A=Tm(nnodes,nnodes, num_partitions(pgrid)) + testassemble_parallel!(A,pgrid) flush!(A) - A*b + partcolors!(A,partition_pcolors(pgrid)) + b=rand(nnodes) @test A*b ≈ A0*b end end -function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) +function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + nz=copy(nonzeros(A0)) # Get the base timing # During setup, set matrix entries to zero while keeping the structure - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) + t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(nonzeros($A0).=0) result=[] - A=Tm(N^2,N^2,1) + A=Tm(nnodes,nnodes,1) for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure - partassemble!(A,X,Y,np) - t=@belapsed partassemble!($A,$X,$Y,$np,reset=false) seconds=1 setup=(nonzeros($A).=0) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @show num_partitions_per_color(pgrid) + reset!(A,num_partitions(pgrid)) + testassemble_parallel!(A,pgrid) + t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(nonzeros($A).=0) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end result end -function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix(N^2,N^2) - A=Tm(N^2,N^2,1) - partassemble!(A0,X,Y) +function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) nz=copy(nonzeros(A0)) reset!(A0) - partassemble!(A0,X,Y) + testassemble!(A0,grid) @assert nonzeros(A0)≈(nz) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - reset!(A) - partassemble!(A,X,Y) - @assert nonzeros(A)≈(nz) - # Get the base timing # During setup, reset matrix to empty state. - t0=@belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) + t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) result=[] + A=Tm(nnodes,nnodes,1) for np in allnp # Get the parallel timing # During setup, reset matrix to empty state. 
- t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + reset!(A,num_partitions(pgrid)) + @show num_partitions_per_color(pgrid) + t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(reset!($A,num_partitions($pgrid))) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end result end -function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) - X=1:N - Y=1:N - - A0=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A0,X,Y) - b=rand(N^2) +function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + b=rand(nnodes) t0=@belapsed $A0*$b seconds=1 - + A0b=A0*b result=[] + A=Tm(nnodes,nnodes,1) for np in allnp - A=Tm(N^2,N^2,1) - partassemble!(A,X,Y,np) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @show num_partitions_per_color(pgrid) + reset!(A,num_partitions(pgrid)) + testassemble_parallel!(A,pgrid) + flush!(A) + partcolors!(A,partition_pcolors(pgrid)) + t=@belapsed $A*$b seconds=1 + @assert A0b≈A*b push!(result,(np,round(t0/t,digits=2))) end result end + +function mymul(A::SparseMatrixCSR,v::AbstractVector) + y=copy(v) + A.n == size(v, 1) || throw(DimensionMismatch()) + A.m == size(y, 1) || throw(DimensionMismatch()) + @tasks for row = 1:size(y, 1) + y[row]=0.0 + @inbounds for nz in nzrange(A,row) + col = A.colval[nz] + y[row] += A.nzval[nz]*v[col] + end + end + return y +end + +function speedup_csrmul(N; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + t00=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + + reset!(A0) + testassemble!(A0,grid) + b=rand(nnodes) + t0=@belapsed $A0*$b seconds=1 + A0b=A0*b + + + t0x=@belapsed A0x=sparse(transpose(sparse($A0))) + + A0x=sparse(transpose(sparse(A0))) + + tx=@belapsed A=SparseMatrixCSR{1}(transpose($A0x)) + + A=SparseMatrixCSR{1}(transpose(sparse(A0x))) + t1=@belapsed $A*$b seconds=1 + + t2=@belapsed mymul($A, $b) seconds=1 + + @info t00,t0,t0x, tx,t1, t2 + + @assert A0b≈A*b + t0/t1 +end + + end diff --git a/test/Project.toml b/test/Project.toml index 8aa666d..78cab80 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" From d16e33d175f9f756e15715f0561d962f0c1eee6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 17 Jun 2024 22:57:21 +0200 Subject: [PATCH 33/44] parallelization + tests based on grid node partitioning --- Project.toml | 4 +- src/experimental/Experimental.jl | 4 - .../extendablesparsematrixparallel.jl | 63 ++--- src/experimental/parallel_testtools.jl | 224 ------------------ src/experimental/sparsematrixdict.jl | 3 +- src/experimental/sparsematrixlnkdict.jl | 3 +- src/experimental/sparsematrixlnkx.jl | 2 +- test/ExperimentalParallel.jl | 220 +++++++++++++++++ test/ExperimentalScalar.jl | 28 --- test/ExperimentalXParallel.jl | 197 +++++++++++++-- test/Project.toml | 4 + test/runtests.jl | 14 +- 12 files changed, 
425 insertions(+), 341 deletions(-) delete mode 100644 src/experimental/parallel_testtools.jl delete mode 100644 test/ExperimentalScalar.jl diff --git a/Project.toml b/Project.toml index e1bcecd..0372ecb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,10 @@ name = "ExtendableSparse" uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" authors = ["Juergen Fuhrmann "] -version = "1.4.0" +version = "1.5.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" -ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -18,7 +17,6 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 4346122..3c45c6d 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -79,9 +79,5 @@ const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParall ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelLNKDict - -include("parallel_testtools.jl") -export part2d, showgrid, partassemble!, assemblepartition! - end diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 97b4423..5325e76 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -9,34 +9,28 @@ mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension """ xmatrices::Vector{Tm} - nodeparts::Vector{Ti} - partnodes::Vector{Vector{Ti}} - colparts::Vector{Vector{Ti}} + colparts::Vector{Ti} + partnodes::Vector{Ti} end function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} + ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n), [Tm(m,n) for i=1:p], - zeros(Ti,n), - Vector{Ti}[], - Vector{Ti}[] + Ti[1,2], + Ti[1,n+1], ) end -function partcolors!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, partcolors) where {Tm, Tv, Ti} - ncol=maximum(partcolors) - colparts=[Ti[] for i=1:ncol] - for i=1:length(partcolors) - push!(colparts[partcolors[i]],i) - end +function partitioning!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} + ext.partnodes=partnodes ext.colparts=colparts ext end function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m, pc::Vector) where{Tm, Tv, Ti} ext=ExtendableSparseMatrixXParallel(m,n,length(pc)) - partcolors!(ext,pc) end @@ -44,7 +38,8 @@ function reset!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti},p::Integer) where m,n=size(ext.cscmatrix) ext.cscmatrix=spzeros(Tv, Ti, m, n) ext.xmatrices=[Tm(m,n) for i=1:p] - ext.nodeparts.=zero(Ti) + ext.colparts=Ti[1,2] + ext.partnodes=Ti[1,n+1] ext end @@ -52,34 +47,12 @@ function reset!(ext::ExtendableSparseMatrixXParallel) reset!(ext,length(ext.xmatrices)) end -function reset!(ext::ExtendableSparseMatrixXParallel,pc::Vector) - reset!(ext,length(pc)) - partcolors!(ext,pc) -end function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti} - ext.cscmatrix=sum!(ext.nodeparts, 
ext.xmatrices, ext.cscmatrix) + ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) np=length(ext.xmatrices) (m,n)=size(ext.cscmatrix) ext.xmatrices=[Tm(m,n) for i=1:np] - npts::Vector{Ti}=ext.nodeparts - pn=zeros(Ti,np) - for i=1:n - npi=npts[i] - if npi>0 - pn[npi]+=1 - end - end - partnodes=[zeros(Int,pn[i]) for i=1:np] - pn.=1 - for i=1:n - npi=ext.nodeparts[i] - if npi>0 - partnodes[npi][pn[npi]]=i - pn[npi]+=1 - end - end - ext.partnodes=partnodes ext end @@ -131,7 +104,6 @@ function rawupdateindex!(ext::ExtendableSparseMatrixXParallel, end end - # Needed in 1.9 function Base.:*(ext::ExtendableSparse.Experimental.ExtendableSparseMatrixXParallel{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA mul!(similar(x),ext,x) @@ -141,19 +113,18 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) flush!(ext) A=ext.cscmatrix colparts=ext.colparts + @show colparts partnodes=ext.partnodes + @show partnodes rows = SparseArrays.rowvals(A) vals = nonzeros(A) - r.=zero(eltype(ext)) m,n=size(A) - for icol=1:length(colparts) - part=colparts[icol] - @tasks for ip=1:length(part) - pnodes=partnodes[part[ip]] - for j in pnodes - @inbounds for i in nzrange(A,j) - r[rows[i]]+=vals[i]*x[j] + for icol=1:length(colparts)-1 + @tasks for ip=colparts[icol]:colparts[icol+1]-1 + for inode in partnodes[ip]:partnodes[ip+1]-1 + @inbounds for i in nzrange(A,inode) + r[rows[i]]+=vals[i]*x[inode] end end end diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl deleted file mode 100644 index 83f75cd..0000000 --- a/src/experimental/parallel_testtools.jl +++ /dev/null @@ -1,224 +0,0 @@ -import ChunkSplitters -# Methods to test parallel assembly -# Will eventually become part of the package. - -""" - $(SIGNATURES) - -Return colored partitioing of grid made up by `X` and `Y` for work with `max(nt,4)` threads -as a vector `p` of a vector pairs of index ranges such that `p[i]` containes partions -of color i which can be assembled independently. - -The current algorithm creates `nt^2` partitions with `nt` colors. -""" -function part2d(X,Y, nt) - nt=max(4,nt) - XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt)) - YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt)) - partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt] - ipart=1 - col=1 - for jp=1:nt - for ip=1:nt - push!(partitions[col], (XP[ip], YP[jp])) - col=(col -1 +1 )%nt+1 - end - col=(col -1 +2)%nt+1 - end - partitions -end - -function colpart2d(X,Y,nt) - Nx=length(X) - Ny=length(Y) - p=part2d(X,Y,nt) - pc=zeros(Int,sum(length,p)) - jp=1 - for icol=1:length(p) - for ip=1:length(p[icol]) - pc[jp]=icol - jp+=1 - end - end - p,pc -end - - -""" - showgrid(Makie, ColorSchemes, X,Y,nt) - -Show grid partitioned according to [`part2d`](@ref). Needs a makie variant and ColorSchemes -to be passed as modules. 
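# A rough sketch of the colparts/partnodes layout introduced in flush!/mul!
# above (assumed semantics): colparts[c]:colparts[c+1]-1 indexes the
# partitions of color c, and partnodes[p]:partnodes[p+1]-1 the matrix columns
# owned by partition p. The defaults Ti[1,2] and Ti[1,n+1] thus encode one
# color with a single partition owning all columns.
n = 8
colparts  = [1, 2]
partnodes = [1, n + 1]
for icol in 1:length(colparts)-1                 # colors: processed sequentially
    for ip in colparts[icol]:colparts[icol+1]-1  # partitions of one color: parallelizable
        println("partition ", ip, " owns columns ", partnodes[ip]:partnodes[ip+1]-1)
    end
end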
-""" -function showgrid(Makie, ColorSchemes, X,Y,nt) - f = Makie.Figure() - ax = Makie.Axis(f[1, 1]; aspect = 1) - p=part2d(X,Y,nt) - ncol=length(p) - @show sum(length,p), ncol - colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol) - poly=Vector{Makie.Point2f}(undef,4) - for icol = 1:ncol - for (xp, yp) in p[icol] - for j in yp - for i in xp - poly[1]=Makie.Point2f(X[i], Y[j]) - poly[2]=Makie.Point2f(X[i + 1], Y[j]) - poly[3]=Makie.Point2f(X[i + 1], Y[j + 1]) - poly[4]=Makie.Point2f(X[i], Y[j + 1]) - Makie.poly!(copy(poly),color = colors[icol]) - end - end - end - end - f -end - - -""" - $(SIGNATURES) - -Assemble edge for finite volume laplacian. -Used by [`partassemble!`](@ref). -""" -function assembleedge!(A,v,k,l) - rawupdateindex!(A,+,v,k,k) - rawupdateindex!(A,+,-v,k,l) - rawupdateindex!(A,+,-v,l,k) - rawupdateindex!(A,+,v,l,l) -end - -function assembleedge!(A,v,k,l,tid) - rawupdateindex!(A,+,v,k,k,tid) - rawupdateindex!(A,+,-v,k,l,tid) - rawupdateindex!(A,+,-v,l,k,tid) - rawupdateindex!(A,+,v,l,l,tid) -end - -""" - $(SIGNATURES) - -Assemble finite volume Laplacian + diagnonal term -on grid cell `i,j`. -Used by [`partassemble!`](@ref). -""" -function assemblecell!(A,lindexes,X,Y,i,j,d) - hx=X[i+1]-X[i] - hy=Y[j+1]-Y[j] - ij00=lindexes[i,j] - ij10=lindexes[i+1,j] - ij11=lindexes[i+1,j+1] - ij01=lindexes[i,j+1] - - assembleedge!(A,0.5*hx/hy,ij00,ij01) - assembleedge!(A,0.5*hx/hy,ij10,ij11) - assembleedge!(A,0.5*hy/hx,ij00,ij10) - assembleedge!(A,0.5*hy/hx,ij01,ij11) - v=0.25*hx*hy - rawupdateindex!(A,+,v*d,ij00,ij00) - rawupdateindex!(A,+,v*d,ij01,ij01) - rawupdateindex!(A,+,v*d,ij10,ij10) - rawupdateindex!(A,+,v*d,ij11,ij11) -end - -function assemblecell!(A,lindexes,X,Y,i,j,d,tid) - hx=X[i+1]-X[i] - hy=Y[j+1]-Y[j] - ij00=lindexes[i,j] - ij10=lindexes[i+1,j] - ij11=lindexes[i+1,j+1] - ij01=lindexes[i,j+1] - - assembleedge!(A,0.5*hx/hy,ij00,ij01,tid) - assembleedge!(A,0.5*hx/hy,ij10,ij11,tid) - assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) - assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) - v=0.25*hx*hy - rawupdateindex!(A,+,v*d,ij00,ij00,tid) - rawupdateindex!(A,+,v*d,ij01,ij01,tid) - rawupdateindex!(A,+,v*d,ij10,ij10,tid) - rawupdateindex!(A,+,v*d,ij11,ij11,tid) -end - -""" - $(SIGNATURES) - -Assemble finite volume Laplacian + diagnonal term -on grid cells in partition described by ranges xp,yp. -Used by [`partassemble!`](@ref). -""" -function assemblepartition!(A,lindexes,X,Y,xp,yp,d) - for j in yp - for i in xp - assemblecell!(A,lindexes,X,Y,i,j,d) - end - end -end - -function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid) - for j in yp - for i in xp - assemblecell!(A,lindexes,X,Y,i,j,d,tid) - end - end -end - -""" - partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) - -Partitioned, cellwise, multithreaded assembly of finite difference matrix for -` -Δu + d*u=f` with homogeneous Neumann bc on grid set up by coordinate vectors -`X` and `Y` partitioned for work with `nt` threads -Does not work during structure setup. 
-""" -function partassemble!(A,X,Y,nt=1;d=0.1) - Nx=length(X) - Ny=length(Y) - size(A,1)==Nx*Ny || error("incompatible size of A") - size(A,2)==Nx*Ny || error("incompatible size of A") - - lindexes=LinearIndices((1:Nx,1:Ny)) - if nt==1 - assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) - else - p=part2d(X,Y,nt) - for icol=1:length(p) - @tasks for (xp, yp) in p[icol] - assemblepartition!(A,lindexes,X,Y,xp,yp,d) - end - end - end - flush!(A) -end - - -function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true) - Nx=length(X) - Ny=length(Y) - - size(A,1)==Nx*Ny || error("incompatible size of A") - size(A,2)==Nx*Ny || error("incompatible size of A") - - lindexes=LinearIndices((1:Nx,1:Ny)) - if nt==1 - reset!(A,1) - assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1) - else - p,pc=colpart2d(X,Y,nt) - if reset - reset!(A,pc) - end - jp0=0 - for icol=1:length(p) - npc=length(p[icol]) - @tasks for ip=1:npc - (xp, yp)=p[icol][ip] - assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip) - end - jp0+=npc - end - end - flush!(A) -end - - diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 2bd6295..c5ee469 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -83,7 +83,7 @@ function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC cscmatrix end -function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} +function Base.sum(dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} lnew=sum(m->length(m.values),dictmatrices) if lnew>0 (;colptr,nzval,rowval,m,n)=cscmatrix @@ -105,7 +105,6 @@ function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatri ip=1 for m in dictmatrices for (p,v) in m.values - nodeparts[last(p)]=ip I[i]=first(p) J[i]=last(p) V[i]=v diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index a53df25..bcc67f4 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -382,7 +382,7 @@ Add SparseMatrixCSC matrix and [`SparseMatrixLNKDict`](@ref) lnk, returning a S """ Base.:+(lnk::SparseMatrixLNKDict, csc::SparseMatrixCSC) = add_directly(lnk, csc) -function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} +function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} lnew=sum(nnz,lnkdictmatrices) if lnew>0 (;colptr,nzval,rowval,m,n)=cscmatrix @@ -404,7 +404,6 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cs ip=1 for lnk in lnkdictmatrices for (j,k) in lnk.colstart - nodeparts[j]=ip while k>0 I[i]=lnk.rowval[k] J[i]=j diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index f7a322a..d445728 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -377,7 +377,7 @@ Add SparseMatrixCSC matrix and [`SparseMatrixLNKX`](@ref) lnk, returning a Spar """ Base.:+(lnk::SparseMatrixLNKX, csc::SparseMatrixCSC) = add_directly(lnk, csc) -function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} +function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, 
cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
     lnew=sum(nnz,lnkdictmatrices)
     if lnew>0
         (;colptr,nzval,rowval,m,n)=cscmatrix
diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl
index 936f566..7fe1029 100644
--- a/test/ExperimentalParallel.jl
+++ b/test/ExperimentalParallel.jl
@@ -6,6 +6,226 @@ using BenchmarkTools
 using OhMyThreads: @tasks
 using Test
 
+import ChunkSplitters
+# Methods to test parallel assembly
+# Will eventually become part of the package.
+
+"""
+
+Return a colored partitioning of the grid made up by `X` and `Y` for work with `max(nt,4)` threads
+as a vector `p` of vectors of pairs of index ranges such that `p[i]` contains the partitions
+of color `i`, which can be assembled independently.
+
+The current algorithm creates `nt^2` partitions with `nt` colors.
+"""
+function part2d(X,Y, nt)
+    nt=max(4,nt)
+    XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt))
+    YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt))
+    partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt]
+    ipart=1
+    col=1
+    for jp=1:nt
+        for ip=1:nt
+            push!(partitions[col], (XP[ip], YP[jp]))
+            col=(col -1 +1 )%nt+1
+        end
+        col=(col -1 +2)%nt+1
+    end
+    partitions
+end
+
+function colpart2d(X,Y,nt)
+    Nx=length(X)
+    Ny=length(Y)
+    p=part2d(X,Y,nt)
+    pc=zeros(Int,sum(length,p))
+    jp=1
+    for icol=1:length(p)
+        for ip=1:length(p[icol])
+            pc[jp]=icol
+            jp+=1
+        end
+    end
+    p,pc
+end
+
+
+"""
+    showgrid(Makie, ColorSchemes, X,Y,nt)
+
+Show grid partitioned according to [`part2d`](@ref). Needs a Makie variant and ColorSchemes
+to be passed as modules.
+"""
+function showgrid(Makie, ColorSchemes, X,Y,nt)
+    f = Makie.Figure()
+    ax = Makie.Axis(f[1, 1]; aspect = 1)
+    p=part2d(X,Y,nt)
+    ncol=length(p)
+    @show sum(length,p), ncol
+    colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol)
+    poly=Vector{Makie.Point2f}(undef,4)
+    for icol = 1:ncol
+        for (xp, yp) in p[icol]
+            for j in yp
+                for i in xp
+                    poly[1]=Makie.Point2f(X[i], Y[j])
+                    poly[2]=Makie.Point2f(X[i + 1], Y[j])
+                    poly[3]=Makie.Point2f(X[i + 1], Y[j + 1])
+                    poly[4]=Makie.Point2f(X[i], Y[j + 1])
+                    Makie.poly!(copy(poly),color = colors[icol])
+                end
+            end
+        end
+    end
+    f
+end
+
+
+"""
+
+Assemble edge for finite volume Laplacian.
+Used by [`partassemble!`](@ref).
+"""
+function assembleedge!(A,v,k,l)
+    rawupdateindex!(A,+,v,k,k)
+    rawupdateindex!(A,+,-v,k,l)
+    rawupdateindex!(A,+,-v,l,k)
+    rawupdateindex!(A,+,v,l,l)
+end
+
+function assembleedge!(A,v,k,l,tid)
+    rawupdateindex!(A,+,v,k,k,tid)
+    rawupdateindex!(A,+,-v,k,l,tid)
+    rawupdateindex!(A,+,-v,l,k,tid)
+    rawupdateindex!(A,+,v,l,l,tid)
+end
+
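# A small usage sketch for part2d as defined above, assuming ChunkSplitters
# is available: nt^2 partitions in nt colors; partitions sharing a color
# cover disjoint cell ranges and can therefore be assembled concurrently.
X = range(0, 1, length=17)
Y = range(0, 1, length=17)
p = part2d(X, Y, 4)
@assert length(p) == 4        # nt colors ...
@assert sum(length, p) == 16  # ... holding nt^2 partitions in total
for (xp, yp) in p[1]          # cell index ranges of all color-1 partitions
    println("cells ", xp, " x ", yp)
end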
+"""
+Assemble finite volume Laplacian + diagonal term
+on grid cell `i,j`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblecell!(A,lindexes,X,Y,i,j,d)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+
+    assembleedge!(A,0.5*hx/hy,ij00,ij01)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11)
+    v=0.25*hx*hy
+    rawupdateindex!(A,+,v*d,ij00,ij00)
+    rawupdateindex!(A,+,v*d,ij01,ij01)
+    rawupdateindex!(A,+,v*d,ij10,ij10)
+    rawupdateindex!(A,+,v*d,ij11,ij11)
+end
+
+function assemblecell!(A,lindexes,X,Y,i,j,d,tid)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+
+    assembleedge!(A,0.5*hx/hy,ij00,ij01,tid)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11,tid)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10,tid)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11,tid)
+    v=0.25*hx*hy
+    rawupdateindex!(A,+,v*d,ij00,ij00,tid)
+    rawupdateindex!(A,+,v*d,ij01,ij01,tid)
+    rawupdateindex!(A,+,v*d,ij10,ij10,tid)
+    rawupdateindex!(A,+,v*d,ij11,ij11,tid)
+end
+
+"""
+
+Assemble finite volume Laplacian + diagonal term
+on grid cells in partition described by ranges xp,yp.
+Used by [`partassemble!`](@ref).
+"""
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+    for j in yp
+        for i in xp
+            assemblecell!(A,lindexes,X,Y,i,j,d)
+        end
+    end
+end
+
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid)
+    for j in yp
+        for i in xp
+            assemblecell!(A,lindexes,X,Y,i,j,d,tid)
+        end
+    end
+end
+
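# A hand-checkable instance of the cell stencil above, using the
# rawupdateindex!-based assemblecell! just defined: a single unit cell
# (hx = hy = 1) with d = 0 gives diagonal entries 1.0, entries -0.5 towards
# the two edge neighbors of each node, and zero row sums.
using ExtendableSparse, SparseArrays
A = ExtendableSparseMatrix(4, 4)
lind = LinearIndices((1:2, 1:2))
assemblecell!(A, lind, [0.0, 1.0], [0.0, 1.0], 1, 1, 0.0)
flush!(A)
Matrix(sparse(A))   # the 4x4 stencil of the single cell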
+"""
+    partassemble!(A,X,Y,nt=1; d=0.1)
+
+Partitioned, cellwise, multithreaded assembly of the finite difference matrix for
+`-Δu + d*u=f` with homogeneous Neumann bc on the grid set up by coordinate vectors
+`X` and `Y`, partitioned for work with `nt` threads.
+Does not work during the structure setup phase.
+"""
+function partassemble!(A,X,Y,nt=1;d=0.1)
+    Nx=length(X)
+    Ny=length(Y)
+    size(A,1)==Nx*Ny || error("incompatible size of A")
+    size(A,2)==Nx*Ny || error("incompatible size of A")
+
+    lindexes=LinearIndices((1:Nx,1:Ny))
+    if nt==1
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Ny-1,d)
+    else
+        p=part2d(X,Y,nt)
+        for icol=1:length(p)
+            @tasks for (xp, yp) in p[icol]
+                assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+            end
+        end
+    end
+    flush!(A)
+end
+
+
+function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true)
+    Nx=length(X)
+    Ny=length(Y)
+
+    size(A,1)==Nx*Ny || error("incompatible size of A")
+    size(A,2)==Nx*Ny || error("incompatible size of A")
+
+    lindexes=LinearIndices((1:Nx,1:Ny))
+    if nt==1
+        reset!(A,1)
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Ny-1,d,1)
+    else
+        p,pc=colpart2d(X,Y,nt)
+        if reset
+            reset!(A,pc)
+        end
+        jp0=0
+        for icol=1:length(p)
+            npc=length(p[icol])
+            @tasks for ip=1:npc
+                (xp, yp)=p[icol][ip]
+                assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip)
+            end
+            jp0+=npc
+        end
+    end
+    flush!(A)
+end
+
+
+
 """
 `test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)`
diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl
index d6bb113..4daf8e9 100644
--- a/test/ExperimentalXParallel.jl
+++ b/test/ExperimentalXParallel.jl
@@ -4,32 +4,179 @@ using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental
 using BenchmarkTools
 using ExtendableGrids
 #using MKLSparse
-using SparseMatricesCSR
+#using SparseMatricesCSR
 using Test
-using OhMyThreads
+using ExtendableSparse, ExtendableGrids, Metis
+using LinearAlgebra
+using BenchmarkTools
+using Test
+using OhMyThreads: @tasks
+using RecursiveFactorization
+
+function testgrid(N; dim=3)
+    X=range(0,1,length=N^(1.0/dim)|> ceil |> Int)
+    simplexgrid((X for i=1:dim)...)
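    # Tensor product simplex grid with roughly N nodes in total: each
    # coordinate direction gets ceil(N^(1/dim)) points, e.g. dim=3,
    # N=20000 gives 28 points per direction, hence 28^3 = 21952 nodes.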
+end + + +function coordmatrix!(C,coord, cellnodes,k) + spacedim=size(coord,1) + celldim=size(cellnodes,1) + @inbounds for jj=1:celldim + C[1,jj]=1 + @inbounds for ii=1:spacedim + C[ii+1,jj]=coord[ii,cellnodes[jj,k]] + end + end +end + +function gradient!(G,C,factdim,I,ipiv) + clu=RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + vol=abs(det(clu))/factdim + ldiv!(G,clu,I) + return vol +end + +function scalpro(G,dim,jl,il) + s=0.0 + @inbounds @simd for k=1:dim + s+=G[jl,k+1]*G[il,k+1] + end + return s +end + +function stiffness!(S,dim,G) + @inbounds for il=1:dim+1 + S[il,il]=scalpro(G,dim,il,il) + @inbounds for jl=il+1:dim+1 + S[il,jl]=scalpro(G,dim,jl,il) + S[jl,il]=S[il,jl] + end + end + return S +end + +function testassemble!(A_h,grid) + coord=grid[Coordinates] + cellnodes=grid[CellNodes] + ncells=num_cells(grid) + dim=size(coord,1) + lnodes=dim+1 + factdim::Float64=factorial(dim) + S=zeros(lnodes, lnodes) # local stiffness matrix + C=zeros(lnodes,lnodes) # local coordinate matrix + G=zeros(lnodes, lnodes) # shape function gradients + ipiv=zeros(Int,lnodes) + I=Matrix(Diagonal(ones(lnodes))) + ncells=size(cellnodes,2) + for icell=1:ncells + coordmatrix!(C,coord,cellnodes,icell) + vol=gradient!(G,C,factdim,I,ipiv) + stiffness!(S,dim,G) + for il=1:lnodes + i=cellnodes[il,icell] + rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i) + for jl=1:lnodes + j=cellnodes[jl,icell] + rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j) + end + end + end + flush!(A_h) +end + +function testassemble_parallel!(A_h,grid) + coord=grid[Coordinates] + cellnodes=grid[CellNodes] + ncells=num_cells(grid) + dim=size(coord,1) + lnodes=dim+1 + npart=num_partitions(grid) + factdim::Float64=factorial(dim) + SS=[zeros(lnodes, lnodes) for i=1:npart] # local stiffness matrix + CC=[zeros(lnodes, lnodes) for i=1:npart] # local coordinate matrix + GG=[zeros(lnodes, lnodes) for i=1:npart] # shape function gradients + IP=[zeros(Int,lnodes) for i=1:npart] # shape function gradients + I=Matrix(Diagonal(ones(lnodes))) + ncells=size(cellnodes,2) + for color in pcolors(grid) + @tasks for part in pcolor_partitions(grid,color) + C=CC[part] + S=SS[part] + G=GG[part] + ipiv=IP[part] + for icell in partition_cells(grid, part) + coordmatrix!(C,coord,cellnodes,icell) + vol=gradient!(G,C,factdim,I,ipiv) + stiffness!(S,dim,G) + for il=1:lnodes + i=cellnodes[il,icell] + rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i, part) + for jl=1:lnodes + j=cellnodes[jl,icell] + rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j, part) + end + end + end + end + end + flush!(A_h) +end + + + +function testassemble(grid) + nnodes=num_nodes(grid) + A_h=ExtendableSparseMatrix(nnodes,nnodes) + testassemble!(A_h,grid) + A_h.cscmatrix.nzval.=0 + testassemble!(A_h,grid) +end + + +function test_correctness_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + testassemble!(A,grid) + @test sparse(A0)≈sparse(A) +end -function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) +function speed_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + tbase= @belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + tx= @belapsed testassemble!($A,$grid) seconds=1 setup=(reset!($A)) + tbase/tx +end + + +function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; 
allnp=[10,15,20], dim=3) grid=testgrid(N;dim) nnodes=num_nodes(grid) A=Tm{Float64,Int}(nnodes,nnodes,1) - allnp=[10,15,20] + # Assembele without partitioning # this gives the "base truth" to compare with testassemble_parallel!(A,grid) # Save the nonzeros - nz=copy(nonzeros(A)) + nz=sort(copy(nonzeros(A))) for np in allnp # Reset the nonzeros, keeping the structure intact nonzeros(A).=0 # Parallel assembly whith np threads pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + reset!(A,np) @show num_partitions_per_color(pgrid) testassemble_parallel!(A,pgrid) - @test nonzeros(A)≈nz + @test sort(nonzeros(A))≈nz end end @@ -45,16 +192,16 @@ function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15 # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) - nz=copy(nonzeros(A0)) + nz=sort(copy(nonzeros(A0))) for np in allnp # Make a new matrix and assemble parallel. # this should result in the same nonzeros pgrid=partition(grid,PlainMetisPartitioning(npart=np)) A=Tm(nnodes,nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - @test checkpartitioning(pgrid) + @test check_partitioning(pgrid) testassemble_parallel!(A,pgrid) - @test nonzeros(A) ≈ nz + @test sort(nonzeros(A)) ≈ nz end end @@ -64,14 +211,17 @@ function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,2 # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) + b=rand(nnodes) + A0b=A0*b for np in allnp pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @test check_partitioning(pgrid) A=Tm(nnodes,nnodes, num_partitions(pgrid)) + ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) testassemble_parallel!(A,pgrid) - flush!(A) - partcolors!(A,partition_pcolors(pgrid)) - b=rand(nnodes) - @test A*b ≈ A0*b + invp=invperm(pgrid[NodePermutation]) + @show norm(A0b[invp] - A*b[invp], Inf) + @test A0b[invp] ≈ A*b[invp] end end @@ -81,7 +231,7 @@ function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], di # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) - nz=copy(nonzeros(A0)) + nz=copy(nonzeros(A0)) |>sort # Get the base timing # During setup, set matrix entries to zero while keeping the structure t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(nonzeros($A0).=0) @@ -95,7 +245,7 @@ function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], di reset!(A,num_partitions(pgrid)) testassemble_parallel!(A,pgrid) t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(nonzeros($A).=0) - @assert nonzeros(A)≈nz + @assert sort(nonzeros(A))≈nz push!(result,(np,round(t0/t,digits=2))) end result @@ -107,11 +257,11 @@ function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) - nz=copy(nonzeros(A0)) + nz=nonzeros(A0) reset!(A0) testassemble!(A0,grid) @assert nonzeros(A0)≈(nz) - + nz=sort(nz) # Get the base timing # During setup, reset matrix to empty state. 
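    # reset! in the setup expression below returns the matrix to its freshly
    # constructed state, so each timed sample includes structure creation,
    # not only the update of already existing entries.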
t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) @@ -125,7 +275,7 @@ function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim reset!(A,num_partitions(pgrid)) @show num_partitions_per_color(pgrid) t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(reset!($A,num_partitions($pgrid))) - @assert nonzeros(A)≈nz + @assert sort(nonzeros(A))≈nz push!(result,(np,round(t0/t,digits=2))) end result @@ -148,16 +298,16 @@ function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3 reset!(A,num_partitions(pgrid)) testassemble_parallel!(A,pgrid) flush!(A) - partcolors!(A,partition_pcolors(pgrid)) - + ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) t=@belapsed $A*$b seconds=1 - @assert A0b≈A*b + invp=invperm(pgrid[NodePermutation]) + @assert A0b[invp] ≈ A*b[invp] push!(result,(np,round(t0/t,digits=2))) end result end - +#= function mymul(A::SparseMatrixCSR,v::AbstractVector) y=copy(v) A.n == size(v, 1) || throw(DimensionMismatch()) @@ -203,6 +353,7 @@ function speedup_csrmul(N; dim=3) t0/t1 end +=# end diff --git a/test/Project.toml b/test/Project.toml index 78cab80..24a28c6 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" @@ -9,13 +10,16 @@ IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] +ExtendableGrids = "1.7" IterativeSolvers = "0.9" diff --git a/test/runtests.jl b/test/runtests.jl index 244dbb5..25d5d4c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,19 +9,17 @@ using BenchmarkTools using MultiFloats using ForwardDiff -@testset "ExperimentalScalar" begin - include("ExperimentalScalar.jl") + +@testset "ExperimentalXParallel" begin + include("ExperimentalXParallel.jl") for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] - for N in [100,rand(30:200),500] - ExperimentalScalar.test_correctness_build(N,Tm) + for N in [10000,20000] + ExperimentalXParallel.test_correctness_build_seq(N,Tm) end end -end -@testset "ExperimentalXParallel" begin - include("ExperimentalXParallel.jl") for Tm in [ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] - for N in [100,rand(30:200),500] + for N in [10000,20000] ExperimentalXParallel.test_correctness_update(N,Tm) ExperimentalXParallel.test_correctness_build(N,Tm) ExperimentalXParallel.test_correctness_mul(N,Tm) From d796b787886ce76f37477c8aeb0ecad9a8afce64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 18 Jun 2024 
09:57:00 +0200 Subject: [PATCH 34/44] move all experimental code to experimental subdir --- docs/make.jl | 1 + src/experimental/Experimental.jl | 10 +++++----- .../ExtendableSparseParallel.jl | 0 .../ilu_Al-Kurdi_Mittal.jl | 0 .../ExtendableSparseMatrixParallel}/iluam.jl | 0 .../pilu_Al-Kurdi_Mittal.jl | 0 .../ExtendableSparseMatrixParallel}/piluam.jl | 0 .../ExtendableSparseMatrixParallel/preparatory.jl | 0 .../ExtendableSparseMatrixParallel/struct_flush.jl | 0 .../ExtendableSparseMatrixParallel/supersparse.jl | 0 10 files changed, 6 insertions(+), 5 deletions(-) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/ilu_Al-Kurdi_Mittal.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/iluam.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/pilu_Al-Kurdi_Mittal.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/piluam.jl (100%) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/preparatory.jl (100%) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/struct_flush.jl (100%) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/supersparse.jl (100%) diff --git a/docs/make.jl b/docs/make.jl index 3a70bf5..5dbe7c4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,6 +5,7 @@ function mkdocs() makedocs(; sitename = "ExtendableSparse.jl", modules = [ExtendableSparse], doctest = false, + warnonly = true, clean = false, authors = "J. Fuhrmann", repo = "https://github.com/j-fu/ExtendableSparse.jl", diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 3c45c6d..546810e 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -13,15 +13,15 @@ using OhMyThreads: @tasks import ExtendableSparse: factorize!, update! 
-include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) -include(joinpath(@__DIR__, "..", "factorizations","ilu_Al-Kurdi_Mittal.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ilu_Al-Kurdi_Mittal.jl")) #using .ILUAM -include(joinpath(@__DIR__, "..", "factorizations","pilu_Al-Kurdi_Mittal.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "pilu_Al-Kurdi_Mittal.jl")) #using .PILUAM -include(joinpath(@__DIR__, "..", "factorizations","iluam.jl")) -include(joinpath(@__DIR__, "..", "factorizations","piluam.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel" ,"iluam.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "piluam.jl")) @eval begin @makefrommatrix ILUAMPreconditioner diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl rename to src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl similarity index 100% rename from src/factorizations/ilu_Al-Kurdi_Mittal.jl rename to src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl diff --git a/src/factorizations/iluam.jl b/src/experimental/ExtendableSparseMatrixParallel/iluam.jl similarity index 100% rename from src/factorizations/iluam.jl rename to src/experimental/ExtendableSparseMatrixParallel/iluam.jl diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl similarity index 100% rename from src/factorizations/pilu_Al-Kurdi_Mittal.jl rename to src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl diff --git a/src/factorizations/piluam.jl b/src/experimental/ExtendableSparseMatrixParallel/piluam.jl similarity index 100% rename from src/factorizations/piluam.jl rename to src/experimental/ExtendableSparseMatrixParallel/piluam.jl diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/preparatory.jl rename to src/experimental/ExtendableSparseMatrixParallel/preparatory.jl diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl rename to src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/supersparse.jl rename to src/experimental/ExtendableSparseMatrixParallel/supersparse.jl From 6cd48cdc64c5b279b5808229436cbca063a72aa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Wed, 19 Jun 2024 12:52:28 +0200 Subject: [PATCH 35/44] tweak tests --- src/experimental/extendablesparsematrixparallel.jl | 6 ++---- src/experimental/sparsematrixlnkx.jl | 1 - test/ExperimentalXParallel.jl | 9 +++++---- test/runtests.jl | 12 
++++++------ 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 5325e76..ba70a65 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -113,16 +113,14 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) flush!(ext) A=ext.cscmatrix colparts=ext.colparts - @show colparts partnodes=ext.partnodes - @show partnodes rows = SparseArrays.rowvals(A) vals = nonzeros(A) r.=zero(eltype(ext)) m,n=size(A) for icol=1:length(colparts)-1 - @tasks for ip=colparts[icol]:colparts[icol+1]-1 - for inode in partnodes[ip]:partnodes[ip+1]-1 + @tasks for ip in colparts[icol]:colparts[icol+1]-1 + @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1 @inbounds for i in nzrange(A,inode) r[rows[i]]+=vals[i]*x[inode] end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index d445728..bd2cdcf 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -400,7 +400,6 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::S for lnk in lnkdictmatrices for j=1:n k=lnk.colstart[j] - nodeparts[j]=ip while k>0 I[i]=lnk.rowval[k] J[i]=j diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index b87b71a..e6322f4 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -199,7 +199,7 @@ function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15 pgrid=partition(grid,PlainMetisPartitioning(npart=np)) A=Tm(nnodes,nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - @test check_partitioning(pgrid) + @test check_partitioning(pgrid, cellpartonly=true) testassemble_parallel!(A,pgrid) @test sort(nonzeros(A)) ≈ nz end @@ -215,13 +215,14 @@ function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,2 A0b=A0*b for np in allnp pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - @test check_partitioning(pgrid) + @test check_partitioning(pgrid, cellpartonly=false) A=Tm(nnodes,nnodes, num_partitions(pgrid)) ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) testassemble_parallel!(A,pgrid) invp=invperm(pgrid[NodePermutation]) - @show norm(A0b[invp] - A*b[invp], Inf) - @test A0b[invp] ≈ A*b[invp] + diff=norm(A0b[invp] - A*b[invp], Inf) + @show diff + @test diff Date: Mon, 24 Jun 2024 00:18:10 +0200 Subject: [PATCH 36/44] untweak tests, use partitioning correction --- .JuliaFormatter.toml | 7 +- src/experimental/sparsematrixlnkdict.jl | 11 +- test/ExperimentalXParallel.jl | 397 ++++++++++++------------ 3 files changed, 219 insertions(+), 196 deletions(-) diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 02aa07d..5458760 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,4 +1,7 @@ -style = "sciml" +style = "yas" always_for_in = false -separate_kwargs_with_semicolon = true format_markdown = true +import_to_using = false +pipe_to_function_call = false +short_to_long_function_def = false +always_use_return = false \ No newline at end of file diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index bcc67f4..6ae7a6c 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -216,7 +216,7 @@ It assumes that `op(0,0)==0`. If `v` is zero a new entry is created nevertheless. 
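For orientation, the semantics being exercised here: `updateindex!` skips zero values, while `rawupdateindex!` always creates a structural entry, which is what makes it usable for building the sparsity pattern. A minimal sketch with the exported API (matrix size and indices are arbitrary):

```julia
using ExtendableSparse, SparseArrays

A = ExtendableSparseMatrix(4, 4)
rawupdateindex!(A, +, 0.0, 1, 2) # inserts a structural entry even though v == 0
updateindex!(A, +, 0.0, 3, 4)    # v == 0: no entry is created
flush!(A)
@assert nnz(A) == 1
```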
""" function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) +@time k, k0 = findindex(lnk, i, j) if k > 0 lnk.nzval[k] = op(lnk.nzval[k], v) else @@ -423,6 +423,15 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix return cscmatrix end +function reset!(m::SparseMatrixLNKDict{Tv,Ti}) where {Tv,Ti} + m.nnz=0 + m.nentries=0 + m.colptr=zeros(Ti,10) + m.colstart::Dict{Ti,Ti} + m.rowval=zeros(Ti,10) + m.nzval=zeros(Ti,10) + m +end """ diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index e6322f4..03bf2ad 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -1,6 +1,6 @@ module ExperimentalXParallel -using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental +using ExtendableSparse, SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using ExtendableGrids #using MKLSparse @@ -15,107 +15,105 @@ using OhMyThreads: @tasks using RecursiveFactorization function testgrid(N; dim=3) - X=range(0,1,length=N^(1.0/dim)|> ceil |> Int) - simplexgrid((X for i=1:dim)...) + X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int) + simplexgrid((X for i = 1:dim)...) end - -function coordmatrix!(C,coord, cellnodes,k) +function coordmatrix!(C, coord, cellnodes, k) spacedim=size(coord,1) celldim=size(cellnodes,1) - @inbounds for jj=1:celldim - C[1,jj]=1 - @inbounds for ii=1:spacedim - C[ii+1,jj]=coord[ii,cellnodes[jj,k]] + @inbounds for jj = 1:celldim + C[1, jj] = 1 + @inbounds for ii = 1:spacedim + C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] end end end -function gradient!(G,C,factdim,I,ipiv) - clu=RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) - vol=abs(det(clu))/factdim - ldiv!(G,clu,I) - return vol +function gradient!(G, C, factdim, I, ipiv) + clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + ldiv!(G, clu, I) + abs(det(clu)) / factdim end -function scalpro(G,dim,jl,il) - s=0.0 - @inbounds @simd for k=1:dim - s+=G[jl,k+1]*G[il,k+1] +function scalpro(G, dim, jl, il) + s = 0.0 + @inbounds @simd for k = 1:dim + s += G[jl, k + 1] * G[il, k + 1] end return s end -function stiffness!(S,dim,G) - @inbounds for il=1:dim+1 - S[il,il]=scalpro(G,dim,il,il) - @inbounds for jl=il+1:dim+1 - S[il,jl]=scalpro(G,dim,jl,il) - S[jl,il]=S[il,jl] +function stiffness!(S, dim, G) + @inbounds for il = 1:(dim + 1) + S[il, il] = scalpro(G, dim, il, il) + @inbounds for jl = (il + 1):(dim + 1) + S[il, jl] = scalpro(G, dim, jl, il) + S[jl, il] = S[il, jl] end end return S end -function testassemble!(A_h,grid) - coord=grid[Coordinates] - cellnodes=grid[CellNodes] - ncells=num_cells(grid) - dim=size(coord,1) - lnodes=dim+1 - factdim::Float64=factorial(dim) - S=zeros(lnodes, lnodes) # local stiffness matrix - C=zeros(lnodes,lnodes) # local coordinate matrix - G=zeros(lnodes, lnodes) # shape function gradients - ipiv=zeros(Int,lnodes) - I=Matrix(Diagonal(ones(lnodes))) - ncells=size(cellnodes,2) - for icell=1:ncells - coordmatrix!(C,coord,cellnodes,icell) - vol=gradient!(G,C,factdim,I,ipiv) - stiffness!(S,dim,G) - for il=1:lnodes - i=cellnodes[il,icell] - rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i) - for jl=1:lnodes - j=cellnodes[jl,icell] - rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j) +function testassemble!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + factdim::Float64 = factorial(dim) + S = zeros(lnodes, lnodes) # local stiffness matrix + C = zeros(lnodes, lnodes) 
# local coordinate matrix + G = zeros(lnodes, lnodes) # shape function gradients + ipiv = zeros(Int, lnodes) + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for icell = 1:ncells + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) end end end flush!(A_h) end -function testassemble_parallel!(A_h,grid) - coord=grid[Coordinates] - cellnodes=grid[CellNodes] - ncells=num_cells(grid) - dim=size(coord,1) - lnodes=dim+1 - npart=num_partitions(grid) - factdim::Float64=factorial(dim) - SS=[zeros(lnodes, lnodes) for i=1:npart] # local stiffness matrix - CC=[zeros(lnodes, lnodes) for i=1:npart] # local coordinate matrix - GG=[zeros(lnodes, lnodes) for i=1:npart] # shape function gradients - IP=[zeros(Int,lnodes) for i=1:npart] # shape function gradients - I=Matrix(Diagonal(ones(lnodes))) - ncells=size(cellnodes,2) +function testassemble_parallel!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + npart = num_partitions(grid) + factdim::Float64 = factorial(dim) + SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix + CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix + GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients + IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) for color in pcolors(grid) - @tasks for part in pcolor_partitions(grid,color) - C=CC[part] - S=SS[part] - G=GG[part] - ipiv=IP[part] + @tasks for part in pcolor_partitions(grid, color) + C = CC[part] + S = SS[part] + G = GG[part] + ipiv = IP[part] for icell in partition_cells(grid, part) - coordmatrix!(C,coord,cellnodes,icell) - vol=gradient!(G,C,factdim,I,ipiv) - stiffness!(S,dim,G) - for il=1:lnodes - i=cellnodes[il,icell] - rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i, part) - for jl=1:lnodes - j=cellnodes[jl,icell] - rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j, part) + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) end end end @@ -124,59 +122,50 @@ function testassemble_parallel!(A_h,grid) flush!(A_h) end - - -function testassemble(grid) - nnodes=num_nodes(grid) - A_h=ExtendableSparseMatrix(nnodes,nnodes) - testassemble!(A_h,grid) - A_h.cscmatrix.nzval.=0 - testassemble!(A_h,grid) +function test_correctness_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + A = Tm{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + testassemble!(A, grid) + @test sparse(A0) ≈ sparse(A) end - -function test_correctness_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - A=Tm{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - testassemble!(A,grid) - @test sparse(A0)≈sparse(A) +function speedup_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid = 
testgrid(N; dim) + nnodes = num_nodes(grid) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + A = Tm{Float64,Int}(nnodes, nnodes) + tbase = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0)) + tx = @belapsed testassemble!($A, $grid) seconds = 1 setup = (reset!($A)) + tbase / tx end -function speed_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - A=Tm{Float64,Int}(nnodes,nnodes) - tbase= @belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) - tx= @belapsed testassemble!($A,$grid) seconds=1 setup=(reset!($A)) - tbase/tx -end - - -function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) - A=Tm{Float64,Int}(nnodes,nnodes,1) - +function test_correctness_update(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A = Tm{Float64,Int}(nnodes, nnodes, 1) # Assembele without partitioning # this gives the "base truth" to compare with - testassemble_parallel!(A,grid) + testassemble_parallel!(A, grid) # Save the nonzeros - nz=sort(copy(nonzeros(A))) + nz = sort(copy(nonzeros(A))) for np in allnp # Reset the nonzeros, keeping the structure intact - nonzeros(A).=0 + nonzeros(A) .= 0 # Parallel assembly whith np threads - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - reset!(A,np) + pgrid = partition(grid, Tp(; npart=np)) + reset!(A, np) @show num_partitions_per_color(pgrid) - testassemble_parallel!(A,pgrid) - @test sort(nonzeros(A))≈nz + testassemble_parallel!(A, pgrid) + @test sort(nonzeros(A)) ≈ nz end end @@ -186,124 +175,148 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function test_correctness_build(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - nz=sort(copy(nonzeros(A0))) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = sort(copy(nonzeros(A0))) for np in allnp # Make a new matrix and assemble parallel. 
# this should result in the same nonzeros - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - A=Tm(nnodes,nnodes, num_partitions(pgrid)) + pgrid = partition(grid, Tp(; npart=np)) + A = Tm(nnodes, nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) @test check_partitioning(pgrid, cellpartonly=true) - testassemble_parallel!(A,pgrid) + testassemble_parallel!(A, pgrid) @test sort(nonzeros(A)) ≈ nz end end -function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function test_correctness_mul(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - b=rand(nnodes) - A0b=A0*b + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + b = rand(nnodes) + A0b = A0 * b for np in allnp - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + pgrid = partition(grid, Tp(; npart=np)) @test check_partitioning(pgrid, cellpartonly=false) - A=Tm(nnodes,nnodes, num_partitions(pgrid)) - ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) - testassemble_parallel!(A,pgrid) - invp=invperm(pgrid[NodePermutation]) - diff=norm(A0b[invp] - A*b[invp], Inf) + A = Tm(nnodes, nnodes, num_partitions(pgrid)) + ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], + pgrid[PartitionNodes]) + testassemble_parallel!(A, pgrid) + invp = invperm(pgrid[NodePermutation]) + diff = norm(A0b[invp] - A * b[invp], Inf) @show diff - @test diffsort + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = copy(nonzeros(A0)) |> sort # Get the base timing # During setup, set matrix entries to zero while keeping the structure - t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(nonzeros($A0).=0) - result=[] - A=Tm(nnodes,nnodes,1) + t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (nonzeros($A0) .= 0) + result = [] + A = Tm(nnodes, nnodes, 1) for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + pgrid = partition(grid, Tp(; npart=np)) @show num_partitions_per_color(pgrid) - reset!(A,num_partitions(pgrid)) - testassemble_parallel!(A,pgrid) - t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(nonzeros($A).=0) - @assert sort(nonzeros(A))≈nz - push!(result,(np,round(t0/t,digits=2))) + reset!(A, num_partitions(pgrid)) + testassemble_parallel!(A, pgrid) + t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (nonzeros($A) .= 0) + @assert sort(nonzeros(A)) ≈ nz + push!(result, (np, round(t0 / t; digits=2))) end result end -function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function speedup_build(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - nz=nonzeros(A0) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = nonzeros(A0) reset!(A0) - testassemble!(A0,grid) - 
@assert nonzeros(A0)≈(nz) - nz=sort(nz) + testassemble!(A0, grid) + @assert nonzeros(A0) ≈ (nz) + nz = sort(nz) + # Get the base timing # During setup, reset matrix to empty state. - t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) - - result=[] - A=Tm(nnodes,nnodes,1) + t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0)) + + result = [] + A = Tm(nnodes, nnodes, 1) for np in allnp # Get the parallel timing # During setup, reset matrix to empty state. - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - reset!(A,num_partitions(pgrid)) + pgrid = partition(grid, Tp(; npart=np)) + reset!(A, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(reset!($A,num_partitions($pgrid))) - @assert sort(nonzeros(A))≈nz - push!(result,(np,round(t0/t,digits=2))) + t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (reset!($A, + num_partitions($pgrid))) + @assert sort(nonzeros(A)) ≈ nz + push!(result, (np, round(t0 / t; digits=2))) end result end -function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function speedup_mul(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - b=rand(nnodes) - t0=@belapsed $A0*$b seconds=1 - A0b=A0*b - result=[] - A=Tm(nnodes,nnodes,1) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + b = rand(nnodes) + t0 = @belapsed $A0 * $b seconds = 1 + A0b = A0 * b + result = [] + A = Tm(nnodes, nnodes, 1) for np in allnp - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + pgrid = partition(grid, Tp(; npart=np)) @show num_partitions_per_color(pgrid) - reset!(A,num_partitions(pgrid)) - testassemble_parallel!(A,pgrid) + reset!(A, num_partitions(pgrid)) + testassemble_parallel!(A, pgrid) flush!(A) - ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) - t=@belapsed $A*$b seconds=1 - invp=invperm(pgrid[NodePermutation]) - @assert A0b[invp] ≈ A*b[invp] - push!(result,(np,round(t0/t,digits=2))) + ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], + pgrid[PartitionNodes]) + t = @belapsed $A * $b seconds = 1 + invp = invperm(pgrid[NodePermutation]) + @assert A0b[invp] ≈ A * b[invp] + push!(result, (np, round(t0 / t; digits=2))) end result end @@ -336,7 +349,6 @@ function speedup_csrmul(N; dim=3) t0=@belapsed $A0*$b seconds=1 A0b=A0*b - t0x=@belapsed A0x=sparse(transpose(sparse($A0))) A0x=sparse(transpose(sparse(A0))) @@ -349,7 +361,7 @@ function speedup_csrmul(N; dim=3) t2=@belapsed mymul($A, $b) seconds=1 @info t00,t0,t0x, tx,t1, t2 - + @assert A0b≈A*b t0/t1 end @@ -357,4 +369,3 @@ end =# end - From dc216a35ee6691a0990c9fb75ebcfacf192a43ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 24 Jun 2024 20:30:04 +0200 Subject: [PATCH 37/44] update compat for ExtendableGrids --- src/experimental/sparsematrixlnkdict.jl | 2 +- test/ExperimentalXParallel.jl | 4 ++-- test/Project.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index 6ae7a6c..1d3d8e4 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ 
b/src/experimental/sparsematrixlnkdict.jl @@ -216,7 +216,7 @@ It assumes that `op(0,0)==0`. If `v` is zero a new entry is created nevertheless. """ function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} -@time k, k0 = findindex(lnk, i, j) + k, k0 = findindex(lnk, i, j) if k > 0 lnk.nzval[k] = op(lnk.nzval[k], v) else diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index 03bf2ad..a3744eb 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -192,7 +192,7 @@ function test_correctness_build(N, pgrid = partition(grid, Tp(; npart=np)) A = Tm(nnodes, nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - @test check_partitioning(pgrid, cellpartonly=true) + @test check_partitioning(pgrid) testassemble_parallel!(A, pgrid) @test sort(nonzeros(A)) ≈ nz end @@ -212,7 +212,7 @@ function test_correctness_mul(N, A0b = A0 * b for np in allnp pgrid = partition(grid, Tp(; npart=np)) - @test check_partitioning(pgrid, cellpartonly=false) + @test check_partitioning(pgrid) A = Tm(nnodes, nnodes, num_partitions(pgrid)) ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], pgrid[PartitionNodes]) diff --git a/test/Project.toml b/test/Project.toml index 24a28c6..97f6793 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -21,5 +21,5 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -ExtendableGrids = "1.7" +ExtendableGrids = "1.8" IterativeSolvers = "0.9" From 18bc37f3ddc15a2bda501c491f7d0a7c4ec0dfc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 24 Jun 2024 21:52:56 +0200 Subject: [PATCH 38/44] moving things away from experimental --- src/ExtendableSparse.jl | 14 +- src/experimental/Experimental.jl | 14 +- .../extendablesparsematrixparallel.jl | 2 +- .../extendablesparsematrixscalar.jl | 2 +- ...l => abstractextendablesparsematrixcsc.jl} | 70 +-- src/matrix/abstractextension.jl | 28 -- src/matrix/abstractsparsematrixextension.jl | 28 ++ src/matrix/extendable.jl | 112 ++--- .../genericmtextendablesparsematrixcsc.jl | 122 +++++ src/matrix/sparsematrixdilnkc.jl | 462 ++++++++++++++++++ test/ExperimentalXParallel.jl | 116 +---- test/femtools.jl | 110 +++++ test/runtests.jl | 2 +- 13 files changed, 837 insertions(+), 245 deletions(-) rename src/matrix/{abstractextendable.jl => abstractextendablesparsematrixcsc.jl} (77%) delete mode 100644 src/matrix/abstractextension.jl create mode 100644 src/matrix/abstractsparsematrixextension.jl create mode 100644 src/matrix/genericmtextendablesparsematrixcsc.jl create mode 100644 src/matrix/sparsematrixdilnkc.jl create mode 100644 test/femtools.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 0d7bd5c..d7a0cd5 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -22,12 +22,20 @@ using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") -include("matrix/abstractextension.jl") +include("matrix/abstractsparsematrixextension.jl") include("matrix/sparsematrixlnk.jl") -include("matrix/abstractextendable.jl") +include("matrix/sparsematrixdilnkc.jl") +include("matrix/abstractextendablesparsematrixcsc.jl") include("matrix/extendable.jl") +include("matrix/genericmtextendablesparsematrixcsc.jl") -export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! 
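The stray `@time` removed here would have dominated the cost of the inner `findindex` call; timing belongs outside the kernel, as the test suite does with BenchmarkTools. A minimal sketch of that pattern, where `assemble!` is a hypothetical stand-in for the `testassemble!`/`testassemble_parallel!` routines:

```julia
using BenchmarkTools, ExtendableSparse

# hypothetical stand-in for the assembly routines under test
function assemble!(A, n)
    for i in 1:n
        rawupdateindex!(A, +, 2.0, i, i)
    end
    flush!(A)
end

n = 10_000
A = ExtendableSparseMatrix(n, n)
# reset! in setup, so each sample measures a fresh structure build
t = @belapsed assemble!($A, $n) seconds=1 setup=(reset!($A))
```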
+const ExtendableSparseMatrix=ExtendableSparseMatrixCSC +const MTExtendableSparseMatrixCSC=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC} +MTExtendableSparseMatrixCSC(m,n,args...)=MTExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) + +export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC,GenericMTExtendableSparseMatrixCSC +export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! +export partitioning! export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 546810e..dbf14cd 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -5,13 +5,12 @@ using SparseArrays: AbstractSparseMatrixCSC import SparseArrays: nonzeros, getcolptr,nzrange import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash -using ExtendableSparse: AbstractExtendableSparseMatrix, AbstractSparseMatrixExtension +using ExtendableSparse: AbstractExtendableSparseMatrixCSC, AbstractSparseMatrixExtension using DocStringExtensions using Metis using Base.Threads using OhMyThreads: @tasks -import ExtendableSparse: factorize!, update! - +import ExtendableSparse: factorize!, update!, partitioning! include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) @@ -66,16 +65,15 @@ const ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrix export ExtendableSparseMatrixLNK -include("extendablesparsematrixparallel.jl") -const ExtendableSparseMatrixParallelDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixDict{Tv,Ti},Tv,Ti} +const ExtendableSparseMatrixParallelDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDict{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelDict, partcolors! 
+export ExtendableSparseMatrixParallelDict -const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKX{Tv,Ti},Tv,Ti} +const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKX{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelLNKX(m,n,p)= ExtendableSparseMatrixParallelLNKX{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelLNKX -const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} +const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelLNKDict diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index ba70a65..cd66f92 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl index 887d275..36c1dfa 100644 --- a/src/experimental/extendablesparsematrixscalar.jl +++ b/src/experimental/extendablesparsematrixscalar.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ diff --git a/src/matrix/abstractextendable.jl b/src/matrix/abstractextendablesparsematrixcsc.jl similarity index 77% rename from src/matrix/abstractextendable.jl rename to src/matrix/abstractextendablesparsematrixcsc.jl index dae94bb..bcf3a6a 100644 --- a/src/matrix/abstractextendable.jl +++ b/src/matrix/abstractextendablesparsematrixcsc.jl @@ -7,23 +7,23 @@ rawupdateindex! reset!: empty all internals, just keep size """ -abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end +abstract type AbstractExtendableSparseMatrixCSC{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end """ $(SIGNATURES) [`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. """ -SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) +SparseArrays.nnz(ext::AbstractExtendableSparseMatrixCSC)=nnz(sparse(ext)) """ $(SIGNATURES) [`flush!`](@ref) and return nonzeros in ext.cscmatrix. """ -SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrixCSC)=nonzeros(sparse(ext)) -Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) +Base.size(ext::AbstractExtendableSparseMatrixCSC)=size(ext.cscmatrix) @@ -32,7 +32,7 @@ $(SIGNATURES) Return element type. 
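Putting the renamed pieces together: judging from the constructor and the `tid`-carrying `rawupdateindex!` in this patch, multithreaded assembly with the new `MTExtendableSparseMatrixCSC` is meant to look roughly like the sketch below. The chunking into `ranges` is ad hoc for illustration; each task writes only into the extension matrix selected by the trailing partition argument, and overlapping contributions are summed at `flush!`.

```julia
using ExtendableSparse
using Base.Threads: @threads, nthreads

n = 10_000
np = nthreads()
A = MTExtendableSparseMatrixCSC(n, n, np)
# disjoint index chunks, one per partition (illustrative only)
ranges = [((p - 1) * n ÷ np + 1):(p * n ÷ np) for p in 1:np]
@threads for p in 1:np
    for i in ranges[p]
        # the trailing argument p routes the update into partition p's
        # private extension matrix, so tasks never share mutable state
        rawupdateindex!(A, +, 2.0, i, i, p)
        i > 1 && rawupdateindex!(A, +, -1.0, i, i - 1, p)
        i < n && rawupdateindex!(A, +, -1.0, i, i + 1, p)
    end
end
flush!(A) # merge all extension matrices into the CSC part
```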
""" -Base.eltype(::AbstractExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv +Base.eltype(::AbstractExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = Tv @@ -41,12 +41,12 @@ $(SIGNATURES) Create SparseMatrixCSC from ExtendableSparseMatrix """ -SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrix)=sparse(A) +SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrixCSC)=sparse(A) -function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrixCSC) A=sparse(ext) xnnz = nnz(A) m, n = size(A) @@ -77,7 +77,7 @@ $(SIGNATURES) [`flush!`](@ref) and return rowvals in ext.cscmatrix. """ -SparseArrays.rowvals(ext::AbstractExtendableSparseMatrix)=rowvals(sparse(ext)) +SparseArrays.rowvals(ext::AbstractExtendableSparseMatrixCSC)=rowvals(sparse(ext)) """ @@ -85,7 +85,7 @@ $(SIGNATURES) [`flush!`](@ref) and return colptr of in ext.cscmatrix. """ -SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrix)=getcolptr(sparse(ext)) +SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrixCSC)=getcolptr(sparse(ext)) """ @@ -93,11 +93,11 @@ $(SIGNATURES) [`flush!`](@ref) and return findnz(ext.cscmatrix). """ -SparseArrays.findnz(ext::AbstractExtendableSparseMatrix)=findnz(sparse(ext)) +SparseArrays.findnz(ext::AbstractExtendableSparseMatrixCSC)=findnz(sparse(ext)) @static if VERSION >= v"1.7" - SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrix)= SparseArrays._checkbuffers(sparse(ext)) + SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrixCSC)= SparseArrays._checkbuffers(sparse(ext)) end """ @@ -107,7 +107,7 @@ end are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. In that case, Julias standard `\` is called, which is realized via UMFPACK. """ -function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{Tv, Ti}, +function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{Tv, Ti}, b::AbstractVector) where {Tv, Ti} SparspakLU(sparse(ext)) \ b end @@ -119,7 +119,7 @@ $(SIGNATURES) [`\\`](@ref) for Symmetric{ExtendableSparse} """ function LinearAlgebra.:\(symm_ext::Symmetric{Tm, T}, - b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}} Symmetric(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... end @@ -129,19 +129,19 @@ $(SIGNATURES) [`\\`](@ref) for Hermitian{ExtendableSparse} """ function LinearAlgebra.:\(symm_ext::Hermitian{Tm, T}, - b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}} Hermitian(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... 
end if USE_GPL_LIBS for (Tv) in (:Float64, :ComplexF64) - @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{$Tv, Ti}, + @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{$Tv, Ti}, B::AbstractVector) where {Ti} sparse(ext) \ B end end @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, - AbstractExtendableSparseMatrix{ + AbstractExtendableSparseMatrixCSC{ $Tv, Ti }}, @@ -151,7 +151,7 @@ if USE_GPL_LIBS end end @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, - AbstractExtendableSparseMatrix{ + AbstractExtendableSparseMatrixCSC{ $Tv, Ti }}, @@ -167,7 +167,7 @@ $(SIGNATURES) [`flush!`](@ref) and ldiv with ext.cscmatrix """ -function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrix, x) +function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrixCSC, x) LinearAlgebra.ldiv!(r, sparse(ext), x) end @@ -176,7 +176,7 @@ $(SIGNATURES) [`flush!`](@ref) and multiply with ext.cscmatrix """ -function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix, x) +function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrixCSC, x) LinearAlgebra.mul!(r, sparse(ext), x) end @@ -185,7 +185,7 @@ $(SIGNATURES) [`flush!`](@ref) and calculate norm from cscmatrix """ -function LinearAlgebra.norm(A::AbstractExtendableSparseMatrix, p::Real = 2) +function LinearAlgebra.norm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) return LinearAlgebra.norm(sparse(A), p) end @@ -194,7 +194,7 @@ $(SIGNATURES) [`flush!`](@ref) and calculate opnorm from cscmatrix """ -function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrix, p::Real = 2) +function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) return LinearAlgebra.opnorm(sparse(A), p) end @@ -203,7 +203,7 @@ $(SIGNATURES) [`flush!`](@ref) and calculate cond from cscmatrix """ -function LinearAlgebra.cond(A::AbstractExtendableSparseMatrix, p::Real = 2) +function LinearAlgebra.cond(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) return LinearAlgebra.cond(sparse(A), p) end @@ -212,7 +212,7 @@ $(SIGNATURES) [`flush!`](@ref) and check for symmetry of cscmatrix """ -function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrix) +function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrixCSC) return LinearAlgebra.issymmetric(sparse(A)) end @@ -221,29 +221,29 @@ end -function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrix +function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC T(sparse(A) + sparse(B)) end -function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrix +function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC T(sparse(A) - sparse(B)) end -function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrix +function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC T(sparse(A) * sparse(B)) end """ $(SIGNATURES) """ -function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrix +function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrixCSC return T(d * sparse(ext)) end """ $(SIGNATURES) """ -function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrix +function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrixCSC return T(sparse(ext) * d) end @@ -253,7 +253,7 @@ $(SIGNATURES) Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. 
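These mixed-type methods keep result types predictable: combining with a plain `SparseMatrixCSC` flushes and returns a `SparseMatrixCSC`, while `Diagonal` scaling stays within the extendable type. A quick sketch:

```julia
using ExtendableSparse, SparseArrays, LinearAlgebra

A = ExtendableSparseMatrix(sparse(1.0I, 3, 3))
C = sparse(2.0I, 3, 3)
@assert (A + C) isa SparseMatrixCSC
@assert (Diagonal(ones(3)) * A) isa ExtendableSparseMatrix
```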
""" -function Base.:+(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) +function Base.:+(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC) return sparse(ext) + csc end @@ -263,7 +263,7 @@ $(SIGNATURES) Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. """ -function Base.:-(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) +function Base.:-(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC) return sparse(ext) - csc end @@ -272,28 +272,28 @@ $(SIGNATURES) Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. """ -function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrix) +function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrixCSC) return csc - sparse(ext) end """ $(SIGNATURES) """ -function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrix) +function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrixCSC) dropzeros!(sparse(ext)) end -function mark_dirichlet(A::AbstractExtendableSparseMatrix;penalty=1.0e20) +function mark_dirichlet(A::AbstractExtendableSparseMatrixCSC;penalty=1.0e20) mark_dirichlet(sparse(A);penalty) end -function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrix +function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrixCSC T(eliminate_dirichlet(sparse(A),dirichlet)) end -function eliminate_dirichlet!(A::AbstractExtendableSparseMatrix,dirichlet) +function eliminate_dirichlet!(A::AbstractExtendableSparseMatrixCSC,dirichlet) eliminate_dirichlet!(sparse(A),dirichlet) A end diff --git a/src/matrix/abstractextension.jl b/src/matrix/abstractextension.jl deleted file mode 100644 index 378e54a..0000000 --- a/src/matrix/abstractextension.jl +++ /dev/null @@ -1,28 +0,0 @@ -""" - $(TYPEDEF) - -Abstract type for sparse matrix extension. - -Subtypes T_ext must implement: - - -Constructor T_ext(m,n) -SparseArrays.nnz(ext::T_ext) -Base.size(ext::T_ext) - -Base.+(ext::T_ext, csc) - - Add extension matrix and csc matrix, return csc matrix - -sum!(nodeparts::Vector{Ti},extmatrices::Vector{T_ext}, cscmatrix) - - Add csc matrix and extension matrices (one per partition) and return csc matrix - - Fill nodeparts (already initialized at input) with information which partition was used to assemble node. - i.e. if entry [i,j] comes from extmatrixes[p], set nodeparts[j]=p . - - This information may be used by matrix-vector multiplication and preconditioners - -rawupdateindex!(ext::Text, op, v, i, j) where {Tv, Ti} - - Set ext[i,j]+=v, possibly insert entry into matrix. - - -""" -abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end diff --git a/src/matrix/abstractsparsematrixextension.jl b/src/matrix/abstractsparsematrixextension.jl new file mode 100644 index 0000000..d8070fc --- /dev/null +++ b/src/matrix/abstractsparsematrixextension.jl @@ -0,0 +1,28 @@ +""" + $(TYPEDEF) + +Abstract type for sparse matrix extension. + +Subtypes T_ext must implement: + +Constructor T_ext(m,n) +SparseArrays.nnz(ext::T_ext) +Base.size(ext::T_ext) + + +Base.sum(extmatrices::Vector{T_ext}, csx) + - Add csx matrix and extension matrices (one per partition) and return csx matrix + +rawupdateindex!(ext::Text, op, v, i, j) where {Tv, Ti} + - Set ext[i,j]+=v, possibly insert entry into matrix. 
+ + +Optional: + +Base.+(ext::T_ext, csx) + - Add extension matrix and csc/csr matrix, return csx matrix + +""" +abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end + +Base.:+(ext::AbstractSparseMatrixExtension, csx) = sum([ext],csx) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index 2d8a908..a9debf7 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -7,7 +7,7 @@ either in cscmatrix, or in lnkmatrix, never in both. $(TYPEDFIELDS) """ -mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixCSC{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ @@ -27,92 +27,92 @@ end """ ``` -ExtendableSparseMatrix(Tv,Ti,m,n) -ExtendableSparseMatrix(Tv,m,n) -ExtendableSparseMatrix(m,n) +ExtendableSparseMatrixCSC(Tv,Ti,m,n) +ExtendableSparseMatrixCSC(Tv,m,n) +ExtendableSparseMatrixCSC(m,n) ``` -Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for +Create empty ExtendableSparseMatrixCSC. This is equivalent to `spzeros(m,n)` for `SparseMartrixCSC`. """ -function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) +function ExtendableSparseMatrixCSC{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) end -function ExtendableSparseMatrix(valuetype::Type{Tv}, +function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, indextype::Type{Ti}, m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(m, n) + ExtendableSparseMatrixCSC{Tv, Ti}(m, n) end -function ExtendableSparseMatrix(valuetype::Type{Tv}, m, n) where {Tv} - ExtendableSparseMatrix{Tv, Int}(m, n) +function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, m, n) where {Tv} + ExtendableSparseMatrixCSC{Tv, Int}(m, n) end -ExtendableSparseMatrix(m, n) = ExtendableSparseMatrix{Float64, Int}(m, n) +ExtendableSparseMatrixCSC(m, n) = ExtendableSparseMatrixCSC{Float64, Int}(m, n) """ $(SIGNATURES) -Create ExtendableSparseMatrix from SparseMatrixCSC +Create ExtendableSparseMatrixCSC from SparseMatrixCSC """ -function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) +function ExtendableSparseMatrixCSC(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc)) end -function ExtendableSparseMatrix{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) +function ExtendableSparseMatrixCSC{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc)) end """ $(SIGNATURES) - Create ExtendableSparseMatrix from Diagonal + Create ExtendableSparseMatrixCSC from Diagonal """ -ExtendableSparseMatrix(D::Diagonal) = ExtendableSparseMatrix(sparse(D)) +ExtendableSparseMatrixCSC(D::Diagonal) = ExtendableSparseMatrixCSC(sparse(D)) """ $(SIGNATURES) - Create ExtendableSparseMatrix from AbstractMatrix, dropping all zero entries. + Create ExtendableSparseMatrixCSC from AbstractMatrix, dropping all zero entries. This is the equivalent to `sparse(A)`. 
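The constructor family mirrors `sparse`: dense and `Diagonal` inputs are converted with stored zeros dropped. For instance:

```julia
using ExtendableSparse, SparseArrays, LinearAlgebra

M = [1.0 0.0; 0.0 2.0]
A = ExtendableSparseMatrix(M)                         # equivalent to sparse(M)
B = ExtendableSparseMatrix(Diagonal([1.0, 2.0, 3.0]))
@assert nnz(A) == 2 && nnz(B) == 3
```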
""" -ExtendableSparseMatrix(A::AbstractMatrix) = ExtendableSparseMatrix(sparse(A)) +ExtendableSparseMatrixCSC(A::AbstractMatrix) = ExtendableSparseMatrixCSC(sparse(A)) """ - ExtendableSparseMatrix(I,J,V) - ExtendableSparseMatrix(I,J,V,m,n) - ExtendableSparseMatrix(I,J,V,combine::Function) - ExtendableSparseMatrix(I,J,V,m,n,combine::Function) + ExtendableSparseMatrixCSC(I,J,V) + ExtendableSparseMatrixCSC(I,J,V,m,n) + ExtendableSparseMatrixCSC(I,J,V,combine::Function) + ExtendableSparseMatrixCSC(I,J,V,m,n,combine::Function) -Create ExtendableSparseMatrix from triplet (COO) data. +Create ExtendableSparseMatrixCSC from triplet (COO) data. """ -ExtendableSparseMatrix(I, J, V::AbstractVector) = ExtendableSparseMatrix(sparse(I, J, V)) +ExtendableSparseMatrixCSC(I, J, V::AbstractVector) = ExtendableSparseMatrixCSC(sparse(I, J, V)) -function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n) - ExtendableSparseMatrix(sparse(I, J, V, m, n)) +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n) + ExtendableSparseMatrixCSC(sparse(I, J, V, m, n)) end -function ExtendableSparseMatrix(I, J, V::AbstractVector, combine::Function) - ExtendableSparseMatrix(sparse(I, J, V, combine)) +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, combine::Function) + ExtendableSparseMatrixCSC(sparse(I, J, V, combine)) end -function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n, combine::Function) - ExtendableSparseMatrix(sparse(I, J, V, m, n, combine)) +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n, combine::Function) + ExtendableSparseMatrixCSC(sparse(I, J, V, m, n, combine)) end # THese are probably too much... -# function Base.transpose(A::ExtendableSparseMatrix) +# function Base.transpose(A::ExtendableSparseMatrixCSC) # flush!(A) -# ExtendableSparseMatrix(Base.transpose(sparse(A))) +# ExtendableSparseMatrixCSC(Base.transpose(sparse(A))) # end -# function Base.adjoint(A::ExtendableSparseMatrix) +# function Base.adjoint(A::ExtendableSparseMatrixCSC) # flush!(A) -# ExtendableSparseMatrix(Base.adjoint(sparse(A))) +# ExtendableSparseMatrixCSC(Base.adjoint(sparse(A))) # end -# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrix{Tv,Ti}}) where {Tv,Ti} +# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} # transpose(sparse(parent(text))) # end @@ -123,12 +123,12 @@ $(SIGNATURES) Create similar but emtpy extendableSparseMatrix """ -function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} - ExtendableSparseMatrix{Tv, Ti}(size(m)...) +function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} + ExtendableSparseMatrixCSC{Tv, Ti}(size(m)...) end -function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}, ::Type{T}) where {Tv, Ti, T} - ExtendableSparseMatrix{T, Ti}(size(m)...) +function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}, ::Type{T}) where {Tv, Ti, T} + ExtendableSparseMatrixCSC{T, Ti}(size(m)...) end """ @@ -140,7 +140,7 @@ search during acces: ```@example using ExtendableSparse # hide -A=ExtendableSparseMatrix(3,3) +A=ExtendableSparseMatrixCSC(3,3) A[1,2]+=0.1 A ``` @@ -148,7 +148,7 @@ A ```@example using ExtendableSparse # hide -A=ExtendableSparseMatrix(3,3) +A=ExtendableSparseMatrixCSC(3,3) updateindex!(A,+,0.1,1,2) A ``` @@ -156,7 +156,7 @@ A If `v` is zero, no new entry is created. 
""" -function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function updateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, @@ -178,7 +178,7 @@ $(SIGNATURES) Like [`updateindex!`](@ref) but without checking if v is zero. """ -function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function rawupdateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, @@ -201,7 +201,7 @@ $(SIGNATURES) Find index in CSC matrix and set value if it exists. Otherwise, set index in extension if `v` is nonzero. """ -function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function Base.setindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti} @@ -222,7 +222,7 @@ $(SIGNATURES) Find index in CSC matrix and return value, if it exists. Otherwise, return value from extension. """ -function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, +function Base.getindex(ext::ExtendableSparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) @@ -244,7 +244,7 @@ $(SIGNATURES) If there are new entries in extension, create new CSC matrix by adding the cscmatrix and linked list matrix and reset the linked list based extension. """ -function flush!(ext::ExtendableSparseMatrix) +function flush!(ext::ExtendableSparseMatrixCSC) if ext.lnkmatrix != nothing && nnz(ext.lnkmatrix) > 0 ext.cscmatrix = ext.lnkmatrix + ext.cscmatrix ext.lnkmatrix = nothing @@ -254,7 +254,7 @@ function flush!(ext::ExtendableSparseMatrix) end -function SparseArrays.sparse(ext::ExtendableSparseMatrix) +function SparseArrays.sparse(ext::ExtendableSparseMatrixCSC) flush!(ext) ext.cscmatrix end @@ -265,7 +265,7 @@ $(SIGNATURES) Reset ExtenableSparseMatrix into state similar to that after creation. """ -function reset!(A::ExtendableSparseMatrix) +function reset!(A::ExtendableSparseMatrixCSC) A.cscmatrix=spzeros(size(A)...) A.lnkmatrix=nothing end @@ -275,11 +275,11 @@ end """ $(SIGNATURES) """ -function Base.copy(S::ExtendableSparseMatrix) +function Base.copy(S::ExtendableSparseMatrixCSC) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing,S.phash) + ExtendableSparseMatrixCSC(copy(S.cscmatrix), nothing,S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) + ExtendableSparseMatrixCSC(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) end end @@ -288,7 +288,7 @@ end Create a pointblock matrix. 
""" -function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} +function pointblock(A0::ExtendableSparseMatrixCSC{Tv,Ti},blocksize) where {Tv,Ti} A=SparseMatrixCSC(A0) colptr=A.colptr rowval=A.rowval @@ -298,7 +298,7 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} nblock=n÷blocksize b=SMatrix{blocksize,blocksize}(block) Tb=typeof(b) - Ab=ExtendableSparseMatrix{Tb,Ti}(nblock,nblock) + Ab=ExtendableSparseMatrixCSC{Tb,Ti}(nblock,nblock) for i=1:n diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl new file mode 100644 index 0000000..5e52fcd --- /dev/null +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -0,0 +1,122 @@ +mutable struct GenericMTExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + xmatrices::Vector{Tm} + + colparts::Vector{Ti} + partnodes::Vector{Ti} +end + +function GenericMTExtendableSparseMatrixCSC{Tm, Tv, Ti}(n,m,p::Integer=1) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} + GenericMTExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n), + [Tm(m,n) for i=1:p], + Ti[1,2], + Ti[1,n+1], + ) +end + +function partitioning!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} + ext.partnodes=partnodes + ext.colparts=colparts + ext +end + + +function reset!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrices=[Tm(m,n) for i=1:p] + ext.colparts=Ti[1,2] + ext.partnodes=Ti[1,n+1] + ext +end + +function reset!(ext::GenericMTExtendableSparseMatrixCSC) + reset!(ext,length(ext.xmatrices)) +end + + +function flush!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti} + ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) + np=length(ext.xmatrices) + (m,n)=size(ext.cscmatrix) + ext.xmatrices=[Tm(m,n) for i=1:np] + ext +end + + +function SparseArrays.sparse(ext::GenericMTExtendableSparseMatrixCSC) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::GenericMTExtendableSparseMatrixCSC, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! for new entries into GenericMTExtendableSparseMatrixCSC") + end +end + +function Base.getindex(ext::GenericMTExtendableSparseMatrixCSC, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sum(nnz,ext.xmatrices) == 0 + return zero(eltype(ext.cscmatrix)) + else + error("flush! 
GenericMTExtendableSparseMatrixCSC before using getindex")
+    end
+end
+
+function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC,
+                         op,
+                         v,
+                         i,
+                         j,
+                         tid=1)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.xmatrices[tid],op,v,i,j)
+    end
+end
+
+# Needed in 1.9
+function Base.:*(ext::GenericMTExtendableSparseMatrixCSC{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA
+    mul!(similar(x),ext,x)
+end
+
+function LinearAlgebra.mul!(r, ext::GenericMTExtendableSparseMatrixCSC, x)
+    flush!(ext)
+    A=ext.cscmatrix
+    colparts=ext.colparts
+    partnodes=ext.partnodes
+    rows = SparseArrays.rowvals(A)
+    vals = nonzeros(A)
+    r.=zero(eltype(ext))
+    m,n=size(A)
+    for icol=1:length(colparts)-1
+        @tasks for ip in colparts[icol]:colparts[icol+1]-1
+            @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1
+                @inbounds for i in nzrange(A,inode)
+                    r[rows[i]]+=vals[i]*x[inode]
+                end
+            end
+        end
+    end
+    r
+end
diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl
new file mode 100644
index 0000000..d467c3d
--- /dev/null
+++ b/src/matrix/sparsematrixdilnkc.jl
@@ -0,0 +1,462 @@
+"""
+    $(TYPEDEF)
+
+    Modification of SparseMatrixLNK where the pointer to the first index of
+column j is stored in a dictionary.
+    """
+mutable struct SparseMatrixDILNKC{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
+    """
+    Number of rows
+    """
+    m::Ti
+
+    """
+    Number of columns
+    """
+    n::Ti
+
+    """
+    Number of nonzeros
+    """
+    nnz::Ti
+
+    """
+    Length of arrays
+    """
+    nentries::Ti
+
+    """
+    Linked list of column entries. Initial length is n,
+    it grows with each new entry.
+
+    colptr[index] contains the next
+    index in the list or zero, in the latter case terminating the list which
+    starts at index 1<=j<=n for each column j.
+    """
+    colptr::Vector{Ti}
+
+    """
+    Dictionary to store start indices of columns
+    """
+    colstart::Dict{Ti,Ti}
+
+    """
+    Row numbers. For each index it contains zero (initial state)
+    or the row numbers corresponding to the column entry list in colptr.
+    """
+    rowval::Vector{Ti}
+
+    """
+    Nonzero entry values corresponding to each pair
+    (colptr[index],rowval[index])
+    """
+    nzval::Vector{Tv}
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixDILNKC{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    SparseMatrixDILNKC{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Tv,10))
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixDILNKC(valuetype::Type{Tv}, indextype::Type{Ti}, m,
+                            n) where {Tv, Ti <: Integer}
+    SparseMatrixDILNKC{Tv, Ti}(m, n)
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixDILNKC(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixDILNKC(Tv, Int, m, n)
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixDILNKC(m, n) = SparseMatrixDILNKC(Float64, m, n)
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixCSC.
+ +""" +function SparseMatrixDILNKC(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: + Integer} + lnk = SparseMatrixDILNKC{Tv, Ti}(csc.m, csc.n) + for j = 1:(csc.n) + for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) + lnk[csc.rowval[k], j] = csc.nzval[k] + end + end + lnk +end + +function findindex(lnk::SparseMatrixDILNKC, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = get(lnk.colstart, j, 0) + if k==0 + return 0,0 + end + k0 = k + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +$(SIGNATURES) + +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SparseMatrixDILNKC{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SparseMatrixDILNKC, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + if k0==0 + lnk.colstart[j]=lnk.nentries + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + + if k0>0 + lnk.colptr[k0] = lnk.nentries + end + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +$(SIGNATURES) + +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SparseMatrixDILNKC, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. +""" +Base.size(lnk::SparseMatrixDILNKC) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixDILNKC) = lnk.nnz + + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. 
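+Duplicate (i,j) pairs among the collected triples are combined with `+` by
+`sparse`/`sparse!`, so entries present in both lnk and csc are added up.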
+""" +function add_via_COO(lnk::SparseMatrixDILNKC{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + if nnz(csc)>0 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + end + for (j,k) in lnk.colstart + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +(to be fixed) +""" +function add_directly(lnk::SparseMatrixDILNKC{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for (j,k) in lnk.colstart + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = get(lnk.colstart, j, 0) + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? + + colptr[j] = inz + jlnk = one(Ti) # counts the entries in col + jcsc = csc.colptr[j] # counts entries in csc + + while true + if in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || + !in_lnk_col(jlnk, l_lnk_col)) + # Insert entries from csc into new structure + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + jcsc += 1 + inz += 1 + elseif in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) + # Add up entries from csc and lnk + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval + jcsc += 1 + inz += 1 + jlnk += 1 + elseif in_lnk_col(jlnk, l_lnk_col) + # Insert entries from lnk res. 
col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixDILNKC`](@ref) lnk, returning a SparseMatrixCSC
+"""
+Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+
+function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkdictmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
+
+        ip=1
+        for lnk in lnkdictmatrices
+            for (j,k) in lnk.colstart
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+            ip=ip+1
+        end
+        @static if VERSION>=v"1.10"
+            return SparseArrays.sparse!(I,J,V,m,n,+)
+        else
+            return SparseArrays.sparse(I,J,V,m,n,+)
+        end
+    end
+    return cscmatrix
+end
+
+function reset!(m::SparseMatrixDILNKC{Tv,Ti}) where {Tv,Ti}
+    m.nnz=0
+    m.nentries=0
+    m.colptr=zeros(Ti,10)
+    m.colstart=Dict{Ti,Ti}()
+    m.rowval=zeros(Ti,10)
+    m.nzval=zeros(Tv,10)
+    m
+end
+
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixDILNKC.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixDILNKC)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixDILNKC)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixDILNKC)
+    SparseMatrixDILNKC(size(S, 1),
+                       size(S, 2),
+                       S.nnz,
+                       S.nentries,
+                       copy(S.colptr),
+                       copy(S.colstart),
+                       copy(S.rowval),
+                       copy(S.nzval))
+end
diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl
index a3744eb..128d93b 100644
--- a/test/ExperimentalXParallel.jl
+++ b/test/ExperimentalXParallel.jl
@@ -11,116 +11,8 @@ using ExtendableSparse, ExtendableGrids, Metis
 using LinearAlgebra
 using BenchmarkTools
 using Test
-using OhMyThreads: @tasks
-using RecursiveFactorization
-
-function testgrid(N; dim=3)
-    X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int)
-    simplexgrid((X for i = 1:dim)...)
-end - -function coordmatrix!(C, coord, cellnodes, k) - spacedim=size(coord,1) - celldim=size(cellnodes,1) - @inbounds for jj = 1:celldim - C[1, jj] = 1 - @inbounds for ii = 1:spacedim - C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] - end - end -end - -function gradient!(G, C, factdim, I, ipiv) - clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) - ldiv!(G, clu, I) - abs(det(clu)) / factdim -end - -function scalpro(G, dim, jl, il) - s = 0.0 - @inbounds @simd for k = 1:dim - s += G[jl, k + 1] * G[il, k + 1] - end - return s -end - -function stiffness!(S, dim, G) - @inbounds for il = 1:(dim + 1) - S[il, il] = scalpro(G, dim, il, il) - @inbounds for jl = (il + 1):(dim + 1) - S[il, jl] = scalpro(G, dim, jl, il) - S[jl, il] = S[il, jl] - end - end - return S -end - -function testassemble!(A_h, grid) - coord = grid[Coordinates] - cellnodes = grid[CellNodes] - ncells = num_cells(grid) - dim = size(coord, 1) - lnodes = dim + 1 - factdim::Float64 = factorial(dim) - S = zeros(lnodes, lnodes) # local stiffness matrix - C = zeros(lnodes, lnodes) # local coordinate matrix - G = zeros(lnodes, lnodes) # shape function gradients - ipiv = zeros(Int, lnodes) - I = Matrix(Diagonal(ones(lnodes))) - ncells = size(cellnodes, 2) - for icell = 1:ncells - coordmatrix!(C, coord, cellnodes, icell) - vol = gradient!(G, C, factdim, I, ipiv) - stiffness!(S, dim, G) - for il = 1:lnodes - i = cellnodes[il, icell] - rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) - for jl = 1:lnodes - j = cellnodes[jl, icell] - rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) - end - end - end - flush!(A_h) -end - -function testassemble_parallel!(A_h, grid) - coord = grid[Coordinates] - cellnodes = grid[CellNodes] - ncells = num_cells(grid) - dim = size(coord, 1) - lnodes = dim + 1 - npart = num_partitions(grid) - factdim::Float64 = factorial(dim) - SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix - CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix - GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients - IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients - I = Matrix(Diagonal(ones(lnodes))) - ncells = size(cellnodes, 2) - for color in pcolors(grid) - @tasks for part in pcolor_partitions(grid, color) - C = CC[part] - S = SS[part] - G = GG[part] - ipiv = IP[part] - for icell in partition_cells(grid, part) - coordmatrix!(C, coord, cellnodes, icell) - vol = gradient!(G, C, factdim, I, ipiv) - stiffness!(S, dim, G) - for il = 1:lnodes - i = cellnodes[il, icell] - rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) - for jl = 1:lnodes - j = cellnodes[jl, icell] - rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) - end - end - end - end - end - flush!(A_h) -end +include("femtools.jl") function test_correctness_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) grid = testgrid(N; dim) @@ -214,7 +106,7 @@ function test_correctness_mul(N, pgrid = partition(grid, Tp(; npart=np)) @test check_partitioning(pgrid) A = Tm(nnodes, nnodes, num_partitions(pgrid)) - ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], + partitioning!(A, pgrid[PColorPartitions], pgrid[PartitionNodes]) testassemble_parallel!(A, pgrid) invp = invperm(pgrid[NodePermutation]) @@ -311,8 +203,8 @@ function speedup_mul(N, reset!(A, num_partitions(pgrid)) testassemble_parallel!(A, pgrid) flush!(A) - ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], - pgrid[PartitionNodes]) + partitioning!(A, pgrid[PColorPartitions], + 
pgrid[PartitionNodes]) t = @belapsed $A * $b seconds = 1 invp = invperm(pgrid[NodePermutation]) @assert A0b[invp] ≈ A * b[invp] diff --git a/test/femtools.jl b/test/femtools.jl new file mode 100644 index 0000000..8c7e652 --- /dev/null +++ b/test/femtools.jl @@ -0,0 +1,110 @@ +using OhMyThreads: @tasks +using RecursiveFactorization + +function testgrid(N; dim=3) + X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int) + simplexgrid((X for i = 1:dim)...) +end + +function coordmatrix!(C, coord, cellnodes, k) + spacedim=size(coord,1) + celldim=size(cellnodes,1) + @inbounds for jj = 1:celldim + C[1, jj] = 1 + @inbounds for ii = 1:spacedim + C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] + end + end +end + +function gradient!(G, C, factdim, I, ipiv) + clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + ldiv!(G, clu, I) + abs(det(clu)) / factdim +end + +function scalpro(G, dim, jl, il) + s = 0.0 + @inbounds @simd for k = 1:dim + s += G[jl, k + 1] * G[il, k + 1] + end + return s +end + +function stiffness!(S, dim, G) + @inbounds for il = 1:(dim + 1) + S[il, il] = scalpro(G, dim, il, il) + @inbounds for jl = (il + 1):(dim + 1) + S[il, jl] = scalpro(G, dim, jl, il) + S[jl, il] = S[il, jl] + end + end + return S +end + +function testassemble!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + factdim::Float64 = factorial(dim) + S = zeros(lnodes, lnodes) # local stiffness matrix + C = zeros(lnodes, lnodes) # local coordinate matrix + G = zeros(lnodes, lnodes) # shape function gradients + ipiv = zeros(Int, lnodes) + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for icell = 1:ncells + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) + end + end + end + flush!(A_h) +end + +function testassemble_parallel!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + npart = num_partitions(grid) + factdim::Float64 = factorial(dim) + SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix + CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix + GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients + IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for color in pcolors(grid) + @tasks for part in pcolor_partitions(grid, color) + C = CC[part] + S = SS[part] + G = GG[part] + ipiv = IP[part] + for icell in partition_cells(grid, part) + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) + end + end + end + end + end + flush!(A_h) +end diff --git a/test/runtests.jl b/test/runtests.jl index 856b8b2..01bdce2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,7 +18,7 @@ using ForwardDiff end end - for Tm in [ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] + for Tm in 
[MTExtendableSparseMatrixCSC,ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] for N in [10000,20000] ExperimentalXParallel.test_correctness_update(N,Tm, dim=2) ExperimentalXParallel.test_correctness_build(N,Tm, dim=2) From 8d7b2165952c9021d870e328b1ab4e0425b3e133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 1 Jul 2024 10:20:52 +0200 Subject: [PATCH 39/44] Fix MT/ST stuff for VoronoiFVM (in single threaded mode) Add STExtendableSparseMatrixCSC (temporarily) --- src/ExtendableSparse.jl | 9 +- .../abstractextendablesparsematrixcsc.jl | 1 - .../genericextendablesparsematrixcsc.jl | 91 +++++++++++++++++++ .../genericmtextendablesparsematrixcsc.jl | 20 +++- src/matrix/sparsematrixdilnkc.jl | 10 +- 5 files changed, 122 insertions(+), 9 deletions(-) create mode 100644 src/matrix/genericextendablesparsematrixcsc.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index d7a0cd5..a1fd4e9 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -28,12 +28,17 @@ include("matrix/sparsematrixdilnkc.jl") include("matrix/abstractextendablesparsematrixcsc.jl") include("matrix/extendable.jl") include("matrix/genericmtextendablesparsematrixcsc.jl") +include("matrix/genericextendablesparsematrixcsc.jl") const ExtendableSparseMatrix=ExtendableSparseMatrixCSC -const MTExtendableSparseMatrixCSC=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC} +const MTExtendableSparseMatrixCSC{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti} MTExtendableSparseMatrixCSC(m,n,args...)=MTExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) -export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC,GenericMTExtendableSparseMatrixCSC +const STExtendableSparseMatrixCSC{Tv,Ti}=GenericExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti} +STExtendableSparseMatrixCSC(m,n,args...)=STExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) + + +export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, STExtendableSparseMatrixCSC, GenericMTExtendableSparseMatrixCSC export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! export partitioning! 
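
A minimal sketch (names as introduced above) of the assembly pattern the MT
alias targets. The row split below is ad hoc and only for illustration; any
partitioning works as long as each task inserts new entries through its own
`tid`:

    using ExtendableSparse
    n = 1000
    np = Threads.nthreads()
    A = MTExtendableSparseMatrixCSC(n, n, np)
    Threads.@threads for tid = 1:np
        for i = tid:np:n                    # disjoint row set per task
            rawupdateindex!(A, +, 2.0, i, i, tid)        # lands in xmatrices[tid]
            i < n && rawupdateindex!(A, +, -1.0, i, i + 1, tid)
        end
    end
    flush!(A)    # merge the per-task extensions into the CSC part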
diff --git a/src/matrix/abstractextendablesparsematrixcsc.jl b/src/matrix/abstractextendablesparsematrixcsc.jl index bcf3a6a..491ebfb 100644 --- a/src/matrix/abstractextendablesparsematrixcsc.jl +++ b/src/matrix/abstractextendablesparsematrixcsc.jl @@ -297,4 +297,3 @@ function eliminate_dirichlet!(A::AbstractExtendableSparseMatrixCSC,dirichlet) eliminate_dirichlet!(sparse(A),dirichlet) A end - diff --git a/src/matrix/genericextendablesparsematrixcsc.jl b/src/matrix/genericextendablesparsematrixcsc.jl new file mode 100644 index 0000000..c741283 --- /dev/null +++ b/src/matrix/genericextendablesparsematrixcsc.jl @@ -0,0 +1,91 @@ +mutable struct GenericExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Matrix for new entries + """ + xmatrix::Tm +end + + +function GenericExtendableSparseMatrixCSC{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} + GenericExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n), + Tm(m,n) + ) +end + + +function reset!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrix=Tm(m,n) + ext +end + + +function flush!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti} + if nnz(ext.xmatrix)>0 + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) + end + ext +end + +function SparseArrays.sparse(ext::GenericExtendableSparseMatrixCSC) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::GenericExtendableSparseMatrixCSC, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + setindex!(ext.xmatrix,v,i,j) + end +end + + +function Base.getindex(ext::GenericExtendableSparseMatrixCSC, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] + else + getindex(ext.xmatrix,i,j) + end +end + +function rawupdateindex!(ext::GenericExtendableSparseMatrixCSC, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.xmatrix,op,v,i,j) + end +end + +function updateindex!(ext::GenericExtendableSparseMatrixCSC, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + updateindex!(ext.xmatrix,op,v,i,j) + end +end + diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl index 5e52fcd..91c805d 100644 --- a/src/matrix/genericmtextendablesparsematrixcsc.jl +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -1,7 +1,7 @@ mutable struct GenericMTExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data - """ + """ cscmatrix::SparseMatrixCSC{Tv, Ti} """ @@ -95,6 +95,24 @@ function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC, end end + +function updateindex!(ext::GenericMTExtendableSparseMatrixCSC, + op, + v, + i, + j, + tid=1) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + updateindex!(ext.xmatrices[tid],op,v,i,j) + end +end + + + + # Needed in 1.9 function Base.:*(ext::GenericMTExtendableSparseMatrixCSC{Tm, TA} where 
Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA mul!(similar(x),ext,x) diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl index d467c3d..a2cdea8 100644 --- a/src/matrix/sparsematrixdilnkc.jl +++ b/src/matrix/sparsematrixdilnkc.jl @@ -381,7 +381,8 @@ end Add SparseMatrixCSC matrix and [`SparseMatrixDILNKC`](@ref) lnk, returning a SparseMatrixCSC """ -Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc) +#Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc) +Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = sum([lnk],csc) function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} lnew=sum(nnz,lnkdictmatrices) @@ -395,14 +396,13 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix: for icsc=1:length(colptr)-1 for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] + I[i]=rowval[j] + J[i]=icsc V[i]=nzval[j] i=i+1 end end - ip=1 for lnk in lnkdictmatrices for (j,k) in lnk.colstart while k>0 @@ -413,8 +413,8 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix: i=i+1 end end - ip=ip+1 end + @assert l==i-1 @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else From 4437971ca1754fa120d92d646cbff7aff17aea1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 2 Jul 2024 23:54:32 +0200 Subject: [PATCH 40/44] additional methods for VoronoiFVM: nnznew --- src/ExtendableSparse.jl | 2 +- src/matrix/extendable.jl | 3 ++- src/matrix/genericextendablesparsematrixcsc.jl | 2 ++ src/matrix/genericmtextendablesparsematrixcsc.jl | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index a1fd4e9..0dd8536 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -39,7 +39,7 @@ STExtendableSparseMatrixCSC(m,n,args...)=STExtendableSparseMatrixCSC{Float64,Int export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, STExtendableSparseMatrixCSC, GenericMTExtendableSparseMatrixCSC -export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! +export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset!, nnznew export partitioning! 
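+# nnznew counts the entries still held in the extension part of the generic
+# matrix types, i.e. entries not yet flushed into the CSC part.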
export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index a9debf7..d4c35b5 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -182,7 +182,8 @@ function rawupdateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, - j) where {Tv, Ti <: Integer} + j, + part=1) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) diff --git a/src/matrix/genericextendablesparsematrixcsc.jl b/src/matrix/genericextendablesparsematrixcsc.jl index c741283..457413e 100644 --- a/src/matrix/genericextendablesparsematrixcsc.jl +++ b/src/matrix/genericextendablesparsematrixcsc.jl @@ -18,6 +18,8 @@ function GenericExtendableSparseMatrixCSC{Tm, Tv, Ti}(m::Integer,n::Integer) whe end +nnznew(ext::GenericExtendableSparseMatrixCSC)=nnz(ext.xmatrix) + function reset!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where {Tm,Tv,Ti} m,n=size(ext.cscmatrix) ext.cscmatrix=spzeros(Tv, Ti, m, n) diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl index 91c805d..88a4d68 100644 --- a/src/matrix/genericmtextendablesparsematrixcsc.jl +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -81,6 +81,9 @@ function Base.getindex(ext::GenericMTExtendableSparseMatrixCSC, end end +nnznew(ext::GenericMTExtendableSparseMatrixCSC)=sum(nnz,ext.xmatrices) + + function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC, op, v, From 90dc3d739d62a8e30d4aee9129c411748fcec223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 7 Jul 2024 23:02:22 +0200 Subject: [PATCH 41/44] Restructure tests ExtndableGrids v1.9 --- .gitignore | 2 +- src/experimental/Experimental.jl | 37 ------------------- .../sparsematrixdict.jl | 0 test/ExperimentalParallel.jl | 2 +- test/Project.toml | 2 +- test/runtests.jl | 17 +++++---- ...erimentalXParallel.jl => test_parallel.jl} | 14 +++---- 7 files changed, 19 insertions(+), 55 deletions(-) rename src/{experimental => matrix}/sparsematrixdict.jl (100%) rename test/{ExperimentalXParallel.jl => test_parallel.jl} (93%) diff --git a/.gitignore b/.gitignore index 3ea90ad..068167e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ QUARRY docs/build *~ -Manifest.toml +Manifest*.toml .repl_history quarry diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index dbf14cd..71995de 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -39,43 +39,6 @@ export ILUAMPreconditioner, PILUAMPreconditioner export reorderlinsys, nnz_noflush -include("sparsematrixdict.jl") -export SparseMatrixDict - -include("sparsematrixlnkx.jl") -export SparseMatrixLNKX - -include("sparsematrixlnkdict.jl") -export SparseMatrixLNKDict - -include("extendablesparsematrixscalar.jl") -export ExtendableSparseMatrixScalar - -const ExtendableSparseMatrixDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixDict{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixDict - - -const ExtendableSparseMatrixLNKDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixLNKDict - -const ExtendableSparseMatrixLNKX{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKX{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixLNKX - -const ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNK{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixLNK - - -const 
ExtendableSparseMatrixParallelDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDict{Tv,Ti},Tv,Ti} -ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelDict - -const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKX{Tv,Ti},Tv,Ti} -ExtendableSparseMatrixParallelLNKX(m,n,p)= ExtendableSparseMatrixParallelLNKX{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelLNKX - -const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} -ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelLNKDict end diff --git a/src/experimental/sparsematrixdict.jl b/src/matrix/sparsematrixdict.jl similarity index 100% rename from src/experimental/sparsematrixdict.jl rename to src/matrix/sparsematrixdict.jl diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl index 7fe1029..45b05a9 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallel.jl @@ -195,7 +195,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1) end -function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true) +function partassemble!(A::Union{MTExtendableSparseMatrixCSC},X,Y,nt=1;d=0.1, reset=true) Nx=length(X) Ny=length(Y) diff --git a/test/Project.toml b/test/Project.toml index 97f6793..9ef7608 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -21,5 +21,5 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -ExtendableGrids = "1.8" +ExtendableGrids = "1.9" IterativeSolvers = "0.9" diff --git a/test/runtests.jl b/test/runtests.jl index 01bdce2..3af3ce4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,19 +10,20 @@ using MultiFloats using ForwardDiff -@testset "ExperimentalXParallel" begin - include("ExperimentalXParallel.jl") - for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixLNKX,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] +@testset "Parallel" begin + include("test_parallel.jl") + + for Tm in [STExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, ExtendableSparseMatrix] for N in [10000,20000] - ExperimentalXParallel.test_correctness_build_seq(N,Tm, dim=2) + test_parallel.test_correctness_build_seq(N,Tm, dim=2) end end - for Tm in [MTExtendableSparseMatrixCSC,ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] + for Tm in [MTExtendableSparseMatrixCSC] for N in [10000,20000] - ExperimentalXParallel.test_correctness_update(N,Tm, dim=2) - ExperimentalXParallel.test_correctness_build(N,Tm, dim=2) - ExperimentalXParallel.test_correctness_mul(N,Tm,dim=2) + test_parallel.test_correctness_update(N,Tm, dim=2) + test_parallel.test_correctness_build(N,Tm, dim=2) + test_parallel.test_correctness_mul(N,Tm,dim=2) end end end diff --git a/test/ExperimentalXParallel.jl b/test/test_parallel.jl similarity index 93% rename from test/ExperimentalXParallel.jl rename to test/test_parallel.jl index 128d93b..de26229 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/test_parallel.jl @@ -1,4 +1,4 @@ -module ExperimentalXParallel +module test_parallel using ExtendableSparse, SparseArrays, ExtendableSparse.Experimental using BenchmarkTools @@ -53,7 +53,7 @@ function test_correctness_update(N, # Reset the nonzeros, keeping the 
structure intact nonzeros(A) .= 0 # Parallel assembly whith np threads - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) reset!(A, np) @show num_partitions_per_color(pgrid) testassemble_parallel!(A, pgrid) @@ -81,7 +81,7 @@ function test_correctness_build(N, for np in allnp # Make a new matrix and assemble parallel. # this should result in the same nonzeros - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) A = Tm(nnodes, nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) @test check_partitioning(pgrid) @@ -103,7 +103,7 @@ function test_correctness_mul(N, b = rand(nnodes) A0b = A0 * b for np in allnp - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) @test check_partitioning(pgrid) A = Tm(nnodes, nnodes, num_partitions(pgrid)) partitioning!(A, pgrid[PColorPartitions], @@ -135,7 +135,7 @@ function speedup_update(N, for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) @show num_partitions_per_color(pgrid) reset!(A, num_partitions(pgrid)) testassemble_parallel!(A, pgrid) @@ -171,7 +171,7 @@ function speedup_build(N, for np in allnp # Get the parallel timing # During setup, reset matrix to empty state. - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) reset!(A, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (reset!($A, @@ -198,7 +198,7 @@ function speedup_mul(N, result = [] A = Tm(nnodes, nnodes, 1) for np in allnp - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) @show num_partitions_per_color(pgrid) reset!(A, num_partitions(pgrid)) testassemble_parallel!(A, pgrid) From f0f0f2b39ca7fe21994362969dbc11afa283d2b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 7 Jul 2024 23:06:42 +0200 Subject: [PATCH 42/44] remove experimental code which has been moved to matrix --- .../extendablesparsematrixparallel.jl | 131 ----- .../extendablesparsematrixscalar.jl | 77 --- src/experimental/sparsematrixlnkdict.jl | 461 ------------------ src/experimental/sparsematrixlnkx.jl | 448 ----------------- 4 files changed, 1117 deletions(-) delete mode 100644 src/experimental/extendablesparsematrixparallel.jl delete mode 100644 src/experimental/extendablesparsematrixscalar.jl delete mode 100644 src/experimental/sparsematrixlnkdict.jl delete mode 100644 src/experimental/sparsematrixlnkx.jl diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl deleted file mode 100644 index cd66f92..0000000 --- a/src/experimental/extendablesparsematrixparallel.jl +++ /dev/null @@ -1,131 +0,0 @@ -mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} - """ - Final matrix data - """ - cscmatrix::SparseMatrixCSC{Tv, Ti} - - """ - Vector of dictionaries for new entries - """ - xmatrices::Vector{Tm} - - colparts::Vector{Ti} - partnodes::Vector{Ti} -end - - -function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) 
where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} - - ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n), - [Tm(m,n) for i=1:p], - Ti[1,2], - Ti[1,n+1], - ) -end - -function partitioning!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} - ext.partnodes=partnodes - ext.colparts=colparts - ext -end - -function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m, pc::Vector) where{Tm, Tv, Ti} - ext=ExtendableSparseMatrixXParallel(m,n,length(pc)) -end - - -function reset!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti} - m,n=size(ext.cscmatrix) - ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.xmatrices=[Tm(m,n) for i=1:p] - ext.colparts=Ti[1,2] - ext.partnodes=Ti[1,n+1] - ext -end - -function reset!(ext::ExtendableSparseMatrixXParallel) - reset!(ext,length(ext.xmatrices)) -end - - -function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti} - ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) - np=length(ext.xmatrices) - (m,n)=size(ext.cscmatrix) - ext.xmatrices=[Tm(m,n) for i=1:np] - ext -end - - -function SparseArrays.sparse(ext::ExtendableSparseMatrixXParallel) - flush!(ext) - ext.cscmatrix -end - - - -function Base.setindex!(ext::ExtendableSparseMatrixXParallel, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - error("use rawupdateindex! for new entries into ExtendableSparseMatrixXParallel") - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixXParallel, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - return ext.cscmatrix.nzval[k] - elseif sum(nnz,ext.xmatrices) == 0 - return zero(eltype(ext.cscmatrix)) - else - error("flush! ExtendableSparseMatrixXParallel before using getindex") - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixXParallel, - op, - v, - i, - j, - tid) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.xmatrices[tid],op,v,i,j) - end -end - -# Needed in 1.9 -function Base.:*(ext::ExtendableSparse.Experimental.ExtendableSparseMatrixXParallel{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA - mul!(similar(x),ext,x) -end - -function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) - flush!(ext) - A=ext.cscmatrix - colparts=ext.colparts - partnodes=ext.partnodes - rows = SparseArrays.rowvals(A) - vals = nonzeros(A) - r.=zero(eltype(ext)) - m,n=size(A) - for icol=1:length(colparts)-1 - @tasks for ip in colparts[icol]:colparts[icol+1]-1 - @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1 - @inbounds for i in nzrange(A,inode) - r[rows[i]]+=vals[i]*x[inode] - end - end - end - end - r -end diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl deleted file mode 100644 index 36c1dfa..0000000 --- a/src/experimental/extendablesparsematrixscalar.jl +++ /dev/null @@ -1,77 +0,0 @@ -mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} - """ - Final matrix data - """ - cscmatrix::SparseMatrixCSC{Tv, Ti} - - """ - Matrix for new entries - """ - xmatrix::Tm -end - - -function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} - ExtendableSparseMatrixScalar(spzeros(Tv, Ti, m, n), 
- Tm(m,n) - ) -end - - -function reset!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where {Tm,Tv,Ti} - m,n=size(ext.cscmatrix) - ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.xmatrix=Tm(m,n) - ext -end - - -function flush!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where{Tm,Tv,Ti} - if nnz(ext.xmatrix)>0 - ext.cscmatrix=ext.xmatrix+ext.cscmatrix - ext.xmatrix=Tm(size(ext.cscmatrix)...) - end - ext -end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixScalar) - flush!(ext) - ext.cscmatrix -end - -function Base.setindex!(ext::ExtendableSparseMatrixScalar, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - setindex!(ext.xmatrix,v,i,j) - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixScalar, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] - else - getindex(ext.xmatrix,i,j) - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixScalar, - op, - v, - i, - j) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.xmatrix,op,v,i,j) - end -end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl deleted file mode 100644 index 1d3d8e4..0000000 --- a/src/experimental/sparsematrixlnkdict.jl +++ /dev/null @@ -1,461 +0,0 @@ -""" - $(TYPEDEF) - -Modification of SparseMatrixLNK where the pointer to first index of -column j is stored in a dictionary. -""" -mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} - """ - Number of rows - """ - m::Ti - - """ - Number of columns - """ - n::Ti - - """ - Number of nonzeros - """ - nnz::Ti - - """ - Length of arrays - """ - nentries::Ti - - """ - Linked list of column entries. Initial length is n, - it grows with each new entry. - - colptr[index] contains the next - index in the list or zero, in the later case terminating the list which - starts at index 1<=j<=n for each column j. - """ - colptr::Vector{Ti} - - """ - Dictionary to store start indices of columns - """ - colstart::Dict{Ti,Ti} - - """ - Row numbers. For each index it contains the zero (initial state) - or the row numbers corresponding to the column entry list in colptr. - """ - rowval::Vector{Ti} - - """ - Nonzero entry values correspondin to each pair - (colptr[index],rowval[index]) - """ - nzval::Vector{Tv} -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -function SparseMatrixLNKDict{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - SparseMatrixLNKDict{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Ti,10)) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -function SparseMatrixLNKDict(valuetype::Type{Tv}, indextype::Type{Ti}, m, - n) where {Tv, Ti <: Integer} - SparseMatrixLNKDict{Tv, Ti}(m, n) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKDict(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKDict(Tv, Int, m, n) - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKDict(m, n) = SparseMatrixLNKDict(Float64, m, n) - -""" -$(SIGNATURES) - -Constructor from SparseMatrixCSC. 
- -""" -function SparseMatrixLNKDict(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: - Integer} - lnk = SparseMatrixLNKDict{Tv, Ti}(csc.m, csc.n) - for j = 1:(csc.n) - for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) - lnk[csc.rowval[k], j] = csc.nzval[k] - end - end - lnk -end - -function findindex(lnk::SparseMatrixLNKDict, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k = get(lnk.colstart, j, 0) - if k==0 - return 0,0 - end - k0 = k - while k > 0 - if lnk.rowval[k] == i - return k, 0 - end - k0 = k - k = lnk.colptr[k] - end - return 0, k0 -end - -""" -$(SIGNATURES) - -Return value stored for entry or zero if not found -""" -function Base.getindex(lnk::SparseMatrixLNKDict{Tv, Ti}, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k == 0 - return zero(Tv) - else - return lnk.nzval[k] - end -end - -function addentry!(lnk::SparseMatrixLNKDict, i, j, k, k0) - # increase number of entries - lnk.nentries += 1 - if length(lnk.nzval) < lnk.nentries - newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) - resize!(lnk.nzval, newsize) - resize!(lnk.rowval, newsize) - resize!(lnk.colptr, newsize) - end - - if k0==0 - lnk.colstart[j]=lnk.nentries - end - - # Append entry if not found - lnk.rowval[lnk.nentries] = i - - # Shift the end of the list - lnk.colptr[lnk.nentries] = 0 - - if k0>0 - lnk.colptr[k0] = lnk.nentries - end - - # Update number of nonzero entries - lnk.nnz += 1 - return lnk.nentries -end - -""" -$(SIGNATURES) - -Update value of existing entry, otherwise extend matrix if v is nonzero. -""" -function Base.setindex!(lnk::SparseMatrixLNKDict, v, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = v - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = v - end - return lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero, no new -entry is created. -""" -function updateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero a new entry -is created nevertheless. -""" -function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - else - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Return tuple containing size of the matrix. -""" -Base.size(lnk::SparseMatrixLNKDict) = (lnk.m, lnk.n) - -""" -$(SIGNATURES) - -Return number of nonzero entries. -""" -SparseArrays.nnz(lnk::SparseMatrixLNKDict) = lnk.nnz - - -""" - $(SIGNATURES) -Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. 
-""" -function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - (;colptr,nzval,rowval,m,n)=csc - l=nnz(lnk)+nnz(csc) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - if nnz(csc)>0 - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - end - for (j,k) in lnk.colstart - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end -end - - -""" - $(SIGNATURES) -Add lnk and csc without creation of intermediate data. -""" -function add_directly(lnk::SparseMatrixLNKDict{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - @assert(csc.m==lnk.m) - @assert(csc.n==lnk.n) - - # overallocate arrays in order to avoid - # presumably slower push! - xnnz = nnz(csc) + nnz(lnk) - colptr = Vector{Ti}(undef, csc.n + 1) - rowval = Vector{Ti}(undef, xnnz) - nzval = Vector{Tv}(undef, xnnz) - - # Detect the maximum column length of lnk - lnk_maxcol = 0 - for (j,k) in lnk.colstart - lcol = zero(Ti) - while k > 0 - lcol += 1 - k = lnk.colptr[k] - end - lnk_maxcol = max(lcol, lnk_maxcol) - end - - # pre-allocate column data - col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] - - inz = 1 # counts the nonzero entries in the new matrix - - in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) - - in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) - - # loop over all columns - for j = 1:(csc.n) - # Copy extension entries into col and sort them - k = get(lnk.colstart, j, 0) - l_lnk_col = 0 - while k > 0 - if lnk.rowval[k] > 0 - l_lnk_col += 1 - col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) - end - k = lnk.colptr[k] - end - sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) - - # jointly sort lnk and csc entries into new matrix data - # this could be replaced in a more transparent manner by joint sorting: - # make a joint array for csc and lnk col, sort them. - # Will this be faster? - - colptr[j] = inz - jlnk = one(Ti) # counts the entries in col - jcsc = csc.colptr[j] # counts entries in csc - - while true - if in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || - !in_lnk_col(jlnk, l_lnk_col)) - # Insert entries from csc into new structure - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] - jcsc += 1 - inz += 1 - elseif in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) - # Add up entries from csc and lnk - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval - jcsc += 1 - inz += 1 - jlnk += 1 - elseif in_lnk_col(jlnk, l_lnk_col) - # Insert entries from lnk res. 
col into new structure - rowval[inz] = col[jlnk].rowval - nzval[inz] = col[jlnk].nzval - jlnk += 1 - inz += 1 - else - break - end - end - end - colptr[csc.n + 1] = inz - resize!(rowval, inz - 1) - resize!(nzval, inz - 1) - SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval) -end - - - -""" - $(SIGNATURES) - -Add SparseMatrixCSC matrix and [`SparseMatrixLNKDict`](@ref) lnk, returning a SparseMatrixCSC -""" -Base.:+(lnk::SparseMatrixLNKDict, csc::SparseMatrixCSC) = add_directly(lnk, csc) - -function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} - lnew=sum(nnz,lnkdictmatrices) - if lnew>0 - (;colptr,nzval,rowval,m,n)=cscmatrix - l=lnew+nnz(cscmatrix) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - - ip=1 - for lnk in lnkdictmatrices - for (j,k) in lnk.colstart - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - ip=ip+1 - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end - end - return cscmatrix -end - -function reset!(m::SparseMatrixLNKDict{Tv,Ti}) where {Tv,Ti} - m.nnz=0 - m.nentries=0 - m.colptr=zeros(Ti,10) - m.colstart::Dict{Ti,Ti} - m.rowval=zeros(Ti,10) - m.nzval=zeros(Ti,10) - m -end - - -""" -$(SIGNATURES) - -Constructor from SparseMatrixLNKDict. - -""" -function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKDict)::SparseMatrixCSC - csc = spzeros(lnk.m, lnk.n) - lnk + csc -end - -function SparseArrays.sparse(lnk::SparseMatrixLNKDict) - lnk + spzeros(lnk.m, lnk.n) -end - -function Base.copy(S::SparseMatrixLNKDict) - SparseMatrixLNKDict(size(S, 1), - size(S, 2), - S.nnz, - S.nentries, - copy(S.colptr), - copy(S.colstart), - copy(S.rowvals), - copy(S.nzval)) -end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl deleted file mode 100644 index bd2cdcf..0000000 --- a/src/experimental/sparsematrixlnkx.jl +++ /dev/null @@ -1,448 +0,0 @@ -""" - $(TYPEDEF) - -Modification of SparseMatrixLNK where the pointer to first index of -column j is stored in a dictionary. -""" -mutable struct SparseMatrixLNKX{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} - """ - Number of rows - """ - m::Ti - - """ - Number of columns - """ - n::Ti - - """ - Number of nonzeros - """ - nnz::Ti - - """ - Length of arrays - """ - nentries::Ti - - """ - Linked list of column entries. Initial length is n, - it grows with each new entry. - - colptr[index] contains the next - index in the list or zero, in the later case terminating the list which - starts at index 1<=j<=n for each column j. - """ - colptr::Vector{Ti} - - """ - Start indices of columns - """ - colstart::Vector{Ti} - - """ - Row numbers. For each index it contains the zero (initial state) - or the row numbers corresponding to the column entry list in colptr. - """ - rowval::Vector{Ti} - - """ - Nonzero entry values correspondin to each pair - (colptr[index],rowval[index]) - """ - nzval::Vector{Tv} -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -function SparseMatrixLNKX{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - SparseMatrixLNKX{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), zeros(Ti,n), zeros(Ti,10), zeros(Ti,10)) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. 
-""" -function SparseMatrixLNKX(valuetype::Type{Tv}, indextype::Type{Ti}, m, - n) where {Tv, Ti <: Integer} - SparseMatrixLNKX{Tv, Ti}(m, n) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKX(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKX(Tv, Int, m, n) - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKX(m, n) = SparseMatrixLNKX(Float64, m, n) - - -function findindex(lnk::SparseMatrixLNKX, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k =lnk.colstart[j] - if k==0 - return 0,0 - end - k0 = k - while k > 0 - if lnk.rowval[k] == i - return k, 0 - end - k0 = k - k = lnk.colptr[k] - end - return 0, k0 -end - -""" -$(SIGNATURES) - -Return value stored for entry or zero if not found -""" -function Base.getindex(lnk::SparseMatrixLNKX{Tv, Ti}, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k == 0 - return zero(Tv) - else - return lnk.nzval[k] - end -end - -function addentry!(lnk::SparseMatrixLNKX, i, j, k, k0) - # increase number of entries - lnk.nentries += 1 - if length(lnk.nzval) < lnk.nentries - newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) - resize!(lnk.nzval, newsize) - resize!(lnk.rowval, newsize) - resize!(lnk.colptr, newsize) - end - - if k0==0 - lnk.colstart[j]=lnk.nentries - end - - # Append entry if not found - lnk.rowval[lnk.nentries] = i - - # Shift the end of the list - lnk.colptr[lnk.nentries] = 0 - - if k0>0 - lnk.colptr[k0] = lnk.nentries - end - - # Update number of nonzero entries - lnk.nnz += 1 - return lnk.nentries -end - -""" -$(SIGNATURES) - -Update value of existing entry, otherwise extend matrix if v is nonzero. -""" -function Base.setindex!(lnk::SparseMatrixLNKX, v, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = v - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = v - end - return lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero, no new -entry is created. -""" -function updateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero a new entry -is created nevertheless. -""" -function rawupdateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - else - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Return tuple containing size of the matrix. -""" -Base.size(lnk::SparseMatrixLNKX) = (lnk.m, lnk.n) - -""" -$(SIGNATURES) - -Return number of nonzero entries. -""" -SparseArrays.nnz(lnk::SparseMatrixLNKX) = lnk.nnz - -""" -$(SIGNATURES) - -Dummy flush! method for SparseMatrixLNKX. Just -used in test methods -""" -function flush!(lnk::SparseMatrixLNKX{Tv, Ti}) where {Tv, Ti} - return lnk -end - -""" - $(SIGNATURES) -Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. 
-""" -function add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - (;colptr,nzval,rowval,m,n)=csc - l=nnz(lnk)+nnz(csc) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - if nnz(csc)>0 - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - end - for j=1:n - k=lnk.colstart[j] - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end -end - - -""" - $(SIGNATURES) -Add lnk and csc without creation of intermediate data. -""" -function add_directly(lnk::SparseMatrixLNKX{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - @assert(csc.m==lnk.m) - @assert(csc.n==lnk.n) - - # overallocate arrays in order to avoid - # presumably slower push! - xnnz = nnz(csc) + nnz(lnk) - colptr = Vector{Ti}(undef, csc.n + 1) - rowval = Vector{Ti}(undef, xnnz) - nzval = Vector{Tv}(undef, xnnz) - - # Detect the maximum column length of lnk - lnk_maxcol = 0 - for j=1:lnk.n - k=lnk.colstart[j] - lcol = zero(Ti) - while k > 0 - lcol += 1 - k = lnk.colptr[k] - end - lnk_maxcol = max(lcol, lnk_maxcol) - end - - # pre-allocate column data - col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] - - inz = 1 # counts the nonzero entries in the new matrix - - in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) - - in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) - - # loop over all columns - for j = 1:(csc.n) - # Copy extension entries into col and sort them - k = lnk.colstart[j] - l_lnk_col = 0 - while k > 0 - if lnk.rowval[k] > 0 - l_lnk_col += 1 - col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) - end - k = lnk.colptr[k] - end - sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) - - # jointly sort lnk and csc entries into new matrix data - # this could be replaced in a more transparent manner by joint sorting: - # make a joint array for csc and lnk col, sort them. - # Will this be faster? - - colptr[j] = inz - jlnk = one(Ti) # counts the entries in col - jcsc = csc.colptr[j] # counts entries in csc - - while true - if in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || - !in_lnk_col(jlnk, l_lnk_col)) - # Insert entries from csc into new structure - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] - jcsc += 1 - inz += 1 - elseif in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) - # Add up entries from csc and lnk - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval - jcsc += 1 - inz += 1 - jlnk += 1 - elseif in_lnk_col(jlnk, l_lnk_col) - # Insert entries from lnk res. 
col into new structure - rowval[inz] = col[jlnk].rowval - nzval[inz] = col[jlnk].nzval - jlnk += 1 - inz += 1 - else - break - end - end - end - colptr[csc.n + 1] = inz - resize!(rowval, inz - 1) - resize!(nzval, inz - 1) - SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval) -end - - - -""" - $(SIGNATURES) - -Add SparseMatrixCSC matrix and [`SparseMatrixLNKX`](@ref) lnk, returning a SparseMatrixCSC -""" -Base.:+(lnk::SparseMatrixLNKX, csc::SparseMatrixCSC) = add_directly(lnk, csc) - -function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} - lnew=sum(nnz,lnkdictmatrices) - if lnew>0 - (;colptr,nzval,rowval,m,n)=cscmatrix - l=lnew+nnz(cscmatrix) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - - ip=1 - for lnk in lnkdictmatrices - for j=1:n - k=lnk.colstart[j] - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - ip=ip+1 - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end - end - return cscmatrix -end - - - -""" -$(SIGNATURES) - -Constructor from SparseMatrixLNKX. - -""" -function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKX)::SparseMatrixCSC - csc = spzeros(lnk.m, lnk.n) - lnk + csc -end - -function SparseArrays.sparse(lnk::SparseMatrixLNKX) - lnk + spzeros(lnk.m, lnk.n) -end - -function Base.copy(S::SparseMatrixLNKX) - SparseMatrixLNKX(size(S, 1), - size(S, 2), - S.nnz, - S.nentries, - copy(S.colptr), - copy(S.colstart), - copy(S.rowvals), - copy(S.nzval)) -end From 41d23ccfda4fe1cf5f4f00e8b2ce7044cefca428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 7 Jul 2024 23:25:34 +0200 Subject: [PATCH 43/44] ExplicitImports --- Project.toml | 2 +- src/ExtendableSparse.jl | 37 +++++++++++-------------------------- test/Project.toml | 2 ++ test/runtests.jl | 5 +++++ 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/Project.toml b/Project.toml index 0372ecb..546b08a 100644 --- a/Project.toml +++ b/Project.toml @@ -40,7 +40,7 @@ Pardiso = "0.5.1" Requires = "1.1.3" Sparspak = "0.3.6" StaticArrays = "1.5.24" -julia = "1.6" +julia = "1.9" [extras] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 0dd8536..d348d00 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -1,13 +1,17 @@ module ExtendableSparse -using SparseArrays,StaticArrays -using LinearAlgebra -using Sparspak -using ILUZero + +using DocStringExtensions: DocStringExtensions, SIGNATURES, TYPEDEF,TYPEDFIELDS +using ILUZero: ILUZero, ldiv!, nnz using OhMyThreads: @tasks +using LinearAlgebra: LinearAlgebra, Diagonal, Hermitian, Symmetric, Tridiagonal, + cholesky, cholesky!, convert, lu!, mul!, norm, transpose +using SparseArrays: SparseArrays, AbstractSparseMatrix, SparseMatrixCSC, + dropzeros!, findnz, nzrange, sparse, spzeros +using Sparspak: Sparspak, sparspaklu, sparspaklu! +using StaticArrays: StaticArrays, SMatrix, SVector +using SuiteSparse: SuiteSparse +import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros -if !isdefined(Base, :get_extension) - using Requires -end # Define our own constant here in order to be able to # test things at least a little bit.. 
@@ -17,9 +21,7 @@ if USE_GPL_LIBS using SuiteSparse end -using DocStringExtensions -import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") include("matrix/abstractsparsematrixextension.jl") @@ -71,23 +73,6 @@ export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver export factorize!, update! -@static if !isdefined(Base, :get_extension) - function __init__() - @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" begin - include("../ext/ExtendableSparsePardisoExt.jl") - end - @require IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" begin - include("../ext/ExtendableSparseIncompleteLUExt.jl") - end - @require AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" begin - include("../ext/ExtendableSparseAlgebraicMultigridExt.jl") - end - @require AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" begin - include("../ext/ExtendableSparseAMGCLWrapExt.jl") - end - end -end - """ ``` ILUTPreconditioner(;droptol=1.0e-3) diff --git a/test/Project.toml b/test/Project.toml index 9ef7608..76e3ae8 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,10 @@ [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" +ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" diff --git a/test/runtests.jl b/test/runtests.jl index 3af3ce4..b154d1d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,12 @@ using BenchmarkTools using MultiFloats using ForwardDiff +using ExplicitImports +@testset "ExplicitImports" begin + @test ExplicitImports.check_no_implicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing + @test ExplicitImports.check_no_stale_explicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing +end @testset "Parallel" begin include("test_parallel.jl") From 24f73ab641398d0c2853d966938df06570cd19cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 16 Jul 2024 22:47:20 +0200 Subject: [PATCH 44/44] fix some docstrings, AMGCLWrap dependency --- Project.toml | 2 +- docs/src/internal.md | 11 ++++++++++ src/matrix/abstractsparsematrixextension.jl | 23 +++++++-------------- src/matrix/sparsematrixdilnkc.jl | 10 +++++++++ 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/Project.toml b/Project.toml index 546b08a..5bf880c 100644 --- a/Project.toml +++ b/Project.toml @@ -31,7 +31,7 @@ ExtendableSparseIncompleteLUExt = "IncompleteLU" ExtendableSparsePardisoExt = "Pardiso" [compat] -AMGCLWrap = "0.3.1,0.4" +AMGCLWrap = "0.4" AlgebraicMultigrid = "0.4,0.5,0.6" DocStringExtensions = "0.8, 0.9" ILUZero = "0.2" diff --git a/docs/src/internal.md b/docs/src/internal.md index 853dbce..71a10f3 100644 --- a/docs/src/internal.md +++ b/docs/src/internal.md @@ -13,6 +13,17 @@ Pages = ["sparsematrixlnk.jl"] Modules = [ExtendableSparse] Pages = ["sparsematrixcsc.jl"] ``` +## New API +Under development - aimed at multithreading +```@autodocs +Modules = [ExtendableSparse] +Pages = ["abstractsparsematrixextension.jl", + "abstractextendablesparsematrixcsc.jl", + "sparsematrixdilnkc.jl", + 
"genericextendablesparsematrixcsc.jl", + "genericmtextendablesparsematrixcsc.jl"] +``` + ## Misc methods diff --git a/src/matrix/abstractsparsematrixextension.jl b/src/matrix/abstractsparsematrixextension.jl index d8070fc..c206483 100644 --- a/src/matrix/abstractsparsematrixextension.jl +++ b/src/matrix/abstractsparsematrixextension.jl @@ -5,22 +5,13 @@ Abstract type for sparse matrix extension. Subtypes T_ext must implement: -Constructor T_ext(m,n) -SparseArrays.nnz(ext::T_ext) -Base.size(ext::T_ext) - - -Base.sum(extmatrices::Vector{T_ext}, csx) - - Add csx matrix and extension matrices (one per partition) and return csx matrix - -rawupdateindex!(ext::Text, op, v, i, j) where {Tv, Ti} - - Set ext[i,j]+=v, possibly insert entry into matrix. - - -Optional: - -Base.+(ext::T_ext, csx) - - Add extension matrix and csc/csr matrix, return csx matrix +- Constructor `T_ext(m,n)` +- `SparseArrays.nnz(ext::T_ext)` +- `Base.size(ext::T_ext)` +- `Base.sum(extmatrices::Vector{T_ext}, csx)`: add csr or csc matrix and extension matrices (one per partition) and return csx matrix +- `Base.+(ext::T_ext, csx)` (optional) - Add extension matrix and csc/csr matrix, return csx matrix +- `rawupdateindex!(ext::Text, op, v, i, j, tid) where {Tv, Ti}`: Set `ext[i,j]op=v`, possibly insert new entry into matrix. `tid` is a +task or partition id """ abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl index a2cdea8..ea58534 100644 --- a/src/matrix/sparsematrixdilnkc.jl +++ b/src/matrix/sparsematrixdilnkc.jl @@ -103,6 +103,11 @@ function SparseMatrixDILNKC(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv lnk end +""" +$(SIGNATURES) + +Find index in matrix. +""" function findindex(lnk::SparseMatrixDILNKC, i, j) if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) throw(BoundsError(lnk, (i, j))) @@ -137,6 +142,11 @@ function Base.getindex(lnk::SparseMatrixDILNKC{Tv, Ti}, i, j) where {Tv, Ti} end end +""" + $(SIGNATURES) + +Add entry. +""" function addentry!(lnk::SparseMatrixDILNKC, i, j, k, k0) # increase number of entries lnk.nentries += 1