diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 02aa07d..5458760 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,4 +1,7 @@ -style = "sciml" +style = "yas" always_for_in = false -separate_kwargs_with_semicolon = true format_markdown = true +import_to_using = false +pipe_to_function_call = false +short_to_long_function_def = false +always_use_return = false \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ff5995..882d933 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,15 +15,26 @@ jobs: fail-fast: false matrix: version: - - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. + - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - 'nightly' os: - ubuntu-latest - - macos-latest - windows-latest + - macos-latest # arm + - macOS-13 # intel arch: - x64 + - aarch64 + exclude: + - os: ubuntu-latest + arch: aarch64 + - os: windows-latest + arch: aarch64 + - os: macOS-13 + arch: aarch64 + - os: macos-latest + arch: x64 steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 @@ -42,6 +53,8 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + env: + JULIA_NUM_THREADS: 4 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 docs: diff --git a/.gitignore b/.gitignore index 3ea90ad..068167e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ QUARRY docs/build *~ -Manifest.toml +Manifest*.toml .repl_history quarry diff --git a/Project.toml b/Project.toml index 65f17f1..5bf880c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,15 @@ name = "ExtendableSparse" uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" authors = ["Juergen Fuhrmann "] -version = "1.4.1" +version = "1.5.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -29,7 +31,7 @@ ExtendableSparseIncompleteLUExt = "IncompleteLU" ExtendableSparsePardisoExt = "Pardiso" [compat] -AMGCLWrap = "0.3.1,0.4" +AMGCLWrap = "0.4" AlgebraicMultigrid = "0.4,0.5,0.6" DocStringExtensions = "0.8, 0.9" ILUZero = "0.2" @@ -38,7 +40,7 @@ Pardiso = "0.5.1" Requires = "1.1.3" Sparspak = "0.3.6" StaticArrays = "1.5.24" -julia = "1.6" +julia = "1.9" [extras] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/docs/make.jl b/docs/make.jl index 3a70bf5..5dbe7c4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,6 +5,7 @@ function mkdocs() makedocs(; sitename = "ExtendableSparse.jl", modules = [ExtendableSparse], doctest = false, + warnonly = true, clean = false, authors = "J. Fuhrmann", repo = "https://github.com/j-fu/ExtendableSparse.jl", diff --git a/docs/src/internal.md b/docs/src/internal.md index 853dbce..71a10f3 100644 --- a/docs/src/internal.md +++ b/docs/src/internal.md @@ -13,6 +13,17 @@ Pages = ["sparsematrixlnk.jl"] Modules = [ExtendableSparse] Pages = ["sparsematrixcsc.jl"] ``` +## New API +Under development - aimed at multithreading +```@autodocs +Modules = [ExtendableSparse] +Pages = ["abstractsparsematrixextension.jl", + "abstractextendablesparsematrixcsc.jl", + "sparsematrixdilnkc.jl", + "genericextendablesparsematrixcsc.jl", + "genericmtextendablesparsematrixcsc.jl"] +``` + ## Misc methods diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 700bbda..d348d00 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -1,12 +1,17 @@ module ExtendableSparse -using SparseArrays,StaticArrays -using LinearAlgebra -using Sparspak -using ILUZero -if !isdefined(Base, :get_extension) - using Requires -end +using DocStringExtensions: DocStringExtensions, SIGNATURES, TYPEDEF,TYPEDFIELDS +using ILUZero: ILUZero, ldiv!, nnz +using OhMyThreads: @tasks +using LinearAlgebra: LinearAlgebra, Diagonal, Hermitian, Symmetric, Tridiagonal, + cholesky, cholesky!, convert, lu!, mul!, norm, transpose +using SparseArrays: SparseArrays, AbstractSparseMatrix, SparseMatrixCSC, + dropzeros!, findnz, nzrange, sparse, spzeros +using Sparspak: Sparspak, sparspaklu, sparspaklu! +using StaticArrays: StaticArrays, SMatrix, SVector +using SuiteSparse: SuiteSparse +import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros + # Define our own constant here in order to be able to # test things at least a little bit.. @@ -17,58 +22,57 @@ if USE_GPL_LIBS end -using DocStringExtensions - -import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") +include("matrix/abstractsparsematrixextension.jl") include("matrix/sparsematrixlnk.jl") +include("matrix/sparsematrixdilnkc.jl") +include("matrix/abstractextendablesparsematrixcsc.jl") include("matrix/extendable.jl") +include("matrix/genericmtextendablesparsematrixcsc.jl") +include("matrix/genericextendablesparsematrixcsc.jl") + +const ExtendableSparseMatrix=ExtendableSparseMatrixCSC +const MTExtendableSparseMatrixCSC{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti} +MTExtendableSparseMatrixCSC(m,n,args...)=MTExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) + +const STExtendableSparseMatrixCSC{Tv,Ti}=GenericExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti} +STExtendableSparseMatrixCSC(m,n,args...)=STExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) -export SparseMatrixLNK, - ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse + +export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, STExtendableSparseMatrixCSC, GenericMTExtendableSparseMatrixCSC +export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset!, nnznew +export partitioning! export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet include("factorizations/factorizations.jl") +include("experimental/Experimental.jl") + +include("factorizations/simple_iteration.jl") +export simple, simple! + +include("matrix/sprand.jl") +export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark + +export rawupdateindex!, updateindex! + + + + export JacobiPreconditioner, ILU0Preconditioner, ILUZeroPreconditioner, PointBlockILUZeroPreconditioner, ParallelJacobiPreconditioner, ParallelILU0Preconditioner, - BlockPreconditioner,allow_views, - reorderlinsys + BlockPreconditioner,allow_views export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver export factorize!, update! -include("factorizations/simple_iteration.jl") -export simple, simple! - -include("matrix/sprand.jl") -export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark - - -@static if !isdefined(Base, :get_extension) - function __init__() - @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" begin - include("../ext/ExtendableSparsePardisoExt.jl") - end - @require IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" begin - include("../ext/ExtendableSparseIncompleteLUExt.jl") - end - @require AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" begin - include("../ext/ExtendableSparseAlgebraicMultigridExt.jl") - end - @require AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" begin - include("../ext/ExtendableSparseAMGCLWrapExt.jl") - end - end -end - """ ``` ILUTPreconditioner(;droptol=1.0e-3) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl new file mode 100644 index 0000000..71995de --- /dev/null +++ b/src/experimental/Experimental.jl @@ -0,0 +1,44 @@ +module Experimental +using ExtendableSparse, SparseArrays +using LinearAlgebra +using SparseArrays: AbstractSparseMatrixCSC +import SparseArrays: nonzeros, getcolptr,nzrange +import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using ExtendableSparse: AbstractExtendableSparseMatrixCSC, AbstractSparseMatrixExtension +using DocStringExtensions +using Metis +using Base.Threads +using OhMyThreads: @tasks +import ExtendableSparse: factorize!, update!, partitioning! + +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) + +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ilu_Al-Kurdi_Mittal.jl")) +#using .ILUAM +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "pilu_Al-Kurdi_Mittal.jl")) +#using .PILUAM + +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel" ,"iluam.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "piluam.jl")) + +@eval begin + @makefrommatrix ILUAMPreconditioner + @makefrommatrix PILUAMPreconditioner +end + +function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) + p.A = A + update!(p) + p +end + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, compare_matrices_light +export ILUAMPreconditioner, PILUAMPreconditioner +export reorderlinsys, nnz_noflush + + + +end + diff --git a/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl new file mode 100644 index 0000000..08268ee --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -0,0 +1,464 @@ +include("supersparse.jl") +include("preparatory.jl") +#include("prep_time.jl") + +mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Linked list structures holding data of extension, one for each thread + """ + lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}} + + """ + Number of Nodes per Threads + """ + nnts::Vector{Ti} + + """ + sortednodesperthread[i,j] = local index of the j-th global column in the i-th LNK matrix + (this is used e.g. when assembling the matrix) + """ + sortednodesperthread::Matrix{Ti} + + """ + depth+1 x nn matrix, + old_noderegions[i,j] = region in which node j is, in level i + old refers to the fact that j is the 'old index' (i.e. grid index, not matrix index, see 'new_indices') + """ + old_noderegions::Matrix{Ti} + + """ + cellsforpart[i] is a vector containing all cells in the i-th region + cellsforpart has length nt*depth + 1 + """ + cellsforpart::Vector{Vector{Ti}} + + """ + globalindices[i][j] = index in the global (ESMP & CSC) matrix of the j-th column of the i-th LNK matrix + (this maps the local indices (in the LNKs) to the global indices (ESMP & CSC)) + """ + globalindices::Vector{Vector{Ti}} + + """ + For some applications such as the parallel ILU preconditioner, a block form is necessary. + Thus, the columns are reordered and the A[i,i] does not correspond to the i-th node of the grid, + but A[new_indices[i], new_indices[i]] does + """ + new_indices::Vector{Ti} + + """ + Reverse: rev_new_indices[new_indices[i]] = i, for all i + """ + rev_new_indices::Vector{Ti} + + """ + starts[i] gives the first column of the i-th region, i.e. starts[1] = 1 + starts has length nt*depth + 1 + """ + start::Vector{Ti} + + """ + cellparts[i] = region of the i-th cell + """ + cellparts::Vector{Ti} + + """ + Number of threads + """ + nt::Ti + + """ + How often is the separator partitioned? (if never: depth = 1) + """ + depth::Ti + + phash::UInt64 + + """ + Number of rows / number of nodes in grid + """ + n::Ti + + """ + Number of columns / number of nodes in grid (only works for square matrices) + """ + m::Ti + + +end + + +""" +$(SIGNATURES) + +`ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}` + +Create an ExtendableSparseMatrixParallel based on a grid. +The grid is specified by nc (number of cells), nn (number of nodes) and the `mat_cell_node` (i.e. grid[CellNodes] if ExtendableGrids is used). +Here, `mat_cell_node[k,i]` is the i-th node in the k-th cell. +The matrix structure is made for parallel computations with `nt` threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again +`block_struct=true` means, the matrix should be reordered two have a block structure, this is necessary for parallel ILU, for `false`, the matrix is not reordered +""" +function ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer} + nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; block_struct) + csc = spzeros(Tv, Ti, nn, nn) + lnk = [SuperSparseMatrixLNK{Tv, Ti}(nn, nnts[tid]) for tid=1:nt] + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) +end + + +""" +$(SIGNATURES) + +`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}` + +`A[i,j] += v` +This function should be used, if the thread in which the entry appears is known (`tid`). +If the thread is not known, use `addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false)`, this function calculates `tid`. +If you know that the entry is not yet known to the CSC structure, set `known_that_unknown=true`. +""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + + +""" +$(SIGNATURES) + +`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}` + +A[i,j] += v, using any partition. +If the partition should be specified (for parallel use), use +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`. +""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + _, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + +#--------------------------------- + +""" +$(SIGNATURES) +`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer` + +Update element of the matrix with operation `op`. +Use this method if the 'thread of the element' is not known, otherwise use `updateindex!(ext, op, v, i, j, tid)`. +""" +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +""" +$(SIGNATURES) +`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer` + +Update element of the matrix with operation `op`. +Use this method if the 'thread of the element' is known, otherwise use `updateindex!(ext, op, v, i, j)`. +""" +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +""" +$(SIGNATURES) +`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer}` + +Like [`updateindex!`](@ref) but without checking if v is zero. +Use this method if the 'thread of the element' is not known. +""" +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +""" +$(SIGNATURES) +`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer}` + +Like [`updateindex!`](@ref) but without checking if v is zero. +Use this method if the 'thread of the element' is known +""" +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +""" +$(SIGNATURES) +``Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer` + +Find index in CSC matrix and return value, if it exists. +Otherwise, return value from extension. +""" +function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + end + + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + ext.lnkmatrices[tid][i, ext.sortednodesperthread[tid, j]] + +end + +""" +$(SIGNATURES) +`Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti}` + +Find index in CSC matrix and set value if it exists. Otherwise, +set index in extension if `v` is nonzero. +""" +function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + #@info typeof(tid), typeof(j) + jj = ext.sortednodesperthread[tid, j] + ext.lnkmatrices[tid][i, jj] = v + end +end + + + +#------------------------------------ + +""" +$(SIGNATURES) + +Reset matrix, such that CSC and LNK have no non-zero entries. +""" +function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + A.cscmatrix = spzeros(Tv, Ti, A.n, A.m) + A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(A.n, A.nnts[tid]) for tid=1:A.nt] +end + +""" +$(SIGNATURES) + +Compute number of non-zero elements, after flush. +""" +function nnz_flush(ext::ExtendableSparseMatrixParallel) + flush!(ext) + return nnz(ext.cscmatrix) +end + +""" +$(SIGNATURES) + +Compute number of non-zero elements, without flush. +""" +function nnz_noflush(ext::ExtendableSparseMatrixParallel) + return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt]) +end + +function matrixindextype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + Ti +end + +function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + Tv +end + + +""" +$(SIGNATURES) + +Show matrix, without flushing +""" +function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel) + #flush!(ext) + xnnzCSC, xnnzLNK = nnz_noflush(ext) + m, n = size(ext) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnzCSC, + " stored ", + xnnzCSC == 1 ? "entry" : "entries", + " in CSC and ", + xnnzLNK, + " stored ", + xnnzLNK == 1 ? "entry" : "entries", + " in LNK. CSC:") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnzCSC == 0) + print(io, ":\n") + Base.print_array(IOContext(io), ext.cscmatrix) + end +end + +""" +`function entryexists2(CSC, i, j)` + +Find out if CSC already has an nonzero entry at i,j without any allocations +""" +function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry at i,j + #vals = + #ids = CSC.colptr[j]:(CSC.colptr[j+1]-1) + i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1)) +end + +""" +$(SIGNATURES) + +Find out if i,j is non-zero entry in CSC, if yes, update entry with += v and return `true`, if not return `false` +""" +function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer} + p1 = CSC.colptr[j] + p2 = CSC.colptr[j+1]-1 + + searchk = searchsortedfirst(view(CSC.rowval, p1:p2), i) + p1 - 1 + + if (searchk <= p2) && (CSC.rowval[searchk] == i) + CSC.nzval[searchk] += v + return true + else + return false + end +end + + + + +Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) +include("struct_flush.jl") + + + +import LinearAlgebra.mul! + +""" +$(SIGNATURES) +```function LinearAlgebra.mul!(y, A, x)``` + +This overwrites the mul! function for A::ExtendableSparseMatrixParallel +""" +function LinearAlgebra.mul!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv, Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #@info "my matvec" + _, nnzLNK = nnz_noflush(A) + @assert nnzLNK == 0 + #mul!(y, A.cscmatrix, x) + matvec!(y, A, x) +end + + +""" +$(SIGNATURES) +```function matvec!(y, A, x)``` + +y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel +this computation is done in parallel, it has the same result as y = A.cscmatrix*x +""" +function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + nt = A.nt + depth = A.depth + colptr = A.cscmatrix.colptr + nzv = A.cscmatrix.nzval + rv = A.cscmatrix.rowval + + LinearAlgebra._rmul_or_fill!(y, 0.0) + + for level=1:depth + @threads for tid::Int64=1:nt + for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 # in nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + end + + + + @threads for tid=1:1 + for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + + y +end diff --git a/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..ad2207d --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,267 @@ +#module ILUAM +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +#@info "ILUAM" + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + + +function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer} + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.colptr + rowval = A.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + +end + +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + #@info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + #@info diag[1:20]' + #@info diag[end-20:end]' + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + +function forward_subst_old!(y, v, nzval, diag, A) + #@info "fso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])" + n = A.n + colptr = A.colptr + rowval = A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end + + #y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_subst_old!(x, y, nzval, diag, A) + #@info "bso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])" + n = A.n + colptr = A.colptr + rowval = A.rowval + @inbounds for j=n:-1:1 + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + x +end + + +function ldiv!(x, ILU::ILUAMPrecon, b) + #t = @elapsed begin + #@info "iluam ldiv 1" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + #@info "ILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10] + #, b[1], x[1], y[1]#maximum(abs.(b)), maximum(abs.(x)) + #end + #println("$t") #@info t + x +end + +function ldiv!(ILU::ILUAMPrecon, b) + #@info "iluam ldiv 2" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#= +function forward_subst!(y, v, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "fw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end + + #y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + +function backward_subst!(x, y, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "bw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + #wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + x +end + +function iluam_subst(ILU::ILUAMPrecon, b) + y = copy(b) + forward_subst!(y, b, ILU) + z = copy(b) + backward_subst!(z, y, ILU) + z +end + + + +function iluam_subst_old(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + z = copy(b) + backward_subst_old!(z, y, nzval, diag, A) + #backward_subst!(z, y, ILU) + z +end +=# + + + +#end \ No newline at end of file diff --git a/src/experimental/ExtendableSparseMatrixParallel/iluam.jl b/src/experimental/ExtendableSparseMatrixParallel/iluam.jl new file mode 100644 index 0000000..24b75be --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/iluam.jl @@ -0,0 +1,35 @@ +mutable struct ILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrix + factorization::ILUAMPrecon + phash::UInt64 + function ILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +ILUAMPreconditioner() +ILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). +""" +function ILUAMPreconditioner end + +function update!(p::ILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = iluAM(p.A.cscmatrix) + p.phash=p.A.phash + else + iluam!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::ILUAMPreconditioner)=true +allow_views(::Type{ILUAMPreconditioner})=true + diff --git a/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..ad9529b --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,351 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +#@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + start::AbstractVector + nt::Integer + depth::Integer + +end + +function use_vector_par(n, nt, Ti) + point = [Vector{Ti}(undef, n) for tid=1:nt] + @threads for tid=1:nt + point[tid] = zeros(Ti, n) + end + point +end + +function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) + for j=j0:j1-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end +end + +function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + #@info "piluAM!" + diag = ILU.diag + nzval = ILU.nzval + ILU.A = A + start = ILU.start + + ILU.nt = A.nt + nt = A.nt + + ILU.depth = A.depth + depth = A.depth + + + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + n = A.cscmatrix.n # number of columns + diag = Vector{Ti}(undef, n) + nzval = Vector{Tv}(undef, length(rowval)) #copy(A.nzval) + point = use_vector_par(n, A.nt, Int32) + + + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + end + #elseif rowval[v] + end + end + end + + for level=1:depth + @threads for tid=1:nt + for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end + end + end + + #point = zeros(Ti, n) #Vector{Ti}(undef, n) + for j=start[depth*nt+1]:start[depth*nt+2]-1 + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[1][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = zero(Ti) + end + end + +end + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + start = A.start + nt = A.nt + depth = A.depth + + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + nzval = Vector{Tv}(undef, length(rowval)) #copy(A.nzval) + n = A.cscmatrix.n # number of columns + diag = Vector{Ti}(undef, n) + point = use_vector_par(n, A.nt, Int32) + + # find diagonal entries + # + + #= + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + #break + end + #elseif rowval[v] + end + end + =# + + + + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + end + #elseif rowval[v] + end + end + end + + + #@info diag[1:20]' + #@info diag[end-20:end]' + + for level=1:depth + @threads for tid=1:nt + for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end + end + end + + #point = zeros(Ti, n) #Vector{Ti}(undef, n) + for j=start[depth*nt+1]:start[depth*nt+2]-1 + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[1][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = zero(Ti) + end + end + + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A.cscmatrix, start, nt, depth) +end + +function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A) + #@info "pfso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])" + #@info "fwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + + y .= 0 + + for level=1:depth + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + end + end + + @inbounds for j=start[depth*nt+1]:start[depth*nt+2]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + +end + + +function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "pbso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])" + + #@info "bwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + #wrk = copy(y) + + + @inbounds for j=start[depth*nt+2]-1:-1:start[depth*nt+1] + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + + for level=depth:-1:1 + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid+1]-1:-1:start[(level-1)*nt+tid] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + end + end + end + +end + + +function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "piluam ldiv 1" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "PILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10] + #@info "PILUAM:", maximum(abs.(b-A*x)), b[1], x[1], maximum(abs.(b)), maximum(abs.(x)) + x +end + +function ldiv!(ILU::PILUAMPrecon, b) + #@info "piluam ldiv 2" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(b, y, nzval, diag, start, nt, depth, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/experimental/ExtendableSparseMatrixParallel/piluam.jl b/src/experimental/ExtendableSparseMatrixParallel/piluam.jl new file mode 100644 index 0000000..50a46fd --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/piluam.jl @@ -0,0 +1,36 @@ +mutable struct PILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrixParallel + factorization::PILUAMPrecon + phash::UInt64 + function PILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +PILUAMPreconditioner() +PILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). +""" +function PILUAMPreconditioner end + +function update!(p::PILUAMPreconditioner) + #@warn "Should flush now", nnz_noflush(p.A) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = piluAM(p.A) + p.phash=p.A.phash + else + piluAM!(p.factorization, p.A) + end + p +end + +allow_views(::PILUAMPreconditioner)=true +allow_views(::Type{PILUAMPreconditioner})=true + diff --git a/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl b/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl new file mode 100644 index 0000000..fe5686f --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl @@ -0,0 +1,941 @@ +""" +`function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth)` + +`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). +`nt` is the number of threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... +To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. + +This should be somewhere else, longterm +""" +function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; + sequential=false, assembly=:cellwise, + minsize_sepa=10, do_print=false, check_partition=false, ne=0, ce=[], mat_edge_node=[], block_struct=true) + #grid = getgrid(nm; x0, x1) + adepth = 0 + if sequential + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print)#) + else + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print) + end + + if (adepth != depth) && do_print + @info "The requested depth of partitioning is too high. The depth is set to $adepth." + end + depth = adepth + + if assembly == :cellwise + cfp = bettercellsforpart(cellparts, depth*nt+1) + + else + edgeparts = edgewise_partition_from_cellwise_partition(nc, ne, ce, cellparts) + cfp = bettercellsforpart(edgeparts, depth*nt+1) + end + + + if check_partition + if assembly == :cellwise + validate_partition(nn, mat_cell_node, cellparts, start, allcells, nt, depth, assembly) + else + validate_partition(nn, mat_edge_node, cellparts, start, allcells, nt, depth, assembly) + end + end + + #@info length.(cfp) + #@info minimum(cellparts), maximum(cellparts), nt, depth + + (nnts, s, onr, gi, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( + cellparts, allcells, start, nn, Ti, nt, depth; block_struct + ) + + + return nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth +end + + + +""" +`function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` + +After the cellregions (partitioning of the grid) of the grid have been computed, other things have to be computed, such as `sortednodesperthread` a depth+1 x num_nodes matrix, here `sortednodesperthreads[i,j]` is the point at which the j-th node appears in the i-th level matrix in the corresponding submatrix. +`cellregs` contains the partiton for each cell. +Furthermore, `nnts` (number of nodes of the threads) is computed, which contain for each thread the number of nodes that are contained in the cells of that thread. +`allcells` and `start` together behave like the rowval and colptr arrays of a CSC matrix, such that `allcells[start[j]:start[j+1]-1]` are all cells that contain the j-th node. +`nn` is the number of nodes in the grid. +`Ti` is the type (Int64,...) of the elements in the created arrays. +`nt` is the number of threads. +`block_struct=true` means, the matrix should be reordered two have a block structure, this is necessary for parallel ILU, for `false`, the matrix is not reordered +""" +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth; block_struct = true) + + #num_matrices = maximum(cellregs) + #depth = Int(floor((num_matrices-1)/nt)) + + #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector + #nnts = [zeros(Ti, nt+1) for i=1:depth+1] + nnts = zeros(Ti, nt) + #noderegs_max_tmp = 0 + old_noderegions = zeros(Ti, (depth+1, nn)) + + # Count nodes per thread: + tmp = zeros(depth+1) + for j=1:nn + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + #nnts[crmod] += 1 + old_noderegions[level,j] = crmod + if !(crmod in tmp[1:tmpctr-1]) + nnts[crmod] += 1 + #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] + #push!(tmp, crmod) + if tmpctr > depth+1 + @info "Cellregs: ", sortedcellregs + @info "Levels : ", Int.(ceil.(sortedcellregs/nt)) + @info "PartsMod: ", ((sortedcellregs.-1).%nt).+1 + end + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + # Reorder inidices to receive a block structure: + # Taking the original matrix [a_ij] and mapping each i and j to new_indices[i] and new_indices[j], gives a block structure + # the reverse is also defined rev_new_indices[new_indices[k]] = k + # From now on we will only use this new ordering + counter_for_reorder = zeros(Ti, depth*nt+1) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder[(level-1)*nt + reg] += 1 #(reg-1)*depth + level] += 1 + end + + starts = vcat([0], cumsum(counter_for_reorder)) + counter_for_reorder2 = zeros(Ti, depth*nt+1) + new_indices = Vector{Ti}(undef, nn) + rev_new_indices = Vector{Ti}(undef, nn) + origin = Vector{Ti}(undef, nn) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder2[(level-1)*nt + reg] += 1 + origin[j] = reg + new_indices[j] = starts[(level-1)*nt + reg]+counter_for_reorder2[(level-1)*nt + reg] + rev_new_indices[new_indices[j]] = j + end + starts .+= 1 + + if !block_struct + new_indices = collect(1:nn) + rev_new_indices = collect(1:nn) + starts = [] + end + + # Build sortednodesperthread and globalindices array: + # They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j + # Note that j has to be a `new index` + + sortednodesperthread = zeros(Ti, (nt, nn)) #vvcons(Ti, nnts) + globalindices = vvcons(Ti, nnts) + gictrs = zeros(Ti, nt) + + for nj=1:nn + oj = rev_new_indices[nj] + cells = @view allcells[start[oj]:start[oj+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + #level = Int(ceil(cr/nt)) + if !(crmod in tmp[1:tmpctr-1]) + gictrs[crmod] += 1 # , level] += 1 + sortednodesperthread[crmod,nj] = gictrs[crmod] + globalindices[crmod][gictrs[crmod]] = nj + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + nnts, sortednodesperthread, old_noderegions, globalindices, new_indices, rev_new_indices, starts +end + + + + + +""" +`function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` + +This function partitons the separator, which is done if `depth`>1 (see `grid_to_graph_ps_multi!` and/or `preparatory_multi_ps`). +`cellregs` contains the regions/partitions/colors of each cell. +`nc` is the number of cells in the grid. +`ACSC` is the adjacency matrix of the graph of the (separator-) grid (vertex in graph is cell in grid, edge in graph means two cells share a node) stored as a CSC. +`nt` is the number of threads. +`level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... +`preparatory_multi_ps` is the number of separator-cells. +""" +function separate!(cellregs, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print) + # current number of cells treated + nc2 = size(ACSC, 1) + + indptr = collect(1:nc2+1) + indices = zeros(Int64, nc2) + rowval = zeros(Int64, nc2) + + indptrT = collect(1:ctr_sepanodes+1) + indicesT = zeros(Int64, ctr_sepanodes) + rowvalT = zeros(Int64, ctr_sepanodes) + + for i=1:ctr_sepanodes + j = ri[i] + indices[j] = i + indicesT[i] = j + rowval[j] = 1 + rowvalT[i] = 1 + end + + + + R = SparseMatrixCSC(ctr_sepanodes, nc2, indptr, indices, rowval) + RT = SparseMatrixCSC(nc2, ctr_sepanodes, indptrT, indicesT, rowvalT) + # current adjacency matrix, taken as a part of the given one ACSC + RART = dropzeros(R)*ACSC*dropzeros(RT) + + cellregs2 = Metis.partition(RART, nt) + + + for i=1:ctr_sepanodes + if cellregs[gi[i]] < level0*nt+1 + @warn "cell treated in this iteration was not a separator-cell last iteration" + end + cellregs[gi[i]] = level0*nt + cellregs2[i] + end + + # how many cells are in the separator of the new partiton (which is only computed on the separator of the old partition) + new_ctr_sepanodes = 0 + ri2 = Vector{Int64}(undef, ctr_sepanodes) + gi2 = Vector{Int64}(undef, ctr_sepanodes) + + for tid=1:nt + for i=1:ctr_sepanodes + if cellregs2[i] == tid + neighbors = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + rows = gi[vcat(neighbors, [i])] + #counts how many different regions (besides) the separator are adjacent to the current cell + x = how_many_different_below(cellregs[rows], (level0+1)*nt+1) + if x > 1 + cellregs[gi[i]] = (level0+1)*nt+1 + new_ctr_sepanodes += 1 + gi2[new_ctr_sepanodes] = gi[i] + ri2[new_ctr_sepanodes] = i + end + end + end + end + + + ri2 = ri2[1:new_ctr_sepanodes] + gi2 = gi2[1:new_ctr_sepanodes] + + if do_print + @info "At level $(level0+1), we found $new_ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART, new_ctr_sepanodes, ri2, gi2 +end + + + +""" +`function grid_to_graph_ps_multi_nogrid!(nc, nn, mat_cell_node, nt, depth)` + +The function assigns colors/partitons to each cell in the `grid`. First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again... +The grid is specified by nc (number of cells), nn (number of nodes) and the mat_cell_node (i.e. grid[CellNodes] if ExtendableGrids is used). +Here, `mat_cell_node[k,i]` is the i-th node in the k-th cell. +`nt` is the number of threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... +""" +function grid_to_graph_cellwise_nogrid!(nc, nn, mat_cell_node, nt, depth; minsize_sepa=10, do_print=false) + A = SparseMatrixLNK{Int64, Int64}(nc, nc) + number_cells_per_node = zeros(Int64, nn) + for j=1:nc + for node_id in mat_cell_node[:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, nn+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:nc + for node_id in mat_cell_node[:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:nn + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = Vector{Int64}(undef, nc) + gi = Vector{Int64}(undef, nc) + ctr_sepanodes = 0 + + for tid=1:nt + for j=1:nc + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return allcells, start, cellregs, actual_depth +end + + +""" +`function grid_to_graph_ps_multi_par_nogrid!(nc, nn, mat_cell_node, nt, depth)` + +Same result as `grid_to_graph_ps_multi_nogrid!`, but computed on multiple threads. +""" +function grid_to_graph_cellwise_par_nogrid!(cn, nc, nn, nt, depth; minsize_sepa=10, do_print=false) + As = [ExtendableSparseMatrix{Int64, Int64}(nc, nc) for tid=1:nt] + number_cells_per_node = zeros(Int64, nn) + + + for j=1:nc + tmp = view(cn, :, j) + for node_id in tmp + number_cells_per_node[node_id] += 1 + end + end + + + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, nn+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + + for j=1:nc + tmp = view(cn, :, j) + for node_id in tmp + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + node_range = get_starts(nn, nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + cells = @view allcells[start[j]:start[j+1]-1] + l = length(cells) + for (i,id1) in enumerate(cells) + ce = view(cells, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + cellregs = Metis.partition(ACSC, nt) + + sn = [Vector{Int64}(undef, Int(ceil(nc/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:nc + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end + end + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = ACSC + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + #grid[CellRegions] = cellregs + #grid + return allcells, start, cellregs, actual_depth +end + +""" +function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = Vector{Int64}(undef, num_cells(grid)) + gi = Vector{Int64}(undef, num_cells(grid)) + ctr_sepanodes = 0 + + for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return allcells, start, cellregs, actual_depth +end + + +function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid)) + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + for i=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,i] + number_edges_per_node[node_id] += 1 + end + end + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,j] + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + edges = @view alledges[start[j]:start[j+1]-1] + for (i,id1) in enumerate(edges) + for id2 in edges[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + + sn = Vector{Int64}(undef, num_edges(grid)) + gi = Vector{Int64}(undef, num_edges(grid)) + ctr_sepanodes = 0 + + edgeregs = copy(partition) + for tid=1:nt + for j=1:num_edges(grid) + if edgeregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(edgeregs[rows], nt+1) > 1 + edgeregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(edgeregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return alledges, start, edgeregs, actual_depth +end + +function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + cn = grid[EdgeNodes] + + As = [ExtendableSparseMatrix{Int64, Int64}(num_edges(grid), num_edges(grid)) for tid=1:nt] + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_edges_per_node[node_id] += 1 + end + end + + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + edges = @view alledges[start[j]:start[j+1]-1] + l = length(edges) + for (i,id1) in enumerate(edges) + ce = view(edges, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + cellregs = Metis.partition(ACSC, nt) + + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_edges(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end + end + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level (1), we found ctr_sepanodes edges that have to be treated in the next iteration!" + end + + RART = ACSC + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + #grid[CellRegions] = cellregs + #grid + return alledges, start, cellregs, actual_depth +end +""" + +function edgewise_partition_from_cellwise_partition(nc, ne, ce, cellregs) + #ce = grid[CellEdges] + edgeregs = maximum(cellregs)*ones(Int64, ne) + + for icell=1:nc + tmp = cellregs[icell] + for iedge in ce[:,icell] + if tmp < edgeregs[iedge] + edgeregs[iedge] = tmp + end + end + end + + edgeregs +end + +""" +`function add_all_par!(As)` + +Add LNK matrices (stored in a vector) parallely (tree structure). +The result is stored in the first LNK matrix. +""" +function add_all_par!(As) + nt = length(As) + depth = Int(floor(log2(nt))) + ende = nt + for level=1:depth + + @threads for tid=1:2^(depth-level) + #@info "$level, $tid" + start = tid+2^(depth-level) + while start <= ende + As[tid] += As[start] + start += 2^(depth-level) + end + end + ende = 2^(depth-level) + end + As[1] + +end + + +""" +`function vvcons(Ti, lengths)` + +`lengths` is a vector of integers. +The function creates a vector of zero vectors of type `Ti` of length `lengths[i]`. +""" +function vvcons(Ti, lengths) + x::Vector{Vector{Ti}} = [zeros(Ti, i) for i in lengths] + return x +end + + +""" +`function bettercellsforpart(xx, upper)` + +`xx` are the CellRegions (i.e. the color/partition of each cell). +`upper` is the number of partitions (upper=depth*nt+1). +The function returns a vector e.g. [v1, v2, v3, v4, v5]. +The element v1 would be the list of cells that are in partition 1 etc. +The function is basically a faster findall. +""" +function bettercellsforpart(xx, upper) + ctr = zeros(Int64, upper) + for x in xx + ctr[x] += 1 + end + cfp = vvcons(Int64, ctr) + ctr .= 1 + for (i,x) in enumerate(xx) + cfp[x][ctr[x]] = i + ctr[x] += 1 + end + cfp +end + + + +function get_starts(n, nt) + ret = ones(Int64, nt+1) + ret[end] = n+1 + for i=nt:-1:2 + ret[i] = ret[i+1] - Int(round(ret[i+1]/i)) #Int(round(n/nt))-1 + end + ret +end + +function last_nz(x) + n = length(x) + for j=n:-1:1 + if x[j] != 0 + return (j, x[j]) + end + end +end + + +function how_many_different_below(x0, y; u=0) + x = copy(x0) + z = unique(x) + t = findall(w->ww>u,z[t]) + length(t) +end + + + +function lookat_grid_to_graph_ps_multi!(nm, nt, depth) + grid = getgrid(nm) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = [] + gi = [] + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + push!(sn, j) + push!(gi, j) + end + end + RART = ACSC + #sn = 1:num_cells(grid) + #gi = 1:num_cells(grid) + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate_careful!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi) + if ctr_sepanodes == 0 + return RART + end + end + + + #return allcells, start, cellregs + RART +end + + +function adjacencies(grid) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + allcells, start, SparseArrays.SparseMatrixCSC(A) +end + +function check_adjacencies(nm) + grid = getgrid(nm) + allcells, start, A = adjacencies(grid) + + i = 1 + cells1 = sort(vcat([i], A.rowval[A.colptr[i]:(A.colptr[i+1]-1)])) #adjacent cells + nodes2 = grid[CellNodes][:,i] + cells2 = sort(unique(vcat([allcells[start[j]:start[j+1]-1] for j in nodes2]...))) + + @info cells1 + @info cells2 + @info maximum(abs.(cells1-cells2)) + + +end + +#= +function check_partition(nm, nt, depth) + grid = getgrid(nm) + + (allcells, start, cellregs, adepth, ACSC) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=true)#) + + if (adepth != depth) + @info "The requested depth of partitioning is too high. The depth is set to $adepth." + end + depth = adepth + + validate_partition(num_nodes(grid), num_cells(grid), grid, cellregs, start, allcells, nt, depth, ACSC) +end +=# + +function validate_partition(nn, mat, cellregs, start, allcells, nt, depth, assemblytype) + violation_ctr = 0 + + if assemblytype == :cellwise + key = CellNodes + else + key = EdgeNodes + end + + for j=1:nn + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + levels = Int.(ceil.(sortedcellregs/nt)) + + for i=1:depth+1 + ids_lev = findall(x->x==i, levels) + if length(ids_lev) > 1 + violation_ctr += 1 + + if violation_ctr == 1 + @info "Node Id : $j (we only show one violation)" + @info "Cellregs: $sortedcellregs" + @info "Levels : $levels" + + loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) + cells_at_level4 = allcells[loc.+(start[j]-1)] + @info cells_at_level4, cellregs[cells_at_level4] + @info mat[:,cells_at_level4[1]], mat[:,cells_at_level4[2]] + end + end + end + end + @info "We found $violation_ctr violation(s)" +end diff --git a/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl b/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl new file mode 100644 index 0000000..3169d4b --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl @@ -0,0 +1,273 @@ +function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + _, nnzLNK = nnz_noflush(A) + + if nnzLNK == 0 + return + end + + if !do_dense + A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) + + else + if keep_zeros + A.cscmatrix = dense_flush_keepzeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + else + A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + end + end + A.phash = phash(A.cscmatrix) + A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(A.n, A.nnts[tid]) for tid=1:A.nt] + +end + +""" +`CSC_RLNK_plusequals_less3_reordered_super!` from `plusequals.jl` +""" +function sparse_flush!(A::ExtendableSparseMatrixParallel; keep_zeros=true) + + #dropzeros!( + plus_remap(A.lnkmatrices, A.cscmatrix, A.globalindices; keep_zeros) + #) + +end + + + +""" +`CSC_RLNK_si_oc_ps_dz_less_reordered` from `conversion.jl` +""" +function dense_flush_keepzeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + #@warn [As[i].nnz for i=1:nt], [As[i].n for i=1:nt], [As[i].m for i=1:nt] + #@info maximum.([As[i].colptr for i=1:nt]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + if As[regmod].rowval[k] != 0 + if ctr > nnz + @info "ctr > nnz, $nj, $oj" + end + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + else + while k>0 + if As[regmod].rowval[k] != 0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr -= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + +function dense_flush_removezeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + else + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr -= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + + + + + + diff --git a/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl b/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl new file mode 100644 index 0000000..691e158 --- /dev/null +++ b/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl @@ -0,0 +1,798 @@ + +mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + + Initial length is n, + it grows with each new entry. + """ + rowval::Vector{Ti} + + """ + Nonzero entry values corresponding to each pair + (colptr[index],rowval[index]) + + Initial length is n, it grows with each new entry. + """ + nzval::Vector{Tv} + + """ + (Unsorted) list of all columns with non-zero entries + """ + collnk::Vector{Ti} + + # counts the number of columns in use + colctr::Ti +end + + +function SparseArrays.SparseMatrixCSC(A::SuperSparseMatrixLNK{Tv, Ti})::SparseArrays.SparseMatrixCSC where {Tv, Ti <: Integer} + SparseArrays.SparseMatrixCSC(SparseMatrixLNK{Tv, Ti}(A.m, A.n, A.nnz, A.nentries, A.colptr, A.rowval, A.nzval)) + +end + +function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SuperSparseMatrixLNK{Tv, Ti}(m, n, 0, n, zeros(Ti, n), zeros(Ti, n), zeros(Tv, n), zeros(Ti, n), 0) +end + + +function ExtendableSparse.findindex(lnk::SuperSparseMatrixLNK, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = j + k0 = j + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +Return tuple containing size of the matrix. +""" +Base.size(lnk::SuperSparseMatrixLNK) = (lnk.m, lnk.n) + +""" +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SuperSparseMatrixLNK{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SuperSparseMatrixLNK, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + lnk.colptr[k0] = lnk.nentries + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SuperSparseMatrixLNK, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = v + lnk.nnz += 1 + return lnk + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +function rawupdateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +#= +mutable struct ColEntry{Tv, Ti <: Integer} + rowval::Ti + nzval::Tv +end + +# Comparison method for sorting +Base.isless(x::ColEntry, y::ColEntry) = (x.rowval < y.rowval) +=# + +function get_column!(col::Vector{ColEntry{Tv, Ti}}, lnk::SuperSparseMatrixLNK{Tv, Ti}, j::Ti)::Ti where {Tv, Ti <: Integer} + k = j + ctr = zero(Ti) + while k>0 + if abs(lnk.nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr +end + + +function remove_doubles!(col, coll) + #input_ctr = 1 + last = 1 + for j=2:coll + if col[j].rowval == col[last].rowval + col[last].nzval += col[j].nzval + else + last += 1 + if last != j + col[last] = col[j] + end + end + end + last +end + +function get_column_removezeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function get_column_keepzeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + #if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + #end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function merge_into!(rowval::Vector{Ti}, nzval::Vector{Tv}, C::SparseArrays.SparseMatrixCSC{Tv, Ti}, col::Vector{ColEntry{Tv, Ti}}, J::Ti, coll::Ti, ptr1::Ti) where {Tv, Ti <: Integer} + j_min = 1 + numshifts = 0 + j_last = 0 + last_row = 0 + + #@warn "MERGING $J" + + #rowval0 = copy(C.rowval[C.colptr[J]:C.colptr[J+1]-1]) + #endptr = C.colptr[J+1] + + for (di,i) in enumerate(C.colptr[J]:C.colptr[J+1]-1) + for j=j_min:coll + #if col[j].rowval == last_row + # #@info "!! col j rowval == last row" + #end + if col[j].rowval < C.rowval[i] #ptr1+di+numshifts] #i+numshifts] + if col[j].rowval == last_row + #@info "$(ptr1+di+numshifts) : backwards EQUALITY: " + nzval[ptr1+di+numshifts] += col[j].nzval + else + #@info "$(ptr1+di+numshifts) : Insert from col: j=$j" + #shift_e!(C.rowval, C.nzval, 1, i+numshifts, C.colptr[end]-1) + rowval[ptr1+di+numshifts] = col[j].rowval + nzval[ptr1+di+numshifts] = col[j].nzval + numshifts += 1 + #endptr += 1 + end + j_last = j + elseif col[j].rowval > C.rowval[i] #if col[j].rowval + #@info "$(ptr1+di+numshifts) : Insert from C: i=$i" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i] + j_min = j + break + else + #@info "$(ptr1+di+numshifts) : normal EQUALITY: i=$i, j=$j" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i]+col[j].nzval + #numshifts += 1 + j_min = j+1 + j_last = j + + if j == coll + #@info "$(ptr1+di+numshifts+1) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i+1:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i+1:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + break + end + + if j == coll + #@info "$(ptr1+di+numshifts) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + last_row = col[j].rowval + end + end + endptr = ptr1 + numshifts + (C.colptr[J+1]-C.colptr[J]) + last_row = 0 + numshifts_old = numshifts + numshifts = 0 + #start_ptr = endptr - 1 #C.colptr[J+1]-1 + if j_last > 0 + last_row = col[j_last].rowval + end + + if j_last != coll + for j=j_last+1:coll + if col[j].rowval != last_row + numshifts += 1 + #shift_e!(C.rowval, C.nzval, 1, start_ptr+numshifts, C.colptr[end]-1) + #for k=start_ptr+numshifts: + #@info "$(endptr+numshifts) : after..." + rowval[endptr+numshifts] = col[j].rowval + nzval[endptr+numshifts] = col[j].nzval + last_row = rowval[endptr+numshifts] + #colptr[J+1:end] .+= 1 + else + nzval[endptr+numshifts] += col[j].nzval + end + end + end + + return numshifts + numshifts_old + +end + + +function print_col(col, coll) + v = zeros((2, coll)) + for j=1:coll + v[1,j] = col[j].rowval + v[2,j] = col[j].nzval + end + @info v +end + + +""" +$(SIGNATURES) + +Add the matrices `lnks` of type SuperSparseMatrixLNK onto the SparseMatrixCSC `csc`. +`gi[i]` maps the indices in `lnks[i]` to the indices of `csc`. +""" +function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, Ti <: Integer} + nt = length(lnks) + + if keep_zeros + get_col! = get_column_keepzeros! + else + get_col! = get_column_removezeros! + end + lnkscols = vcat([lnks[i].collnk[1:lnks[i].colctr] for i=1:nt]...) + supersparsecolumns = vcat([gi[i][lnks[i].collnk[1:lnks[i].colctr]] for i=1:nt]...) + num_cols = sum([lnks[i].colctr for i=1:nt]) + tids = Vector{Ti}(undef, num_cols) + ctr = 0 + for i=1:nt + for j=1:lnks[i].colctr + ctr += 1 + tids[ctr] = i + end + end + + + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + coll = sum([lnks[i].nnz for i=1:nt]) + nnz_sum = length(csc.rowval) + coll + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + if csc.m < coll + coll = csc.m + end + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:coll] + numshifts = 0 + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + J = 1 + i0 = 0 + #lj_last = [] + #tid_last = [] + lj_last = Vector{Ti}(undef, nt) + tid_last = Vector{Ti}(undef, nt) + ctr_last = 1 + gj_last = 0 + for J=1:length(sortedcolumns)-1 + gj_now = sortedcolumns[J] + gj_next = sortedcolumns[J+1] + + lj_last[ctr_last] = lnkscols[sortedcolumnids[J]] + tid_last[ctr_last] = tids[sortedcolumnids[J]] + + if gj_now != gj_next + #@info typeof(lnks) + # do stuff from gj_last to gj_now / from last_lj to J + #@info lj_last, tid_last + coll = get_col!(col, lnks, lj_last, tid_last, ctr_last) + + nns = merge_into!(rowval, nzval, csc, col, gj_now, coll, colptr[gj_now]-one(Ti)) + numshifts += nns + + + colptr[gj_now+1:sortedcolumns[J+1]] = csc.colptr[gj_now+1:sortedcolumns[J+1]].+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + + rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.rowval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + #nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.nzval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + + + #for k=csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1 + # k2 = k+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + # rowval[k2] = csc.rowval[k] + # nzval[k2] = csc.nzval[k] + #end + + gj_last = gj_now + ctr_last = 0 #tids[sortedcolumnids[J]]] + end + + ctr_last += 1 + + + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + #for ... + # take many columns together if necessary in `get_column` + #end + + + +end + + +""" +$(SIGNATURES) + +Add the SuperSparseMatrixLNK `lnk` onto the SparseMatrixCSC `csc`. +`gi` maps the indices in `lnk` to the indices of `csc`. +""" +function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer} + + #@info lnk.collnk[1:lnk.colctr] + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + + #@info supersparsecolumns + #@info sortedcolumns + #@info lnk.collnk[1:length(sortedcolumns)-1] + #@info lnk.collnk[sortedcolumnids[1:length(sortedcolumns)-1]] + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + + coll = get_column!(col, lnk, lnk.collnk[sortedcolumnids[J]] ) + #@info typeof(i), typeof(coll), typeof(colptr), typeof(colptr[i]), typeof(colptr[i]-1) + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-one(Ti)) + numshifts += nns + + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #= + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + =# + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + +end + + +""" + +function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + if lnk.nnz == 0 + return csc + elseif length(csc.rowval) == 0 + return SparseMatrixCSC(lnk) + else + return lnk + csc + end +end + +function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + #@info ">>>>>>> J <<<<<<<<<<<<<<<" + # insert new added column here / dummy + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + #print_col(col, coll) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + + numshifts += nns + #j = colptr[i] #sortedcolumns[J]] + #rowval[j] = J + #nzval[j] = J + # insertion end + + #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts + + #a = i+1 + #b = sortedcolumns[J+1] + #@info a, b + + + #colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + + #= + + @info csc.colptr[a:b] + + colptr[a:b] = csc.colptr[a:b].+numshifts + + #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts + @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] + @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] + rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + #rowval[colptrsortedcolumns[J+1]] + =# + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + +function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + numshifts += nns + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + + + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + +function twodisjointsets(n, k) + A = rand(1:n, k) + B = zeros(Int64, k) + done = false + ctr = 0 + while ctr != k + v = rand(1:n) + if !(v in A) + ctr += 1 + B[ctr] = v + end + end + + A, B +end + +function distinct(x, n) + y = zeros(typeof(x[1]), n) + ctr = 0 + while ctr != n + v = rand(x) + if !(v in y[1:ctr]) + ctr += 1 + y[ctr] = v + end + end + y +end +""" + +function mean(x) + sum(x)/length(x) +end + +function form(x) + [minimum(x), mean(x), maximum(x)] +end + + + + + + + + + + + diff --git a/src/factorizations/blockpreconditioner.jl b/src/factorizations/blockpreconditioner.jl index 7c97eca..bf0d5de 100644 --- a/src/factorizations/blockpreconditioner.jl +++ b/src/factorizations/blockpreconditioner.jl @@ -49,7 +49,7 @@ function update!(precon::BlockPreconditioner) np=length(precon.partitioning) precon.facts=Vector{Any}(undef,np) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np factorization=deepcopy(precon.factorization) AP=precon.A[precon.partitioning[ipart],precon.partitioning[ipart]] FP=factorization(AP) @@ -66,11 +66,11 @@ function LinearAlgebra.ldiv!(p::BlockPreconditioner,v) np=length(partitioning) if allow_views(p.factorization) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np ldiv!(facts[ipart],view(v,partitioning[ipart])) end else - Threads.@threads for ipart=1:np + @tasks for ipart=1:np vv=v[partitioning[ipart]] ldiv!(facts[ipart],vv) view(v,partitioning[ipart]).=vv @@ -85,11 +85,11 @@ function LinearAlgebra.ldiv!(u,p::BlockPreconditioner,v) np=length(partitioning) if allow_views(p.factorization) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np ldiv!(view(u,partitioning[ipart]),facts[ipart],view(v,partitioning[ipart])) end else - Threads.@threads for ipart=1:np + @tasks for ipart=1:np uu=u[partitioning[ipart]] ldiv!(uu,facts[ipart],v[partitioning[ipart]]) view(u,partitioning[ipart]).=uu diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index d278d8b..c9809d3 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -51,6 +51,48 @@ Determine if factorization is a solver or not issolver(::AbstractLUFactorization) = true issolver(::AbstractPreconditioner) = false + + +"""" + @makefrommatrix(fact) + +For an AbstractFactorization `MyFact`, provide methods +``` + MyFact(A::ExtendableSparseMatrix; kwargs...) + MyFact(A::SparseMatrixCSC; kwargs...) +``` +""" +macro makefrommatrix(fact) + return quote + function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) + factorize!($(esc(fact))(;kwargs...), A) + end + function $(esc(fact))(A::SparseMatrixCSC; kwargs...) + $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) + end + end +end + +include("ilu0.jl") +include("iluzero.jl") +include("parallel_jacobi.jl") +include("parallel_ilu0.jl") +include("sparspak.jl") +include("blockpreconditioner.jl") +include("jacobi.jl") + +@eval begin + @makefrommatrix ILU0Preconditioner + @makefrommatrix ILUZeroPreconditioner + @makefrommatrix PointBlockILUZeroPreconditioner + @makefrommatrix JacobiPreconditioner + @makefrommatrix ParallelJacobiPreconditioner + @makefrommatrix ParallelILU0Preconditioner + @makefrommatrix SparspakLU + @makefrommatrix UpdateteableBlockpreconditioner + @makefrommatrix BlockPreconditioner +end + """ ``` factorize!(factorization, matrix) @@ -64,9 +106,9 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) update!(p) p end +factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) -factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) """ ``` lu!(factorization, matrix) @@ -134,45 +176,6 @@ LinearAlgebra.ldiv!(fact::AbstractFactorization, v) = ldiv!(fact.factorization, -"""" - @makefrommatrix(fact) - -For an AbstractFactorization `MyFact`, provide methods -``` - MyFact(A::ExtendableSparseMatrix; kwargs...) - MyFact(A::SparseMatrixCSC; kwargs...) -``` -""" -macro makefrommatrix(fact) - return quote - function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) - factorize!($(esc(fact))(;kwargs...), A) - end - function $(esc(fact))(A::SparseMatrixCSC; kwargs...) - $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) - end - end -end - -include("jacobi.jl") -include("ilu0.jl") -include("iluzero.jl") -include("parallel_jacobi.jl") -include("parallel_ilu0.jl") -include("sparspak.jl") -include("blockpreconditioner.jl") - -@eval begin - @makefrommatrix ILU0Preconditioner - @makefrommatrix ILUZeroPreconditioner - @makefrommatrix PointBlockILUZeroPreconditioner - @makefrommatrix JacobiPreconditioner - @makefrommatrix ParallelJacobiPreconditioner - @makefrommatrix ParallelILU0Preconditioner - @makefrommatrix SparspakLU - @makefrommatrix UpdateteableBlockpreconditioner - @makefrommatrix BlockPreconditioner -end if USE_GPL_LIBS #requires SuiteSparse which is not available in non-GPL builds diff --git a/src/matrix/abstractextendablesparsematrixcsc.jl b/src/matrix/abstractextendablesparsematrixcsc.jl new file mode 100644 index 0000000..491ebfb --- /dev/null +++ b/src/matrix/abstractextendablesparsematrixcsc.jl @@ -0,0 +1,299 @@ +""" + +Subtypes must implement: +- SparseArrays.sparse (may be should be sparse! ?) flush+return SparseMatrixCSC +- Constructor from SparseMatrixCSC +rawupdateindex! +reset!: empty all internals, just keep size +""" + +abstract type AbstractExtendableSparseMatrixCSC{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. +""" +SparseArrays.nnz(ext::AbstractExtendableSparseMatrixCSC)=nnz(sparse(ext)) + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return nonzeros in ext.cscmatrix. +""" +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrixCSC)=nonzeros(sparse(ext)) + +Base.size(ext::AbstractExtendableSparseMatrixCSC)=size(ext.cscmatrix) + + + +""" +$(SIGNATURES) + +Return element type. +""" +Base.eltype(::AbstractExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = Tv + + + +""" +$(SIGNATURES) + + Create SparseMatrixCSC from ExtendableSparseMatrix +""" +SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrixCSC)=sparse(A) + + + + +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrixCSC) + A=sparse(ext) + xnnz = nnz(A) + m, n = size(A) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnz, + " stored ", + xnnz == 1 ? "entry" : "entries") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnz == 0) + print(io, ":\n") + Base.print_array(IOContext(io), A) + end +end + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return rowvals in ext.cscmatrix. +""" +SparseArrays.rowvals(ext::AbstractExtendableSparseMatrixCSC)=rowvals(sparse(ext)) + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return colptr of in ext.cscmatrix. +""" +SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrixCSC)=getcolptr(sparse(ext)) + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return findnz(ext.cscmatrix). +""" +SparseArrays.findnz(ext::AbstractExtendableSparseMatrixCSC)=findnz(sparse(ext)) + + +@static if VERSION >= v"1.7" + SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrixCSC)= SparseArrays._checkbuffers(sparse(ext)) +end + +""" + A\b + +[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components +are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. +In that case, Julias standard `\` is called, which is realized via UMFPACK. +""" +function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{Tv, Ti}, + b::AbstractVector) where {Tv, Ti} + SparspakLU(sparse(ext)) \ b +end + + +""" +$(SIGNATURES) + +[`\\`](@ref) for Symmetric{ExtendableSparse} +""" +function LinearAlgebra.:\(symm_ext::Symmetric{Tm, T}, + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}} + Symmetric(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... +end + +""" +$(SIGNATURES) + +[`\\`](@ref) for Hermitian{ExtendableSparse} +""" +function LinearAlgebra.:\(symm_ext::Hermitian{Tm, T}, + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}} + Hermitian(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... +end + +if USE_GPL_LIBS + for (Tv) in (:Float64, :ComplexF64) + @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{$Tv, Ti}, + B::AbstractVector) where {Ti} + sparse(ext) \ B + end end + + @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, + AbstractExtendableSparseMatrixCSC{ + $Tv, + Ti + }}, + B::AbstractVector) where {Ti} + symm_csc = Symmetric(sparse(symm_ext.data), Symbol(symm_ext.uplo)) + symm_csc \ B + end end + + @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, + AbstractExtendableSparseMatrixCSC{ + $Tv, + Ti + }}, + B::AbstractVector) where {Ti} + symm_csc = Hermitian(sparse(symm_ext.data), Symbol(symm_ext.uplo)) + symm_csc \ B + end end + end +end # USE_GPL_LIBS + +""" +$(SIGNATURES) + +[`flush!`](@ref) and ldiv with ext.cscmatrix +""" +function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrixCSC, x) + LinearAlgebra.ldiv!(r, sparse(ext), x) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and multiply with ext.cscmatrix +""" +function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrixCSC, x) + LinearAlgebra.mul!(r, sparse(ext), x) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate norm from cscmatrix +""" +function LinearAlgebra.norm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) + return LinearAlgebra.norm(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate opnorm from cscmatrix +""" +function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) + return LinearAlgebra.opnorm(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate cond from cscmatrix +""" +function LinearAlgebra.cond(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) + return LinearAlgebra.cond(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and check for symmetry of cscmatrix +""" +function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrixCSC) + return LinearAlgebra.issymmetric(sparse(A)) +end + + + + + + +function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC + T(sparse(A) + sparse(B)) +end + +function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC + T(sparse(A) - sparse(B)) +end + +function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC + T(sparse(A) * sparse(B)) +end + +""" +$(SIGNATURES) +""" +function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrixCSC + return T(d * sparse(ext)) +end + +""" +$(SIGNATURES) +""" +function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrixCSC + return T(sparse(ext) * d) +end + + +""" +$(SIGNATURES) + +Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. +""" +function Base.:+(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC) + return sparse(ext) + csc +end + + +""" +$(SIGNATURES) + +Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. +""" +function Base.:-(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC) + return sparse(ext) - csc +end + +""" +$(SIGNATURES) + +Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. +""" +function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrixCSC) + return csc - sparse(ext) +end + +""" +$(SIGNATURES) +""" +function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrixCSC) + dropzeros!(sparse(ext)) +end + + + +function mark_dirichlet(A::AbstractExtendableSparseMatrixCSC;penalty=1.0e20) + mark_dirichlet(sparse(A);penalty) +end + +function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrixCSC + T(eliminate_dirichlet(sparse(A),dirichlet)) +end + +function eliminate_dirichlet!(A::AbstractExtendableSparseMatrixCSC,dirichlet) + eliminate_dirichlet!(sparse(A),dirichlet) + A +end diff --git a/src/matrix/abstractsparsematrixextension.jl b/src/matrix/abstractsparsematrixextension.jl new file mode 100644 index 0000000..c206483 --- /dev/null +++ b/src/matrix/abstractsparsematrixextension.jl @@ -0,0 +1,19 @@ +""" + $(TYPEDEF) + +Abstract type for sparse matrix extension. + +Subtypes T_ext must implement: + +- Constructor `T_ext(m,n)` +- `SparseArrays.nnz(ext::T_ext)` +- `Base.size(ext::T_ext)` +- `Base.sum(extmatrices::Vector{T_ext}, csx)`: add csr or csc matrix and extension matrices (one per partition) and return csx matrix +- `Base.+(ext::T_ext, csx)` (optional) - Add extension matrix and csc/csr matrix, return csx matrix +- `rawupdateindex!(ext::Text, op, v, i, j, tid) where {Tv, Ti}`: Set `ext[i,j]op=v`, possibly insert new entry into matrix. `tid` is a +task or partition id + +""" +abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end + +Base.:+(ext::AbstractSparseMatrixExtension, csx) = sum([ext],csx) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index cba26b3..d4c35b5 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -7,7 +7,7 @@ either in cscmatrix, or in lnkmatrix, never in both. $(TYPEDFIELDS) """ -mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrixCSC{Tv, Ti} +mutable struct ExtendableSparseMatrixCSC{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ @@ -17,122 +17,118 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrix Linked list structure holding data of extension """ lnkmatrix::Union{SparseMatrixLNK{Tv, Ti}, Nothing} - + """ Pattern hash """ phash::UInt64 end + """ ``` -ExtendableSparseMatrix(Tv,Ti,m,n) -ExtendableSparseMatrix(Tv,m,n) -ExtendableSparseMatrix(m,n) +ExtendableSparseMatrixCSC(Tv,Ti,m,n) +ExtendableSparseMatrixCSC(Tv,m,n) +ExtendableSparseMatrixCSC(m,n) ``` -Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for +Create empty ExtendableSparseMatrixCSC. This is equivalent to `spzeros(m,n)` for `SparseMartrixCSC`. """ -function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) +function ExtendableSparseMatrixCSC{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) end -function ExtendableSparseMatrix(valuetype::Type{Tv}, +function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, indextype::Type{Ti}, m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(m, n) + ExtendableSparseMatrixCSC{Tv, Ti}(m, n) end -function ExtendableSparseMatrix(valuetype::Type{Tv}, m, n) where {Tv} - ExtendableSparseMatrix{Tv, Int}(m, n) +function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, m, n) where {Tv} + ExtendableSparseMatrixCSC{Tv, Int}(m, n) end -ExtendableSparseMatrix(m, n) = ExtendableSparseMatrix{Float64, Int}(m, n) +ExtendableSparseMatrixCSC(m, n) = ExtendableSparseMatrixCSC{Float64, Int}(m, n) """ $(SIGNATURES) - Create ExtendableSparseMatrix from SparseMatrixCSC +Create ExtendableSparseMatrixCSC from SparseMatrixCSC """ +function ExtendableSparseMatrixCSC(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc)) +end -function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) +function ExtendableSparseMatrixCSC{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc)) end """ $(SIGNATURES) - Create ExtendableSparseMatrix from Diagonal + Create ExtendableSparseMatrixCSC from Diagonal """ -ExtendableSparseMatrix(D::Diagonal) = ExtendableSparseMatrix(sparse(D)) +ExtendableSparseMatrixCSC(D::Diagonal) = ExtendableSparseMatrixCSC(sparse(D)) """ $(SIGNATURES) - Create ExtendableSparseMatrix from AbstractMatrix, dropping all zero entries. + Create ExtendableSparseMatrixCSC from AbstractMatrix, dropping all zero entries. This is the equivalent to `sparse(A)`. """ -ExtendableSparseMatrix(A::AbstractMatrix) = ExtendableSparseMatrix(sparse(A)) +ExtendableSparseMatrixCSC(A::AbstractMatrix) = ExtendableSparseMatrixCSC(sparse(A)) """ - ExtendableSparseMatrix(I,J,V) - ExtendableSparseMatrix(I,J,V,m,n) - ExtendableSparseMatrix(I,J,V,combine::Function) - ExtendableSparseMatrix(I,J,V,m,n,combine::Function) + ExtendableSparseMatrixCSC(I,J,V) + ExtendableSparseMatrixCSC(I,J,V,m,n) + ExtendableSparseMatrixCSC(I,J,V,combine::Function) + ExtendableSparseMatrixCSC(I,J,V,m,n,combine::Function) -Create ExtendableSparseMatrix from triplet (COO) data. +Create ExtendableSparseMatrixCSC from triplet (COO) data. """ -ExtendableSparseMatrix(I, J, V::AbstractVector) = ExtendableSparseMatrix(sparse(I, J, V)) -function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n) - ExtendableSparseMatrix(sparse(I, J, V, m, n)) +ExtendableSparseMatrixCSC(I, J, V::AbstractVector) = ExtendableSparseMatrixCSC(sparse(I, J, V)) + +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n) + ExtendableSparseMatrixCSC(sparse(I, J, V, m, n)) end -function ExtendableSparseMatrix(I, J, V::AbstractVector, combine::Function) - ExtendableSparseMatrix(sparse(I, J, V, combine)) + +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, combine::Function) + ExtendableSparseMatrixCSC(sparse(I, J, V, combine)) end -function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n, combine::Function) - ExtendableSparseMatrix(sparse(I, J, V, m, n, combine)) + +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n, combine::Function) + ExtendableSparseMatrixCSC(sparse(I, J, V, m, n, combine)) end # THese are probably too much... -# function Base.transpose(A::ExtendableSparseMatrix) +# function Base.transpose(A::ExtendableSparseMatrixCSC) # flush!(A) -# ExtendableSparseMatrix(Base.transpose(sparse(A))) +# ExtendableSparseMatrixCSC(Base.transpose(sparse(A))) # end -# function Base.adjoint(A::ExtendableSparseMatrix) +# function Base.adjoint(A::ExtendableSparseMatrixCSC) # flush!(A) -# ExtendableSparseMatrix(Base.adjoint(sparse(A))) +# ExtendableSparseMatrixCSC(Base.adjoint(sparse(A))) # end -# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrix{Tv,Ti}}) where {Tv,Ti} +# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} # transpose(sparse(parent(text))) # end -""" -$(SIGNATURES) - - Create SparseMatrixCSC from ExtendableSparseMatrix -""" -function SparseArrays.SparseMatrixCSC(A::ExtendableSparseMatrix) - flush!(A) - A.cscmatrix -end - - """ $(SIGNATURES) Create similar but emtpy extendableSparseMatrix """ -function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} - ExtendableSparseMatrix{Tv, Ti}(size(m)...) +function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} + ExtendableSparseMatrixCSC{Tv, Ti}(size(m)...) end -function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}, ::Type{T}) where {Tv, Ti, T} - ExtendableSparseMatrix{T, Ti}(size(m)...) +function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}, ::Type{T}) where {Tv, Ti, T} + ExtendableSparseMatrixCSC{T, Ti}(size(m)...) end """ @@ -144,7 +140,7 @@ search during acces: ```@example using ExtendableSparse # hide -A=ExtendableSparseMatrix(3,3) +A=ExtendableSparseMatrixCSC(3,3) A[1,2]+=0.1 A ``` @@ -152,7 +148,7 @@ A ```@example using ExtendableSparse # hide -A=ExtendableSparseMatrix(3,3) +A=ExtendableSparseMatrixCSC(3,3) updateindex!(A,+,0.1,1,2) A ``` @@ -160,7 +156,7 @@ A If `v` is zero, no new entry is created. """ -function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function updateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, @@ -182,19 +178,20 @@ $(SIGNATURES) Like [`updateindex!`](@ref) but without checking if v is zero. """ -function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function rawupdateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, - j) where {Tv, Ti <: Integer} + j, + part=1) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) - end - rawupdateindex!(ext.lnkmatrix, op, v, i, j) + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + rawupdateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -205,7 +202,7 @@ $(SIGNATURES) Find index in CSC matrix and set value if it exists. Otherwise, set index in extension if `v` is nonzero. """ -function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function Base.setindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti} @@ -226,7 +223,7 @@ $(SIGNATURES) Find index in CSC matrix and return value, if it exists. Otherwise, return value from extension. """ -function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, +function Base.getindex(ext::ExtendableSparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) @@ -235,46 +232,12 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, elseif ext.lnkmatrix == nothing return zero(Tv) else - return ext.lnkmatrix[i, j] + v=zero(Tv) + v=ext.lnkmatrix[i, j] end end -""" -$(SIGNATURES) -Size of ExtendableSparseMatrix. -""" -Base.size(ext::ExtendableSparseMatrix) = (ext.cscmatrix.m, ext.cscmatrix.n) - -""" -$(SIGNATURES) - -Show matrix -""" -function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrix) - flush!(ext) - xnnz = nnz(ext) - m, n = size(ext) - print(io, - m, - "×", - n, - " ", - typeof(ext), - " with ", - xnnz, - " stored ", - xnnz == 1 ? "entry" : "entries") - - if !haskey(io, :compact) - io = IOContext(io, :compact => true) - end - - if !(m == 0 || n == 0 || xnnz == 0) - print(io, ":\n") - Base.print_array(IOContext(io), ext.cscmatrix) - end -end """ $(SIGNATURES) @@ -282,7 +245,7 @@ $(SIGNATURES) If there are new entries in extension, create new CSC matrix by adding the cscmatrix and linked list matrix and reset the linked list based extension. """ -function flush!(ext::ExtendableSparseMatrix) +function flush!(ext::ExtendableSparseMatrixCSC) if ext.lnkmatrix != nothing && nnz(ext.lnkmatrix) > 0 ext.cscmatrix = ext.lnkmatrix + ext.cscmatrix ext.lnkmatrix = nothing @@ -291,275 +254,33 @@ function flush!(ext::ExtendableSparseMatrix) return ext end -""" -$(SIGNATURES) -[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. -""" -function SparseArrays.nnz(ext::ExtendableSparseMatrix) +function SparseArrays.sparse(ext::ExtendableSparseMatrixCSC) flush!(ext) - return nnz(ext.cscmatrix) + ext.cscmatrix end -""" -$(SIGNATURES) - -[`flush!`](@ref) and return nonzeros in ext.cscmatrix. -""" -function SparseArrays.nonzeros(ext::ExtendableSparseMatrix) - flush!(ext) - return nonzeros(ext.cscmatrix) -end """ $(SIGNATURES) -Return element type. -""" -Base.eltype(::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv - +Reset ExtenableSparseMatrix into state similar to that after creation. """ -$(SIGNATURES) - -[`flush!`](@ref) and return rowvals in ext.cscmatrix. -""" -function SparseArrays.rowvals(ext::ExtendableSparseMatrix) - flush!(ext) - rowvals(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return colptr of in ext.cscmatrix. -""" -function SparseArrays.getcolptr(ext::ExtendableSparseMatrix) - flush!(ext) - return getcolptr(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return findnz(ext.cscmatrix). -""" -function SparseArrays.findnz(ext::ExtendableSparseMatrix) - flush!(ext) - return findnz(ext.cscmatrix) -end - -@static if VERSION >= v"1.7" - function SparseArrays._checkbuffers(ext::ExtendableSparseMatrix) - flush!(ext) - SparseArrays._checkbuffers(ext.cscmatrix) - end -end - -""" - A\b - -[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components -are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. -In that case, Julias standard `\` is called, which is realized via UMFPACK. -""" -function LinearAlgebra.:\(ext::ExtendableSparseMatrix{Tv, Ti}, - b::AbstractVector) where {Tv, Ti} - flush!(ext) - SparspakLU(ext) \ b +function reset!(A::ExtendableSparseMatrixCSC) + A.cscmatrix=spzeros(size(A)...) + A.lnkmatrix=nothing end -""" -$(SIGNATURES) -[`\\`](@ref) for Symmetric{ExtendableSparse} -""" -function LinearAlgebra.:\(symm_ext::Symmetric{Tm, ExtendableSparseMatrix{Tm, Ti}}, - b::AbstractVector) where {Tm, Ti} - symm_ext.data \ b # no ldlt yet ... -end """ $(SIGNATURES) - -[`\\`](@ref) for Hermitian{ExtendableSparse} -""" -function LinearAlgebra.:\(symm_ext::Hermitian{Tm, ExtendableSparseMatrix{Tm, Ti}}, - b::AbstractVector) where {Tm, Ti} - symm_ext.data \ B # no ldlt yet ... -end - -if USE_GPL_LIBS - for (Tv) in (:Float64, :ComplexF64) - @eval begin function LinearAlgebra.:\(ext::ExtendableSparseMatrix{$Tv, Ti}, - B::AbstractVector) where {Ti} - flush!(ext) - ext.cscmatrix \ B - end end - - @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, - ExtendableSparseMatrix{ - $Tv, - Ti - }}, - B::AbstractVector) where {Ti} - flush!(symm_ext.data) - symm_csc = Symmetric(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo)) - symm_csc \ B - end end - - @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, - ExtendableSparseMatrix{ - $Tv, - Ti - }}, - B::AbstractVector) where {Ti} - flush!(symm_ext.data) - symm_csc = Hermitian(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo)) - symm_csc \ B - end end - end -end # USE_GPL_LIBS - -""" -$(SIGNATURES) - -[`flush!`](@ref) and ldiv with ext.cscmatrix -""" -function LinearAlgebra.ldiv!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x) - flush!(ext) - return LinearAlgebra.ldiv!(r, ext.cscmatrix, x) -end - """ -$(SIGNATURES) - -[`flush!`](@ref) and multiply with ext.cscmatrix -""" -function LinearAlgebra.mul!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x) - flush!(ext) - return LinearAlgebra.mul!(r, ext.cscmatrix, x) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate norm from cscmatrix -""" -function LinearAlgebra.norm(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.norm(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate opnorm from cscmatrix -""" -function LinearAlgebra.opnorm(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.opnorm(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate cond from cscmatrix -""" -function LinearAlgebra.cond(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.cond(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and check for symmetry of cscmatrix -""" -function LinearAlgebra.issymmetric(A::ExtendableSparseMatrix) - flush!(A) - return LinearAlgebra.issymmetric(A.cscmatrix) -end - -""" -$(SIGNATURES) - -Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. -""" -function Base.:+(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC) - flush!(ext) - return ext.cscmatrix + csc -end - -function Base.:+(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix + B.cscmatrix) -end - -function Base.:-(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix - B.cscmatrix) -end - -function Base.:*(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix * B.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.:*(d::Diagonal, ext::ExtendableSparseMatrix) - flush!(ext) - return ExtendableSparseMatrix(d * ext.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.:*(ext::ExtendableSparseMatrix, d::Diagonal) - flush!(ext) - return ExtendableSparseMatrix(ext.cscmatrix * d) -end - -""" -$(SIGNATURES) - -Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. -""" -function Base.:-(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC) - flush!(ext) - return ext.cscmatrix - csc -end - -""" -$(SIGNATURES) - -Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. -""" -function Base.:-(csc::SparseMatrixCSC, ext::ExtendableSparseMatrix) - flush!(ext) - return csc - ext.cscmatrix -end - -""" -$(SIGNATURES) -""" -function SparseArrays.dropzeros!(ext::ExtendableSparseMatrix) - flush!(ext) - dropzeros!(ext.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.copy(S::ExtendableSparseMatrix) +function Base.copy(S::ExtendableSparseMatrixCSC) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing, S.phash) + ExtendableSparseMatrixCSC(copy(S.cscmatrix), nothing,S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) + ExtendableSparseMatrixCSC(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) end end @@ -568,7 +289,7 @@ end Create a pointblock matrix. """ -function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} +function pointblock(A0::ExtendableSparseMatrixCSC{Tv,Ti},blocksize) where {Tv,Ti} A=SparseMatrixCSC(A0) colptr=A.colptr rowval=A.rowval @@ -578,7 +299,7 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} nblock=n÷blocksize b=SMatrix{blocksize,blocksize}(block) Tb=typeof(b) - Ab=ExtendableSparseMatrix{Tb,Ti}(nblock,nblock) + Ab=ExtendableSparseMatrixCSC{Tb,Ti}(nblock,nblock) for i=1:n @@ -597,19 +318,3 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} end -function mark_dirichlet(A::ExtendableSparseMatrix;penalty=1.0e20) - flush!(A) - mark_dirichlet(A.cscmatrix;penalty) -end - -function eliminate_dirichlet(A::ExtendableSparseMatrix,dirichlet) - flush!(A) - ExtendableSparseMatrix(eliminate_dirichlet(A.cscmatrix,dirichlet)) -end - -function eliminate_dirichlet!(A::ExtendableSparseMatrix,dirichlet) - flush!(A) - eliminate_dirichlet!(A.cscmatrix,dirichlet) - A -end - diff --git a/src/matrix/genericextendablesparsematrixcsc.jl b/src/matrix/genericextendablesparsematrixcsc.jl new file mode 100644 index 0000000..457413e --- /dev/null +++ b/src/matrix/genericextendablesparsematrixcsc.jl @@ -0,0 +1,93 @@ +mutable struct GenericExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Matrix for new entries + """ + xmatrix::Tm +end + + +function GenericExtendableSparseMatrixCSC{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} + GenericExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n), + Tm(m,n) + ) +end + + +nnznew(ext::GenericExtendableSparseMatrixCSC)=nnz(ext.xmatrix) + +function reset!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrix=Tm(m,n) + ext +end + + +function flush!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti} + if nnz(ext.xmatrix)>0 + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) + end + ext +end + +function SparseArrays.sparse(ext::GenericExtendableSparseMatrixCSC) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::GenericExtendableSparseMatrixCSC, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + setindex!(ext.xmatrix,v,i,j) + end +end + + +function Base.getindex(ext::GenericExtendableSparseMatrixCSC, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] + else + getindex(ext.xmatrix,i,j) + end +end + +function rawupdateindex!(ext::GenericExtendableSparseMatrixCSC, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.xmatrix,op,v,i,j) + end +end + +function updateindex!(ext::GenericExtendableSparseMatrixCSC, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + updateindex!(ext.xmatrix,op,v,i,j) + end +end + diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl new file mode 100644 index 0000000..88a4d68 --- /dev/null +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -0,0 +1,143 @@ +mutable struct GenericMTExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + xmatrices::Vector{Tm} + + colparts::Vector{Ti} + partnodes::Vector{Ti} +end + +function GenericMTExtendableSparseMatrixCSC{Tm, Tv, Ti}(n,m,p::Integer=1) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} + GenericMTExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n), + [Tm(m,n) for i=1:p], + Ti[1,2], + Ti[1,n+1], + ) +end + +function partitioning!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} + ext.partnodes=partnodes + ext.colparts=colparts + ext +end + + +function reset!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrices=[Tm(m,n) for i=1:p] + ext.colparts=Ti[1,2] + ext.partnodes=Ti[1,n+1] + ext +end + +function reset!(ext::GenericMTExtendableSparseMatrixCSC) + reset!(ext,length(ext.xmatrices)) +end + + +function flush!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti} + ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) + np=length(ext.xmatrices) + (m,n)=size(ext.cscmatrix) + ext.xmatrices=[Tm(m,n) for i=1:np] + ext +end + + +function SparseArrays.sparse(ext::GenericMTExtendableSparseMatrixCSC) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::GenericMTExtendableSparseMatrixCSC, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! for new entries into GenericMTExtendableSparseMatrixCSC") + end +end + +function Base.getindex(ext::GenericMTExtendableSparseMatrixCSC, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sum(nnz,ext.xmatrices) == 0 + return zero(eltype(ext.cscmatrix)) + else + error("flush! GenericMTExtendableSparseMatrixCSC before using getindex") + end +end + +nnznew(ext::GenericMTExtendableSparseMatrixCSC)=sum(nnz,ext.xmatrices) + + +function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC, + op, + v, + i, + j, + tid=1) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.xmatrices[tid],op,v,i,j) + end +end + + +function updateindex!(ext::GenericMTExtendableSparseMatrixCSC, + op, + v, + i, + j, + tid=1) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + updateindex!(ext.xmatrices[tid],op,v,i,j) + end +end + + + + +# Needed in 1.9 +function Base.:*(ext::GenericMTExtendableSparseMatrixCSC{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA + mul!(similar(x),ext,x) +end + +function LinearAlgebra.mul!(r, ext::GenericMTExtendableSparseMatrixCSC, x) + flush!(ext) + A=ext.cscmatrix + colparts=ext.colparts + partnodes=ext.partnodes + rows = SparseArrays.rowvals(A) + vals = nonzeros(A) + r.=zero(eltype(ext)) + m,n=size(A) + for icol=1:length(colparts)-1 + @tasks for ip in colparts[icol]:colparts[icol+1]-1 + @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1 + @inbounds for i in nzrange(A,inode) + r[rows[i]]+=vals[i]*x[inode] + end + end + end + end + r +end diff --git a/src/matrix/sparsematrixdict.jl b/src/matrix/sparsematrixdict.jl new file mode 100644 index 0000000..c5ee469 --- /dev/null +++ b/src/matrix/sparsematrixdict.jl @@ -0,0 +1,122 @@ +""" + $(TYPEDEF) + +Sparse matrix where entries are organized as dictionary. +""" +mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrixExtension{Tv,Ti} + m::Ti + n::Ti + values::Dict{Pair{Ti,Ti}, Tv} + SparseMatrixDict{Tv,Ti}(m,n) where {Tv,Ti} = new(m,n,Dict{Pair{Ti,Ti}, Tv}()) +end + +function reset!(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + m.values=Dict{Pair{Ti,Ti}, Tv}() +end + +function Base.setindex!(m::SparseMatrixDict,v,i,j) + m.values[Pair(i,j)]=v +end + +function rawupdateindex!(m::SparseMatrixDict{Tv,Ti},op,v,i,j) where {Tv,Ti} + p=Pair(i,j) + m.values[p]=op(get(m.values, p, zero(Tv)),v) +end + +function Base.getindex(m::SparseMatrixDict{Tv},i,j) where Tv + get(m.values,Pair(i,j),zero(Tv)) +end + +Base.size(m::SparseMatrixDict)=(m.m,m.n) + +SparseArrays.nnz(m::SparseMatrixDict)=length(m.values) + +function SparseArrays.sparse(mat::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(mat.values) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for (p,v) in mat.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,size(mat)...,+) + else + return SparseArrays.sparse(I,J,V,size(mat)...,+) + end +end + +function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=length(dictmatrix.values) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + for (p,v) in dictmatrix.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end + end + cscmatrix +end + +function Base.sum(dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=sum(m->length(m.values),dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + ip=1 + for m in dictmatrices + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end + end + return cscmatrix +end diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl new file mode 100644 index 0000000..ea58534 --- /dev/null +++ b/src/matrix/sparsematrixdilnkc.jl @@ -0,0 +1,472 @@ +""" + $(TYPEDEF) + + Modification of SparseMatrixLNK where the pointer to first index of +column j is stored in a dictionary. + """ +mutable struct SparseMatrixDILNKC{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Dictionary to store start indices of columns + """ + colstart::Dict{Ti,Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + """ + rowval::Vector{Ti} + + """ + Nonzero entry values correspondin to each pair + (colptr[index],rowval[index]) + """ + nzval::Vector{Tv} +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +function SparseMatrixDILNKC{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SparseMatrixDILNKC{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Ti,10)) +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +function SparseMatrixDILNKC(valuetype::Type{Tv}, indextype::Type{Ti}, m, + n) where {Tv, Ti <: Integer} + SparseMatrixDILNKC{Tv, Ti}(m, n) +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +SparseMatrixDILNKC(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixDILNKC(Tv, Int, m, n) + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +SparseMatrixDILNKC(m, n) = SparseMatrixDILNKC(Float64, m, n) + +""" +$(SIGNATURES) + +Constructor from SparseMatrixCSC. + +""" +function SparseMatrixDILNKC(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: + Integer} + lnk = SparseMatrixDILNKC{Tv, Ti}(csc.m, csc.n) + for j = 1:(csc.n) + for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) + lnk[csc.rowval[k], j] = csc.nzval[k] + end + end + lnk +end + +""" +$(SIGNATURES) + +Find index in matrix. +""" +function findindex(lnk::SparseMatrixDILNKC, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = get(lnk.colstart, j, 0) + if k==0 + return 0,0 + end + k0 = k + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +$(SIGNATURES) + +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SparseMatrixDILNKC{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +""" + $(SIGNATURES) + +Add entry. +""" +function addentry!(lnk::SparseMatrixDILNKC, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + if k0==0 + lnk.colstart[j]=lnk.nentries + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + + if k0>0 + lnk.colptr[k0] = lnk.nentries + end + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +$(SIGNATURES) + +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SparseMatrixDILNKC, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. +""" +Base.size(lnk::SparseMatrixDILNKC) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixDILNKC) = lnk.nnz + + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. +""" +function add_via_COO(lnk::SparseMatrixDILNKC{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + if nnz(csc)>0 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + end + for (j,k) in lnk.colstart + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +(to be fixed) +""" +function add_directly(lnk::SparseMatrixDILNKC{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for (j,k) in lnk.colstart + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = get(lnk.colstart, j, 0) + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? + + colptr[j] = inz + jlnk = one(Ti) # counts the entries in col + jcsc = csc.colptr[j] # counts entries in csc + + while true + if in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || + !in_lnk_col(jlnk, l_lnk_col)) + # Insert entries from csc into new structure + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + jcsc += 1 + inz += 1 + elseif in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) + # Add up entries from csc and lnk + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval + jcsc += 1 + inz += 1 + jlnk += 1 + elseif in_lnk_col(jlnk, l_lnk_col) + # Insert entries from lnk res. col into new structure + rowval[inz] = col[jlnk].rowval + nzval[inz] = col[jlnk].nzval + jlnk += 1 + inz += 1 + else + break + end + end + end + colptr[csc.n + 1] = inz + resize!(rowval, inz - 1) + resize!(nzval, inz - 1) + SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval) +end + + + +""" + $(SIGNATURES) + +Add SparseMatrixCSC matrix and [`SparseMatrixDILNKC`](@ref) lnk, returning a SparseMatrixCSC +""" +#Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc) +Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = sum([lnk],csc) + +function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=sum(nnz,lnkdictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=rowval[j] + J[i]=icsc + V[i]=nzval[j] + i=i+1 + end + end + + for lnk in lnkdictmatrices + for (j,k) in lnk.colstart + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + end + @assert l==i-1 + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end + end + return cscmatrix +end + +function reset!(m::SparseMatrixDILNKC{Tv,Ti}) where {Tv,Ti} + m.nnz=0 + m.nentries=0 + m.colptr=zeros(Ti,10) + m.colstart::Dict{Ti,Ti} + m.rowval=zeros(Ti,10) + m.nzval=zeros(Ti,10) + m +end + + +""" +$(SIGNATURES) + +Constructor from SparseMatrixDILNKC. + +""" +function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixDILNKC)::SparseMatrixCSC + csc = spzeros(lnk.m, lnk.n) + lnk + csc +end + +function SparseArrays.sparse(lnk::SparseMatrixDILNKC) + lnk + spzeros(lnk.m, lnk.n) +end + +function Base.copy(S::SparseMatrixDILNKC) + SparseMatrixDILNKC(size(S, 1), + size(S, 2), + S.nnz, + S.nentries, + copy(S.colptr), + copy(S.colstart), + copy(S.rowvals), + copy(S.nzval)) +end diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl index bc45abc..2ff3f05 100644 --- a/src/matrix/sparsematrixlnk.jl +++ b/src/matrix/sparsematrixlnk.jl @@ -18,7 +18,7 @@ can be conveniently updated via `push!`. No copying of existing data is necessa $(TYPEDFIELDS) """ -mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} +mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} """ Number of rows """ @@ -278,7 +278,7 @@ end # Struct holding pair of value and row # number, for sorting -struct ColEntry{Tv, Ti <: Integer} +mutable struct ColEntry{Tv, Ti <: Integer} rowval::Ti nzval::Tv end diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl new file mode 100644 index 0000000..45b05a9 --- /dev/null +++ b/test/ExperimentalParallel.jl @@ -0,0 +1,486 @@ +module ExperimentalParallel + +using ExtendableSparse,SparseArrays +using ExtendableSparse.Experimental +using BenchmarkTools +using OhMyThreads: @tasks +using Test + +import ChunkSplitters +# Methods to test parallel assembly +# Will eventually become part of the package. + +""" + +Return colored partitioing of grid made up by `X` and `Y` for work with `max(nt,4)` threads +as a vector `p` of a vector pairs of index ranges such that `p[i]` containes partions +of color i which can be assembled independently. + +The current algorithm creates `nt^2` partitions with `nt` colors. +""" +function part2d(X,Y, nt) + nt=max(4,nt) + XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt)) + YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt)) + partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt] + ipart=1 + col=1 + for jp=1:nt + for ip=1:nt + push!(partitions[col], (XP[ip], YP[jp])) + col=(col -1 +1 )%nt+1 + end + col=(col -1 +2)%nt+1 + end + partitions +end + +function colpart2d(X,Y,nt) + Nx=length(X) + Ny=length(Y) + p=part2d(X,Y,nt) + pc=zeros(Int,sum(length,p)) + jp=1 + for icol=1:length(p) + for ip=1:length(p[icol]) + pc[jp]=icol + jp+=1 + end + end + p,pc +end + + +""" + showgrid(Makie, ColorSchemes, X,Y,nt) + +Show grid partitioned according to [`part2d`](@ref). Needs a makie variant and ColorSchemes +to be passed as modules. +""" +function showgrid(Makie, ColorSchemes, X,Y,nt) + f = Makie.Figure() + ax = Makie.Axis(f[1, 1]; aspect = 1) + p=part2d(X,Y,nt) + ncol=length(p) + @show sum(length,p), ncol + colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol) + poly=Vector{Makie.Point2f}(undef,4) + for icol = 1:ncol + for (xp, yp) in p[icol] + for j in yp + for i in xp + poly[1]=Makie.Point2f(X[i], Y[j]) + poly[2]=Makie.Point2f(X[i + 1], Y[j]) + poly[3]=Makie.Point2f(X[i + 1], Y[j + 1]) + poly[4]=Makie.Point2f(X[i], Y[j + 1]) + Makie.poly!(copy(poly),color = colors[icol]) + end + end + end + end + f +end + + +""" + +Assemble edge for finite volume laplacian. +Used by [`partassemble!`](@ref). +""" +function assembleedge!(A,v,k,l) + rawupdateindex!(A,+,v,k,k) + rawupdateindex!(A,+,-v,k,l) + rawupdateindex!(A,+,-v,l,k) + rawupdateindex!(A,+,v,l,l) +end + +function assembleedge!(A,v,k,l,tid) + rawupdateindex!(A,+,v,k,k,tid) + rawupdateindex!(A,+,-v,k,l,tid) + rawupdateindex!(A,+,-v,l,k,tid) + rawupdateindex!(A,+,v,l,l,tid) +end + +""" +Assemble finite volume Laplacian + diagnonal term +on grid cell `i,j`. +Used by [`partassemble!`](@ref). +""" +function assemblecell!(A,lindexes,X,Y,i,j,d) + hx=X[i+1]-X[i] + hy=Y[j+1]-Y[j] + ij00=lindexes[i,j] + ij10=lindexes[i+1,j] + ij11=lindexes[i+1,j+1] + ij01=lindexes[i,j+1] + + assembleedge!(A,0.5*hx/hy,ij00,ij01) + assembleedge!(A,0.5*hx/hy,ij10,ij11) + assembleedge!(A,0.5*hy/hx,ij00,ij10) + assembleedge!(A,0.5*hy/hx,ij01,ij11) + v=0.25*hx*hy + rawupdateindex!(A,+,v*d,ij00,ij00) + rawupdateindex!(A,+,v*d,ij01,ij01) + rawupdateindex!(A,+,v*d,ij10,ij10) + rawupdateindex!(A,+,v*d,ij11,ij11) +end + +function assemblecell!(A,lindexes,X,Y,i,j,d,tid) + hx=X[i+1]-X[i] + hy=Y[j+1]-Y[j] + ij00=lindexes[i,j] + ij10=lindexes[i+1,j] + ij11=lindexes[i+1,j+1] + ij01=lindexes[i,j+1] + + assembleedge!(A,0.5*hx/hy,ij00,ij01,tid) + assembleedge!(A,0.5*hx/hy,ij10,ij11,tid) + assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) + assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) + v=0.25*hx*hy + rawupdateindex!(A,+,v*d,ij00,ij00,tid) + rawupdateindex!(A,+,v*d,ij01,ij01,tid) + rawupdateindex!(A,+,v*d,ij10,ij10,tid) + rawupdateindex!(A,+,v*d,ij11,ij11,tid) +end + +""" + +Assemble finite volume Laplacian + diagnonal term +on grid cells in partition described by ranges xp,yp. +Used by [`partassemble!`](@ref). +""" +function assemblepartition!(A,lindexes,X,Y,xp,yp,d) + for j in yp + for i in xp + assemblecell!(A,lindexes,X,Y,i,j,d) + end + end +end + +function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid) + for j in yp + for i in xp + assemblecell!(A,lindexes,X,Y,i,j,d,tid) + end + end +end + +""" + partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) + +Partitioned, cellwise, multithreaded assembly of finite difference matrix for +` -Δu + d*u=f` with homogeneous Neumann bc on grid set up by coordinate vectors +`X` and `Y` partitioned for work with `nt` threads +Does not work during structure setup. +""" +function partassemble!(A,X,Y,nt=1;d=0.1) + Nx=length(X) + Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") + size(A,2)==Nx*Ny || error("incompatible size of A") + + lindexes=LinearIndices((1:Nx,1:Ny)) + if nt==1 + assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) + else + p=part2d(X,Y,nt) + for icol=1:length(p) + @tasks for (xp, yp) in p[icol] + assemblepartition!(A,lindexes,X,Y,xp,yp,d) + end + end + end + flush!(A) +end + + +function partassemble!(A::Union{MTExtendableSparseMatrixCSC},X,Y,nt=1;d=0.1, reset=true) + Nx=length(X) + Ny=length(Y) + + size(A,1)==Nx*Ny || error("incompatible size of A") + size(A,2)==Nx*Ny || error("incompatible size of A") + + lindexes=LinearIndices((1:Nx,1:Ny)) + if nt==1 + reset!(A,1) + assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1) + else + p,pc=colpart2d(X,Y,nt) + if reset + reset!(A,pc) + end + jp0=0 + for icol=1:length(p) + npc=length(p[icol]) + @tasks for ip=1:npc + (xp, yp)=p[icol][ip] + assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip) + end + jp0+=npc + end + end + flush!(A) +end + + + + +""" +`test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)` + +Measure and output times for build and update for a rectangle grid with `n * n` cells. +Calculations are done on `nt` threads (`nt` >= 1). +Returns the assembled matrix. +""" +function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) + m = n + lindexes = LinearIndices((1:n,1:m)) + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + if nt > 1 + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + else + A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + end + + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + #Build + times_build = zeros(k) + for i=1:k + ExtendableSparse.reset!(A) + times_build[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + end + + + + #update + times_update = zeros(k) + for i=1:k + times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true) + end + + @info "TIMES: MIN, AVG, MAX" + info_minmax(times_build, "build ") + info_minmax(times_update, "update") + + A +end + + +function test_correctness_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + @assert A.cscmatrix≈A0.cscmatrix + end +end + + +function speedup_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(reset!($A0)) + + result=[] + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(ExtendableSparse.reset!($A)) seconds=1 + @assert A.cscmatrix≈A0.cscmatrix + push!(result,(nt,round(t0/t,digits=2))) + end + + result + +end + + +function speedup_update(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y) + t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(nonzeros($A0.cscmatrix).=0) + + + + result=[] + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(nonzeros($A.cscmatrix).=0) seconds=1 + @assert A.cscmatrix≈A0.cscmatrix + push!(result,(nt,round(t0/t,digits=2))) + end + + result + +end + + +""" +`generate_rectangle_grid(lindexes, Ti)` + +Generate a rectangle grid (i.e. a CellNodes matrix) based on LinerIndices +""" +function generate_rectangle_grid(lindexes, Ti) + n,m = size(lindexes) + nn = n*m # num nodes + nc = (n-1)*(m-1) + #lindexes=LinearIndices((1:n,1:m)) + + mat_cell_node = zeros(Ti, 4, nc) + + # links oben, rechts oben, rechts unten, links unten + cell_id = 1 + for ir in 1:n-1 + for jr in 1:m-1 + mat_cell_node[1,cell_id] = lindexes[ir,jr] + mat_cell_node[2,cell_id] = lindexes[ir,jr+1] + mat_cell_node[3,cell_id] = lindexes[ir+1,jr+1] + mat_cell_node[4,cell_id] = lindexes[ir+1,jr] + cell_id += 1 + end + end + + + mat_cell_node, nc, nn + +end + +function info_minmax(x, name; digits=3) + n = length(x) + @info name*" $(round(minimum(x),digits=digits)), $(round(sum(x)/n,digits=digits)), $(round(maximum(x),digits=digits))" +end + +""" +Assembly functions for ExtendableSparseMatrixParallel +""" +function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer} + if set_CSC_zero + A.cscmatrix.nzval .= 0 + end + + for level=1:A.depth + @tasks for tid=1:A.nt + for cell in A.cellsforpart[(level-1)*A.nt+tid] + assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, tid) + end + end + end + + for cell in A.cellsforpart[A.depth*A.nt+1] + assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1) + end + nnzCSC, nnzLNK = nnz_noflush(A) + if nnzCSC > 0 && nnzLNK > 0 + flush!(A; do_dense=false) + #sparse flush + elseif nnzCSC == 0 && nnzLNK > 0 + flush!(A; do_dense=true) + #dense flush + end +end + + +function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer} + addtoentry!(A, k, k, tid, +v) + addtoentry!(A, k, l, tid, -v) + addtoentry!(A, l, k, tid, -v) + addtoentry!(A, l, l, tid, +v) +end + +function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,cell,tid) where {Tv, Ti <: Integer} + ij00=mcn[1,cell] + ij10=mcn[2,cell] + ij11=mcn[3,cell] + ij01=mcn[4,cell] + + ix = (cell-1)%n+1 + iy = Int64(ceil(cell/n)) + + hx=X[ix+1]-X[ix] + hy=Y[iy+1]-Y[iy] + + assembleedge!(A,0.5*hx/hy,ij00,ij01,tid) + assembleedge!(A,0.5*hx/hy,ij10,ij11,tid) + assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) + assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) + v=0.25*hx*hy + addtoentry!(A, ij00, ij00, tid, v*d) + addtoentry!(A, ij01, ij01, tid, v*d) + addtoentry!(A, ij10, ij10, tid, v*d) + addtoentry!(A, ij11, ij11, tid, v*d) +end + + + +""" +Assembly functions for ExtendableSparseMatrix +""" +function assemble_ESMP(A::ExtendableSparseMatrix{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer} + if set_CSC_zero + A.cscmatrix.nzval .= 0 + end + nc = size(mat_cell_node,2) + for cell=1:nc + assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell) + end + ExtendableSparse.flush!(A) +end + +function assembleedge!(A::ExtendableSparseMatrix{Tv, Ti},v,k,l) where {Tv, Ti <: Integer} + A[k,k]+=v + A[k,l]-=v + A[l,k]-=v + A[l,l]+=v +end + +function assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) where {Tv, Ti <: Integer} + ij00=mcn[1,cell] + ij10=mcn[2,cell] + ij11=mcn[3,cell] + ij01=mcn[4,cell] + + ix = (cell-1)%n+1 + iy = Int64(ceil(cell/n)) + + hx=X[ix+1]-X[ix] + hy=Y[iy+1]-Y[iy] + + assembleedge!(A,0.5*hx/hy,ij00,ij01) + assembleedge!(A,0.5*hx/hy,ij10,ij11) + assembleedge!(A,0.5*hy/hx,ij00,ij10) + assembleedge!(A,0.5*hy/hx,ij01,ij11) + v=0.25*hx*hy + A[ij00,ij00]+=v*d + A[ij01,ij01]+=v*d + A[ij10,ij10]+=v*d + A[ij11,ij11]+=v*d +end +end diff --git a/test/Project.toml b/test/Project.toml index e195dd2..76e3ae8 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,19 +1,27 @@ [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" +ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] +ExtendableGrids = "1.9" IterativeSolvers = "0.9" diff --git a/test/femtools.jl b/test/femtools.jl new file mode 100644 index 0000000..8c7e652 --- /dev/null +++ b/test/femtools.jl @@ -0,0 +1,110 @@ +using OhMyThreads: @tasks +using RecursiveFactorization + +function testgrid(N; dim=3) + X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int) + simplexgrid((X for i = 1:dim)...) +end + +function coordmatrix!(C, coord, cellnodes, k) + spacedim=size(coord,1) + celldim=size(cellnodes,1) + @inbounds for jj = 1:celldim + C[1, jj] = 1 + @inbounds for ii = 1:spacedim + C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] + end + end +end + +function gradient!(G, C, factdim, I, ipiv) + clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + ldiv!(G, clu, I) + abs(det(clu)) / factdim +end + +function scalpro(G, dim, jl, il) + s = 0.0 + @inbounds @simd for k = 1:dim + s += G[jl, k + 1] * G[il, k + 1] + end + return s +end + +function stiffness!(S, dim, G) + @inbounds for il = 1:(dim + 1) + S[il, il] = scalpro(G, dim, il, il) + @inbounds for jl = (il + 1):(dim + 1) + S[il, jl] = scalpro(G, dim, jl, il) + S[jl, il] = S[il, jl] + end + end + return S +end + +function testassemble!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + factdim::Float64 = factorial(dim) + S = zeros(lnodes, lnodes) # local stiffness matrix + C = zeros(lnodes, lnodes) # local coordinate matrix + G = zeros(lnodes, lnodes) # shape function gradients + ipiv = zeros(Int, lnodes) + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for icell = 1:ncells + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) + end + end + end + flush!(A_h) +end + +function testassemble_parallel!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + npart = num_partitions(grid) + factdim::Float64 = factorial(dim) + SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix + CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix + GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients + IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for color in pcolors(grid) + @tasks for part in pcolor_partitions(grid, color) + C = CC[part] + S = SS[part] + G = GG[part] + ipiv = IP[part] + for icell in partition_cells(grid, part) + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) + end + end + end + end + end + flush!(A_h) +end diff --git a/test/runtests.jl b/test/runtests.jl index 4d7ae07..b154d1d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,11 +2,46 @@ using Test using LinearAlgebra using SparseArrays using ExtendableSparse +using ExtendableSparse.Experimental using Printf using BenchmarkTools using MultiFloats using ForwardDiff +using ExplicitImports + +@testset "ExplicitImports" begin + @test ExplicitImports.check_no_implicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing + @test ExplicitImports.check_no_stale_explicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing +end + +@testset "Parallel" begin + include("test_parallel.jl") + + for Tm in [STExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, ExtendableSparseMatrix] + for N in [10000,20000] + test_parallel.test_correctness_build_seq(N,Tm, dim=2) + end + end + + for Tm in [MTExtendableSparseMatrixCSC] + for N in [10000,20000] + test_parallel.test_correctness_update(N,Tm, dim=2) + test_parallel.test_correctness_build(N,Tm, dim=2) + test_parallel.test_correctness_mul(N,Tm,dim=2) + end + end +end + +@testset "ExperimentalParallel" begin + include("ExperimentalParallel.jl") + for d=[1,2,3] + for N in [10,rand(30:40),50] + ExperimentalParallel.test_correctness_build(N,d) + end + end +end + @testset "Constructors" begin include("test_constructors.jl") end diff --git a/test/test_parallel.jl b/test/test_parallel.jl new file mode 100644 index 0000000..de26229 --- /dev/null +++ b/test/test_parallel.jl @@ -0,0 +1,263 @@ +module test_parallel + +using ExtendableSparse, SparseArrays, ExtendableSparse.Experimental +using BenchmarkTools +using ExtendableGrids +#using MKLSparse +#using SparseMatricesCSR +using Test + +using ExtendableSparse, ExtendableGrids, Metis +using LinearAlgebra +using BenchmarkTools +using Test + +include("femtools.jl") + +function test_correctness_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + A = Tm{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + testassemble!(A, grid) + @test sparse(A0) ≈ sparse(A) +end + +function speedup_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + A = Tm{Float64,Int}(nnodes, nnodes) + tbase = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0)) + tx = @belapsed testassemble!($A, $grid) seconds = 1 setup = (reset!($A)) + tbase / tx +end + +function test_correctness_update(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A = Tm{Float64,Int}(nnodes, nnodes, 1) + + # Assembele without partitioning + # this gives the "base truth" to compare with + testassemble_parallel!(A, grid) + + # Save the nonzeros + nz = sort(copy(nonzeros(A))) + for np in allnp + # Reset the nonzeros, keeping the structure intact + nonzeros(A) .= 0 + # Parallel assembly whith np threads + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) + reset!(A, np) + @show num_partitions_per_color(pgrid) + testassemble_parallel!(A, pgrid) + @test sort(nonzeros(A)) ≈ nz + end +end + +""" + test_correctness_build(N) + +Test correctness of parallel assembly on NxN grid during +build phase, assuming that no structure has been assembled. +""" +function test_correctness_build(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + # Get the "ground truth" + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = sort(copy(nonzeros(A0))) + for np in allnp + # Make a new matrix and assemble parallel. + # this should result in the same nonzeros + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) + A = Tm(nnodes, nnodes, num_partitions(pgrid)) + @show num_partitions_per_color(pgrid) + @test check_partitioning(pgrid) + testassemble_parallel!(A, pgrid) + @test sort(nonzeros(A)) ≈ nz + end +end + +function test_correctness_mul(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + # Get the "ground truth" + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + b = rand(nnodes) + A0b = A0 * b + for np in allnp + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) + @test check_partitioning(pgrid) + A = Tm(nnodes, nnodes, num_partitions(pgrid)) + partitioning!(A, pgrid[PColorPartitions], + pgrid[PartitionNodes]) + testassemble_parallel!(A, pgrid) + invp = invperm(pgrid[NodePermutation]) + diff = norm(A0b[invp] - A * b[invp], Inf) + @show diff + @test diff < sqrt(eps()) + end +end + +function speedup_update(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + # Get the "ground truth" + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = copy(nonzeros(A0)) |> sort + # Get the base timing + # During setup, set matrix entries to zero while keeping the structure + t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (nonzeros($A0) .= 0) + result = [] + A = Tm(nnodes, nnodes, 1) + for np in allnp + # Get the parallel timing + # During setup, set matrix entries to zero while keeping the structure + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) + @show num_partitions_per_color(pgrid) + reset!(A, num_partitions(pgrid)) + testassemble_parallel!(A, pgrid) + t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (nonzeros($A) .= 0) + @assert sort(nonzeros(A)) ≈ nz + push!(result, (np, round(t0 / t; digits=2))) + end + result +end + +function speedup_build(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + # Get the "ground truth" + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = nonzeros(A0) + reset!(A0) + testassemble!(A0, grid) + @assert nonzeros(A0) ≈ (nz) + nz = sort(nz) + + # Get the base timing + # During setup, reset matrix to empty state. + t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0)) + + result = [] + A = Tm(nnodes, nnodes, 1) + for np in allnp + # Get the parallel timing + # During setup, reset matrix to empty state. + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) + reset!(A, num_partitions(pgrid)) + @show num_partitions_per_color(pgrid) + t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (reset!($A, + num_partitions($pgrid))) + @assert sort(nonzeros(A)) ≈ nz + push!(result, (np, round(t0 / t; digits=2))) + end + result +end + +function speedup_mul(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + # Get the "ground truth" + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + b = rand(nnodes) + t0 = @belapsed $A0 * $b seconds = 1 + A0b = A0 * b + result = [] + A = Tm(nnodes, nnodes, 1) + for np in allnp + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) + @show num_partitions_per_color(pgrid) + reset!(A, num_partitions(pgrid)) + testassemble_parallel!(A, pgrid) + flush!(A) + partitioning!(A, pgrid[PColorPartitions], + pgrid[PartitionNodes]) + t = @belapsed $A * $b seconds = 1 + invp = invperm(pgrid[NodePermutation]) + @assert A0b[invp] ≈ A * b[invp] + push!(result, (np, round(t0 / t; digits=2))) + end + result +end + +#= +function mymul(A::SparseMatrixCSR,v::AbstractVector) + y=copy(v) + A.n == size(v, 1) || throw(DimensionMismatch()) + A.m == size(y, 1) || throw(DimensionMismatch()) + @tasks for row = 1:size(y, 1) + y[row]=0.0 + @inbounds for nz in nzrange(A,row) + col = A.colval[nz] + y[row] += A.nzval[nz]*v[col] + end + end + return y +end + +function speedup_csrmul(N; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + t00=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + + reset!(A0) + testassemble!(A0,grid) + b=rand(nnodes) + t0=@belapsed $A0*$b seconds=1 + A0b=A0*b + + t0x=@belapsed A0x=sparse(transpose(sparse($A0))) + + A0x=sparse(transpose(sparse(A0))) + + tx=@belapsed A=SparseMatrixCSR{1}(transpose($A0x)) + + A=SparseMatrixCSR{1}(transpose(sparse(A0x))) + t1=@belapsed $A*$b seconds=1 + + t2=@belapsed mymul($A, $b) seconds=1 + + @info t00,t0,t0x, tx,t1, t2 + + @assert A0b≈A*b + t0/t1 +end + +=# + +end