From ac3c30c26d7937be7589a240113962a0b95345cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?=
Date: Fri, 10 May 2024 13:45:09 +0200
Subject: [PATCH 01/44] Test code for parallel assembly

---
 test/parallel_testtools.jl | 137 ++++++++++++++++++++++++++++++++++++
 test/test_parallel.jl      | 138 +++++++++++++++++++++++++++++++++++++
 2 files changed, 275 insertions(+)
 create mode 100644 test/parallel_testtools.jl
 create mode 100644 test/test_parallel.jl

diff --git a/test/parallel_testtools.jl b/test/parallel_testtools.jl
new file mode 100644
index 0000000..2a92521
--- /dev/null
+++ b/test/parallel_testtools.jl
@@ -0,0 +1,137 @@
+using ChunkSplitters
+# Methods to test parallel assembly
+# Will eventually become part of the package.
+
+"""
+    $(SIGNATURES)
+
+Return a colored partitioning of the grid made up by `X` and `Y` for work with `max(nt,4)` threads,
+as a vector `p` of vectors of pairs of index ranges, such that `p[i]` contains the partitions
+of color `i`, which can be assembled independently of each other.
+
+The current algorithm tiles the grid into `nt × nt` chunks of cell index ranges and
+assigns colors to them cyclically, such that chunks of equal color are never adjacent
+(not even diagonally) and hence share no grid nodes.
+"""
+function part2d(X,Y, nt)
+    nt=max(4,nt)
+    XP=collect(chunks(1:length(X)-1,n=nt))
+    YP=collect(chunks(1:length(Y)-1,n=nt))
+    partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt]
+    col=1
+    for jp=1:nt
+        for ip=1:nt
+            push!(partitions[col], (XP[ip], YP[jp]))
+            col=(col-1+1)%nt+1 # shift color by 1 within a row of chunks
+        end
+        col=(col-1+2)%nt+1 # shift color by 2 between rows of chunks
+    end
+    partitions
+end
+
+
+function showgrid(Makie, ColorSchemes, X,Y,nt)
+    f = Makie.Figure()
+    ax = Makie.Axis(f[1, 1]; aspect = 1)
+    p=part2d(X,Y,nt)
+    ncol=length(p)
+    @show sum(length,p), ncol
+    colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol)
+    poly=Vector{Makie.Point2f}(undef,4)
+    for icol = 1:ncol
+        for (xp, yp) in p[icol]
+            for j in yp
+                for i in xp
+                    poly[1]=Makie.Point2f(X[i], Y[j])
+                    poly[2]=Makie.Point2f(X[i + 1], Y[j])
+                    poly[3]=Makie.Point2f(X[i + 1], Y[j + 1])
+                    poly[4]=Makie.Point2f(X[i], Y[j + 1])
+                    Makie.poly!(copy(poly),color = colors[icol])
+                end
+            end
+        end
+    end
+    f
+end
+
+
+"""
+    $(SIGNATURES)
+
+Assemble edge contribution for the finite volume Laplacian.
+Used by [`partassemble!`](@ref).
+"""
+function assembleedge!(A,v,k,l)
+    A[k,k]+=v
+    A[k,l]-=v
+    A[l,k]-=v
+    A[l,l]+=v
+end
+
+"""
+    $(SIGNATURES)
+
+Assemble finite volume Laplacian + diagonal term
+on grid cell `i,j`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblecell!(A,lindexes,X,Y,i,j,d)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+
+    assembleedge!(A,0.5*hx/hy,ij00,ij01)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11)
+    v=0.25*hx*hy
+    A[ij00,ij00]+=v*d
+    A[ij01,ij01]+=v*d
+    A[ij10,ij10]+=v*d
+    A[ij11,ij11]+=v*d
+end
+
+"""
+    $(SIGNATURES)
+
+Assemble finite volume Laplacian + diagonal term
+on grid cells in the partition described by the ranges `xp`, `yp`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+    for j in yp
+        for i in xp
+            assemblecell!(A,lindexes,X,Y,i,j,d)
+        end
+    end
+end
+
+"""
+    partassemble!(A,X,Y,nt=1; d=0.1)
+
+Partitioned, cellwise, multithreaded assembly of the finite difference matrix for
+`-Δu + d*u = f` with homogeneous Neumann boundary conditions on the grid set up by the
+coordinate vectors `X` and `Y`, partitioned for work with `nt` threads.
+Multithreading currently does not work during structure setup, i.e. during the first,
+pattern-building assembly.
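+
+Usage sketch (the grid size is illustrative; `partassemble!` is defined below):
+
+    X = collect(range(0, 1; length = 100))
+    Y = collect(range(0, 1; length = 100))
+    A = ExtendableSparseMatrix(length(X) * length(Y), length(X) * length(Y))
+    partassemble!(A, X, Y, 8)   # colored multithreaded assembly with 8 threads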
+""" +function partassemble!(A,X,Y,nt=1;d=0.1) + Nx=length(X) + Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") + size(A,2)==Nx*Ny || error("incompatible size of A") + + lindexes=LinearIndices((1:Nx,1:Ny)) + if nt==1 + assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) + else + p=part2d(X,Y,nt) + for icol=1:length(p) + Threads.@threads for (xp, yp) in p[icol] + assemblepartition!(A,lindexes,X,Y,xp,yp,d) + end + end + end + flush!(A) +end diff --git a/test/test_parallel.jl b/test/test_parallel.jl new file mode 100644 index 0000000..1fe3f1d --- /dev/null +++ b/test/test_parallel.jl @@ -0,0 +1,138 @@ +using ExtendableSparse,SparseArrays +using DocStringExtensions +using BenchmarkTools +using Test + +include("parallel_testtools.jl") + +""" + test_correctness_update(N) + +Test correctness of parallel assembly on NxN grid during +update phase, assuming that the structure has been assembled. +""" +function test_correctness_update(N) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + allnp=[4,5,6,7,8] + + # Assembele without partitioning + # this gives the "base truth" to compare with + partassemble!(A,X,Y) + + # Save the nonzeros + nz=copy(nonzeros(A)) + for np in allnp + # Reset the nonzeros, keeping the structure intact + nonzeros(A).=0 + # Parallel assembly whith np threads + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz + end +end + +""" + test_correctness_build(N) + +Test correctness of parallel assembly on NxN grid during +build phase, assuming that no structure has been assembled. +""" +function test_correctness_build(N) + X=1:N + Y=1:N + allnp=[4,5,6,7,8] + # Get the "ground truth" + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + for np in allnp + # Make a new matrix and assemble parallel. + # this should result in the same nonzeros + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz + end +end + + +@testset "update correctness" begin + test_correctness_update(50) + test_correctness_update(100) + test_correctness_update(rand(30:200)) +end + +@testset "build correctness" begin + test_correctness_build(50) + test_correctness_build(100) + test_correctness_build(rand(30:200)) +end + +""" + speedup_update(N) + +Benchmark parallel speedup of update phase of parallel assembly on NxN grid. +Check for correctness as well. +""" +function speedup_update(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + # Get the base timing + # During setup, set matrix entries to zero while keeping the structure + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) + result=[] + for np in allnp + # Get the parallel timing + # During setup, set matrix entries to zero while keeping the structure + t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(nonzeros($A).=0) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end + +""" + reset!(A) + +Reset ExtenableSparseMatrix into state similar to that after creation. +""" +function reset!(A) + A.cscmatrix=spzeros(size(A)...) + A.lnkmatrix=nothing +end + +""" + speedup_build(N) + +Benchmark parallel speedup of structure build phase of parallel assembly on NxN grid. +Check for correctness as well. + +Works in the moment with locking. 
+""" +function speedup_build(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + reset!(A) + partassemble!(A,X,Y) + @assert nonzeros(A)≈(nz) + + # Get the base timing + # During setup, reset matrix to empty state. + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + + result=[] + for np in allnp + # Get the parallel timing + # During setup, reset matrix to empty state. + t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end From 31501b2da02cdba2ae63e4ef3e3096877a4fa363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Fri, 10 May 2024 13:45:25 +0200 Subject: [PATCH 02/44] Allow for parallel assembly via locking --- src/matrix/extendable.jl | 51 +++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index cba26b3..37cc015 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -18,6 +18,8 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrix """ lnkmatrix::Union{SparseMatrixLNK{Tv, Ti}, Nothing} + lock::Base.ReentrantLock + """ Pattern hash """ @@ -36,7 +38,7 @@ Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for """ function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) + ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing,Base.ReentrantLock(), 0) end function ExtendableSparseMatrix(valuetype::Type{Tv}, @@ -59,7 +61,7 @@ $(SIGNATURES) """ function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) + return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) end """ @@ -169,10 +171,15 @@ function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + lock(ext.lock) + try + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + updateindex!(ext.lnkmatrix, op, v, i, j) + finally + unlock(ext.lock) end - updateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -191,10 +198,15 @@ function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + lock(ext.lock) + try + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + rawupdateindex!(ext.lnkmatrix, op, v, i, j) + finally + unlock(ext.lock) end - rawupdateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -213,10 +225,15 @@ function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + lock(ext.lock) + try + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) + end + ext.lnkmatrix[i, j] = v + finally + unlock(ext.lock) end - ext.lnkmatrix[i, j] = v end end @@ -235,7 +252,13 @@ function 
Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, elseif ext.lnkmatrix == nothing return zero(Tv) else - return ext.lnkmatrix[i, j] + v=zero(Tv) + lock(ext.lock) + try + v=ext.lnkmatrix[i, j] + finally + unlock(ext.lock) + end end end @@ -557,9 +580,9 @@ $(SIGNATURES) """ function Base.copy(S::ExtendableSparseMatrix) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing, S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), nothing, Base.ReentrantLock(),S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), Base.ReentrantLock(), S.phash) end end From b2c141e4dae1f2e0dc315dd484da05d718522818 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 16:54:29 +0100 Subject: [PATCH 03/44] t2 --- src/ExtendableSparse.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 700bbda..36dca95 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -16,6 +16,7 @@ if USE_GPL_LIBS using SuiteSparse end +@info "test2" using DocStringExtensions From 751cf6cf2828a740d48449d56023428113609a68 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 17:48:30 +0100 Subject: [PATCH 04/44] add ExtendableSparseParallel --- Project.toml | 2 + src/ExtendableSparse.jl | 13 +- .../ExtendableSparseParallel.jl | 258 ++++++ .../preparatory.jl | 427 ++++++++++ .../struct_flush.jl | 263 ++++++ .../supersparse.jl | 788 ++++++++++++++++++ 6 files changed, 1749 insertions(+), 2 deletions(-) create mode 100644 src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/preparatory.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl create mode 100644 src/matrix/ExtendableSparseMatrixParallel/supersparse.jl diff --git a/Project.toml b/Project.toml index 89ce6e1..46d6e61 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,8 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 36dca95..372df82 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -4,6 +4,10 @@ using LinearAlgebra using Sparspak using ILUZero +using Metis +using Base.Threads +using ExtendableGrids + if !isdefined(Base, :get_extension) using Requires end @@ -16,8 +20,6 @@ if USE_GPL_LIBS using SuiteSparse end -@info "test2" - using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros @@ -31,6 +33,13 @@ export SparseMatrixLNK, export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet + +include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light + + include("factorizations/factorizations.jl") export JacobiPreconditioner, diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl new file 
mode 100644 index 0000000..68dace8 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -0,0 +1,258 @@ +include("supersparse.jl") +include("preparatory.jl") +#include("prep_time.jl") + +mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Linked list structure holding data of extension + """ + lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}} + + grid::ExtendableGrid + + nnts::Vector{Ti} + + sortednodesperthread::Matrix{Ti} + + old_noderegions::Matrix{Ti} + + cellsforpart::Vector{Vector{Ti}} + + globalindices::Vector{Vector{Ti}} + + new_indices::Vector{Ti} + + rev_new_indices::Vector{Ti} + + start::Vector{Ti} + + cellparts::Vector{Ti} + + nt::Ti + + depth::Ti + + +end + + + +function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} + grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) + lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth) +end + + + +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + + +#= +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end +=# + + +""" +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=true) where {Tv, Ti <: Integer}` + +A[i,j] += v, using any partition. +If the partition should be specified (for parallel use), use +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=true) where {Tv, Ti <: Integer}`. 
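+
+Sketch of the intended threaded use (the per-cell computation of the global
+indices `i`, `j` and the value `v` is illustrative and depends on the
+discretization; level-1 partitions shown, separator levels follow analogously):
+
+    Threads.@threads for tid = 1:A.nt
+        for cell in A.cellsforpart[tid]
+            # ... compute a contribution v for global indices (i, j) on `cell` ...
+            addtoentry!(A, i, j, tid, v)
+        end
+    end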
+""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} + if known_that_unknown + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + return + end + + if updatentryCSC2!(A.cscmatrix, i, j, v) + else + level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v + end +end + +#--------------------------------- + + +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + return + else + updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j]) + end + ext +end + +function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + end + + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + ext.lnkmatrices[tid][i, ext.sortednodesperthread[tid, j]] + +end + +function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = ExtendableSparse.findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) + #@info typeof(tid), typeof(j) + jj = ext.sortednodesperthread[tid, j] + ext.lnkmatrices[tid][i, jj] = v + end +end + + + +#------------------------------------ + +function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + A.cscmatrix = spzeros(Tv, Ti, num_nodes(A.grid), num_nodes(A.grid)) + A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] +end + +function nnz_flush(ext::ExtendableSparseMatrixParallel) + flush!(ext) + return nnz(ext.cscmatrix) +end + +function nnz_noflush(ext::ExtendableSparseMatrixParallel) + return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt]) +end + +function matrixindextype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} + Ti 
+end
+
+function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer}
+    Tv
+end
+
+
+
+function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel)
+    #flush!(ext)
+    xnnzCSC, xnnzLNK = nnz_noflush(ext)
+    m, n = size(ext)
+    print(io,
+          m,
+          "×",
+          n,
+          " ",
+          typeof(ext),
+          " with ",
+          xnnzCSC,
+          " stored ",
+          xnnzCSC == 1 ? "entry" : "entries",
+          " in CSC and ",
+          xnnzLNK,
+          " stored ",
+          xnnzLNK == 1 ? "entry" : "entries",
+          " in LNK.")
+
+    if !haskey(io, :compact)
+        io = IOContext(io, :compact => true)
+    end
+
+    if !(m == 0 || n == 0 || xnnzCSC == 0)
+        print(io, " CSC:\n")
+        Base.print_array(IOContext(io), ext.cscmatrix)
+    end
+end
+
+Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n)
+
+include("struct_flush.jl")
diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
new file mode 100644
index 0000000..e14a066
--- /dev/null
+++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
@@ -0,0 +1,427 @@
+"""
+`function preparatory_multi_ps_less_reverse(nm, nt, depth)`
+
+`nm` is the number of nodes in each dimension (examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid).
+`nt` is the number of threads.
+`depth` is the number of partition layers: for depth=1 there are nt parts and 1 separator; for depth=2 the separator is partitioned again, leading to 2*nt+1 submatrices, and so on.
+To assemble the system matrix in parallel, quantities such as `cellsforpart` (i.e. which thread takes which cells) need to be computed in advance. This is done here.
+"""
+function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0)
+    grid = getgrid(nm; x0, x1)
+
+    if sequential
+        (allcells, start, cellparts) = grid_to_graph_ps_multi!(grid, nt, depth)
+    else
+        (allcells, start, cellparts) = grid_to_graph_ps_multi_par!(grid, nt, depth)
+    end
+
+    (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(
+        cellparts, allcells, start, num_nodes(grid), Ti, nt
+    )
+
+    cfp = bettercellsforpart(cellparts, depth*nt+1)
+    return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts
+end
+
+
+"""
+`function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)`
+
+After the cell regions (the partitioning of the grid) have been computed, several derived quantities have to be computed.
+One of them is `sortednodesperthread`, an nt x num_nodes matrix: `sortednodesperthread[i,j]` is the local index at which the j-th node appears in the submatrix of thread i.
+`cellregs` contains the partition of each cell.
+Furthermore, `nnts` ("number of nodes of the threads") is computed; it contains for each thread the number of nodes that are contained in the cells of that thread.
+`allcells` and `start` together behave like the rowval and colptr arrays of a CSC matrix, such that `allcells[start[j]:start[j+1]-1]` are all cells that contain the j-th node.
+`nn` is the number of nodes in the grid.
+`Ti` is the type (Int64, ...) of the elements in the created arrays.
+`nt` is the number of threads.
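+
+The returned `sortednodesperthread` and `globalindices` are mutually inverse index
+maps; a consistency check sketch (using the names returned by this function):
+
+    for tid = 1:nt
+        for loc = 1:nnts[tid]
+            j = globalindices[tid][loc]               # new (reordered) global index
+            @assert sortednodesperthread[tid, j] == loc
+        end
+    end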
+""" +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt) + + num_matrices = maximum(cellregs) + depth = Int(floor((num_matrices-1)/nt)) + + #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector + #nnts = [zeros(Ti, nt+1) for i=1:depth+1] + nnts = zeros(Ti, nt) + #noderegs_max_tmp = 0 + old_noderegions = zeros(Ti, (depth+1, nn)) + + # Count nodes per thread: + tmp = zeros(depth+1) + for j=1:nn + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + #nnts[crmod] += 1 + old_noderegions[level,j] = crmod + if !(crmod in tmp[1:tmpctr-1]) + nnts[crmod] += 1 + #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + # Reorder inidices to receive a block structure: + # Taking the original matrix [a_ij] and mapping each i and j to new_indices[i] and new_indices[j], gives a block structure + # the reverse is also defined rev_new_indices[new_indices[k]] = k + # From now on we will only use this new ordering + counter_for_reorder = zeros(Ti, depth*nt+1) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder[(level-1)*nt + reg] += 1 #(reg-1)*depth + level] += 1 + end + + starts = vcat([0], cumsum(counter_for_reorder)) + counter_for_reorder2 = zeros(Ti, depth*nt+1) + new_indices = Vector{Ti}(undef, nn) + rev_new_indices = Vector{Ti}(undef, nn) + origin = Vector{Ti}(undef, nn) + for j=1:nn + level, reg = last_nz(old_noderegions[:, j]) + counter_for_reorder2[(level-1)*nt + reg] += 1 + origin[j] = reg + new_indices[j] = starts[(level-1)*nt + reg]+counter_for_reorder2[(level-1)*nt + reg] + rev_new_indices[new_indices[j]] = j + end + starts .+= 1 + + # Build sortednodesperthread and globalindices array: + # They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j + # Note that j has to be a `new index` + + sortednodesperthread = zeros(Ti, (nt, nn)) #vvcons(Ti, nnts) + globalindices = vvcons(Ti, nnts) + gictrs = zeros(Ti, nt) + + for nj=1:nn + oj = rev_new_indices[nj] + cells = @view allcells[start[oj]:start[oj+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + #tmp = [] + tmpctr = 1 + for cr in sortedcellregs + crmod = (cr-1)%nt+1 + level = Int(ceil(cr/nt)) + if !(crmod in tmp[1:tmpctr-1]) + gictrs[crmod] += 1 # , level] += 1 + sortednodesperthread[crmod,nj] = gictrs[crmod] + globalindices[crmod][gictrs[crmod]] = nj + #push!(tmp, crmod) + tmp[tmpctr] = crmod + tmpctr += 1 + end + end + end + + nnts, sortednodesperthread, old_noderegions, globalindices, gictrs, new_indices, rev_new_indices, starts +end + + + + + + + + +""" +`function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` + +This function partitons the separator, which is done if `depth`>1 (see `grid_to_graph_ps_multi!` and/or `preparatory_multi_ps`). +`cellregs` contains the regions/partitions/colors of each cell. +`nc` is the number of cells in the grid. +`ACSC` is the adjacency matrix of the graph of the (separator-) grid (vertex in graph is cell in grid, edge in graph means two cells share a node) stored as a CSC. +`nt` is the number of threads. 
+`level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... +`preparatory_multi_ps` is the number of separator-cells. +""" +function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes) + sepanodes = findall(x->x==nt+1, cellregs) + + indptr = collect(1:nc+1) + indices = zeros(Int64, nc) + rowval = zeros(Int64, nc) + + indptrT = collect(1:ctr_sepanodes+1) + indicesT = zeros(Int64, ctr_sepanodes) + rowvalT = zeros(Int64, ctr_sepanodes) + + for (i,j) in enumerate(sepanodes) + indices[j] = i + indicesT[i] = j + rowval[j] = 1 + rowvalT[i] = 1 + end + + R = SparseMatrixCSC(ctr_sepanodes, nc, indptr, indices, rowval) + RT = SparseMatrixCSC(nc, ctr_sepanodes, indptrT, indicesT, rowvalT) + prod = ACSC*dropzeros(RT) + RART = dropzeros(R)*ACSC*dropzeros(RT) + + partition2 = Metis.partition(RART, nt) + cellregs2 = copy(partition2) + + ctr_sepanodes = 0 + for (i,j) in enumerate(sepanodes) + rows = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + cellregs[j] = level0*nt + cellregs2[i] + if minimum(partition2[rows]) != maximum(partition2[rows]) + cellregs[j] = (level0+1)*nt+1 + ctr_sepanodes += 1 + end + end + + RART, ctr_sepanodes +end + + + +""" +`function grid_to_graph_ps_multi!(grid, nt, depth)` + +The function assigns colors/partitons to each cell in the `grid`. First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again... +`grid` is a simplexgrid. +`nt` is the number of threads. +`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... +""" +function grid_to_graph_ps_multi!(grid, nt, depth) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + end + end + RART = ACSC + for level=1:depth-1 + RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + end + + + return allcells, start, cellregs +end + + + +function grid_to_graph_ps_multi_par!(grid, nt, depth) + time = zeros(12) + As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] + number_cells_per_node = zeros(Int64, num_nodes(grid)) + + cn = grid[CellNodes] + + for j=1:num_cells(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_cells_per_node[node_id] += 1 + end + end + + + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += 
cumsum(number_cells_per_node) + number_cells_per_node .= 0 + + for j=1:num_cells(grid) + tmp = view(cn, :, j) + for node_id in tmp + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + cells = @view allcells[start[j]:start[j+1]-1] + l = length(cells) + for (i,id1) in enumerate(cells) + ce = view(cells, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + #SparseArrays.SparseMatrixCSC(A)) + + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + ctr_sepanodes_a = zeros(Int64, nt) + + cell_range = get_starts(num_cells(grid), nt) + Threads.@threads :static for tid=1:nt + for j in cell_range[tid]:cell_range[tid+1]-1 + rows = @view ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes_a[tid] += 1 + end + end + end + + ctr_sepanodes = sum(ctr_sepanodes_a) + + #= + time[10] = @elapsed for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + end + end + =# + RART = ACSC + for level=1:depth-1 + RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + end + + + return allcells, start, cellregs +end + + +function add_all_par!(As) + nt = length(As) + depth = Int(floor(log2(nt))) + ende = nt + for level=1:depth + + @threads :static for tid=1:2^(depth-level) + #@info "$level, $tid" + start = tid+2^(depth-level) + while start <= ende + As[tid] += As[start] + start += 2^(depth-level) + end + end + ende = 2^(depth-level) + end + As[1] + +end + + +""" +`function vvcons(Ti, lengths)` + +`lengths` is a vector of integers. +The function creates a vector of zero vectors of type `Ti` of length `lengths[i]`. +""" +function vvcons(Ti, lengths) + x::Vector{Vector{Ti}} = [zeros(Ti, i) for i in lengths] + return x +end + + +""" +`function bettercellsforpart(xx, upper)` + +`xx` are the CellRegions (i.e. the color/partition of each cell). +`upper` is the number of partitions (upper=depth*nt+1). +The function returns a vector e.g. [v1, v2, v3, v4, v5]. +The element v1 would be the list of cells that are in partition 1 etc. +The function is basically a faster findall. +""" +function bettercellsforpart(xx, upper) + ctr = zeros(Int64, upper) + for x in xx + ctr[x] += 1 + end + cfp = vvcons(Int64, ctr) + ctr .= 1 + for (i,x) in enumerate(xx) + cfp[x][ctr[x]] = i + ctr[x] += 1 + end + cfp +end + +""" +`function getgrid(nm)` + +Returns a simplexgrid with a given number of nodes in each dimension. +`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
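+
+For example (`x0`, `x1` are the interval bounds in each direction):
+
+    grid2d = getgrid((100, 100))                    # 100 x 100 nodes on the unit square
+    grid3d = getgrid((50, 50, 50); x0=0.0, x1=2.0)  # 50^3 nodes on [0,2]^3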
+""" +function getgrid(nm; x0=0.0, x1=1.0) + if length(nm) == 2 + n,m = nm + xx = collect(LinRange(x0, x1, n)) + yy = collect(LinRange(x0, x1, m)) + grid = simplexgrid(xx, yy) + else + n,m,l = nm + xx = collect(LinRange(x0, x1, n)) + yy = collect(LinRange(x0, x1, m)) + zz = collect(LinRange(x0, x1, l)) + grid = simplexgrid(xx, yy, zz) + end + grid +end + +function get_starts(n, nt) + ret = ones(Int64, nt+1) + ret[end] = n+1 + for i=nt:-1:2 + ret[i] = ret[i+1] - Int(round(ret[i+1]/i)) #Int(round(n/nt))-1 + end + ret +end + +function last_nz(x) + n = length(x) + for j=n:-1:1 + if x[j] != 0 + return (j, x[j]) + end + end +end + diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl new file mode 100644 index 0000000..c27aab0 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -0,0 +1,263 @@ +function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + + + if !do_dense + A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) + + else + if keep_zeros + A.cscmatrix = dense_flush_keepzeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + else + A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) + end + end + + A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + +end + +""" +`CSC_RLNK_plusequals_less3_reordered_super!` from `plusequals.jl` +""" +function sparse_flush!(A::ExtendableSparseMatrixParallel; keep_zeros=true) + + #dropzeros!( + plus_remap(A.lnkmatrices, A.cscmatrix, A.globalindices; keep_zeros) + #) + +end + + + +""" +`CSC_RLNK_si_oc_ps_dz_less_reordered` from `conversion.jl` +""" +function dense_flush_keepzeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + #if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + #end + k = As[regmod].colptr[k] + end + else + while k>0 + #if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr 
-= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + #end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + +function dense_flush_removezeros!( + As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, + onr, s, nt, rni + ) where {Tv, Ti <: Integer} + + nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double + indptr = zeros(Ti, As[1].m+1) + indices = zeros(Ti, nnz) #sum(As.nnz)) + data = zeros(Float64, nnz) #sum(As.nnz)) + ctr = 1 + eqctr = 0 + tmp = zeros(Ti, size(onr)[1]) + + for nj=1:As[1].m + indptr[nj] = ctr + oj = rni[nj] + regionctr = 1 + jc = 0 + nrr = view(onr, :, oj) + tmp .= 0 + for region in nrr #nrr #[:,j] + regmod = region #(region-1)%nt+1 + if (region > 0) & !(region in tmp) + k = s[regmod, nj] + if regionctr == 1 + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + else + while k>0 + if As[regmod].nzval[k] != 0.0 + indices[ctr] = As[regmod].rowval[k] + data[ctr] = As[regmod].nzval[k] + + for jcc=1:jc + if indices[ctr-jcc] > indices[ctr-jcc+1] + tmp_i = indices[ctr-jcc+1] + tmp_d = data[ctr-jcc+1] + indices[ctr-jcc+1] = indices[ctr-jcc] + data[ctr-jcc+1] = data[ctr-jcc] + + indices[ctr-jcc] = tmp_i + data[ctr-jcc] = tmp_d + elseif indices[ctr-jcc] == indices[ctr-jcc+1] + data[ctr-jcc] += data[ctr-jcc+1] + eqctr += 1 + + for jccc=1:jcc + indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1] + data[ctr-jcc+jccc] = data[ctr-jcc+jccc+1] + end + + ctr -= 1 + jc -= 1 + + break + else + break + end + end + + ctr += 1 + jc += 1 + end + k = As[regmod].colptr[k] + end + + end + tmp[regionctr] = region + regionctr += 1 + + end + + end + + end + + #@warn ctr/nnz + + indptr[end] = ctr + resize!(indices, ctr-1) + resize!(data, ctr-1) + + + SparseArrays.SparseMatrixCSC( + As[1].m, As[1].m, indptr, indices, data + ) + +end + + + + + + + diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl new file mode 100644 index 0000000..ae52f60 --- /dev/null +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -0,0 +1,788 @@ + +using SparseArrays +using ExtendableSparse + +mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + + Initial length is n, + it grows with each new entry. 
+ """ + rowval::Vector{Ti} + + """ + Nonzero entry values correspondin to each pair + (colptr[index],rowval[index]) + + Initial length is n, it grows with each new entry. + """ + nzval::Vector{Tv} + + + collnk::Vector{Ti} + + colctr::Ti +end + + +function SparseArrays.SparseMatrixCSC(A::SuperSparseMatrixLNK{Tv, Ti})::SparseArrays.SparseMatrixCSC where {Tv, Ti <: Integer} + SparseArrays.SparseMatrixCSC(SparseMatrixLNK{Tv, Ti}(A.m, A.n, A.nnz, A.nentries, A.colptr, A.rowval, A.nzval)) + +end + +function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SuperSparseMatrixLNK{Tv, Ti}(m, n, 0, n, zeros(Ti, n), zeros(Ti, n), zeros(Tv, n), zeros(Ti, n), 0) +end + + +function findindex(lnk::SuperSparseMatrixLNK, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = j + k0 = j + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +Return tuple containing size of the matrix. +""" +Base.size(lnk::SuperSparseMatrixLNK) = (lnk.m, lnk.n) + +""" +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SuperSparseMatrixLNK{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SuperSparseMatrixLNK, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + lnk.colptr[k0] = lnk.nentries + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SuperSparseMatrixLNK, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = v + lnk.nnz += 1 + return lnk + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. + if lnk.rowval[j] == 0 && !iszero(v) + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +function rawupdateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti} + # Set the first column entry if it was not yet set. 
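+    # Unlike `updateindex!` above, the first entry of a column is recorded even
+    # when `v` is zero, so the sparsity pattern is created deterministically.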
+ if lnk.rowval[j] == 0 + lnk.colctr += 1 + lnk.collnk[lnk.colctr] = j + lnk.rowval[j] = i + lnk.nzval[j] = op(lnk.nzval[j], v) + lnk.nnz += 1 + return lnk + end + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +#= +mutable struct ColEntry{Tv, Ti <: Integer} + rowval::Ti + nzval::Tv +end + +# Comparison method for sorting +Base.isless(x::ColEntry, y::ColEntry) = (x.rowval < y.rowval) +=# + +function get_column!(col::Vector{ColEntry{Tv, Ti}}, lnk::SuperSparseMatrixLNK{Tv, Ti}, j::Ti)::Ti where {Tv, Ti <: Integer} + k = j + ctr = zero(Ti) + while k>0 + if abs(lnk.nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr +end + + +function remove_doubles!(col, coll) + #input_ctr = 1 + last = 1 + for j=2:coll + if col[j].rowval == col[last].rowval + col[last].nzval += col[j].nzval + else + last += 1 + if last != j + col[last] = col[j] + end + end + end + last +end + +function get_column_removezeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function get_column_keepzeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer} + ctr = zero(Ti) + for i=1:length + tid = tids[i] + k = js[i] + #for (tid,j) in zip(tids, js) #j0:j1 + #tid = tids[j] + #k = j + while k>0 + #if abs(lnks[tid].nzval[k]) > 0 + ctr += 1 + col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k]) + #end + k = lnks[tid].colptr[k] + end + end + + sort!(col, 1, ctr, Base.QuickSort, Base.Forward) + ctr = remove_doubles!(col, ctr) + #print_col(col, ctr) + ctr + +end + +function merge_into!(rowval::Vector{Ti}, nzval::Vector{Tv}, C::SparseArrays.SparseMatrixCSC{Tv, Ti}, col::Vector{ColEntry{Tv, Ti}}, J::Ti, coll::Ti, ptr1::Ti) where {Tv, Ti <: Integer} + j_min = 1 + numshifts = 0 + j_last = 0 + last_row = 0 + + #@warn "MERGING $J" + + #rowval0 = copy(C.rowval[C.colptr[J]:C.colptr[J+1]-1]) + #endptr = C.colptr[J+1] + + for (di,i) in enumerate(C.colptr[J]:C.colptr[J+1]-1) + for j=j_min:coll + #if col[j].rowval == last_row + # #@info "!! 
col j rowval == last row" + #end + if col[j].rowval < C.rowval[i] #ptr1+di+numshifts] #i+numshifts] + if col[j].rowval == last_row + #@info "$(ptr1+di+numshifts) : backwards EQUALITY: " + nzval[ptr1+di+numshifts] += col[j].nzval + else + #@info "$(ptr1+di+numshifts) : Insert from col: j=$j" + #shift_e!(C.rowval, C.nzval, 1, i+numshifts, C.colptr[end]-1) + rowval[ptr1+di+numshifts] = col[j].rowval + nzval[ptr1+di+numshifts] = col[j].nzval + numshifts += 1 + #endptr += 1 + end + j_last = j + elseif col[j].rowval > C.rowval[i] #if col[j].rowval + #@info "$(ptr1+di+numshifts) : Insert from C: i=$i" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i] + j_min = j + break + else + #@info "$(ptr1+di+numshifts) : normal EQUALITY: i=$i, j=$j" + rowval[ptr1+di+numshifts] = C.rowval[i] + nzval[ptr1+di+numshifts] = C.nzval[i]+col[j].nzval + #numshifts += 1 + j_min = j+1 + j_last = j + + if j == coll + #@info "$(ptr1+di+numshifts+1) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i+1:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i+1:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + break + end + + if j == coll + #@info "$(ptr1+di+numshifts) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))" + rowval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1] + nzval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.nzval, i:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1] + + #@info "FINISH" + return numshifts + end + + last_row = col[j].rowval + end + end + endptr = ptr1 + numshifts + (C.colptr[J+1]-C.colptr[J]) + last_row = 0 + numshifts_old = numshifts + numshifts = 0 + #start_ptr = endptr - 1 #C.colptr[J+1]-1 + if j_last > 0 + last_row = col[j_last].rowval + end + + if j_last != coll + for j=j_last+1:coll + if col[j].rowval != last_row + numshifts += 1 + #shift_e!(C.rowval, C.nzval, 1, start_ptr+numshifts, C.colptr[end]-1) + #for k=start_ptr+numshifts: + #@info "$(endptr+numshifts) : after..." 
+ rowval[endptr+numshifts] = col[j].rowval + nzval[endptr+numshifts] = col[j].nzval + last_row = rowval[endptr+numshifts] + #colptr[J+1:end] .+= 1 + else + nzval[endptr+numshifts] += col[j].nzval + end + end + end + + return numshifts + numshifts_old + +end + + +function print_col(col, coll) + v = zeros((2, coll)) + for j=1:coll + v[1,j] = col[j].rowval + v[2,j] = col[j].nzval + end + @info v +end + +function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + if lnk.nnz == 0 + return csc + elseif length(csc.rowval) == 0 + return SparseMatrixCSC(lnk) + else + return lnk + csc + end +end + +function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + #@info ">>>>>>> $J <<<<<<<<<<<<<<<" + # insert new added column here / dummy + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + #print_col(col, coll) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + + numshifts += nns + #j = colptr[i] #sortedcolumns[J]] + #rowval[j] = J + #nzval[j] = J + # insertion end + + #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts + + #a = i+1 + #b = sortedcolumns[J+1] + #@info a, b + + + #colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + + #= + + @info csc.colptr[a:b] + + colptr[a:b] = csc.colptr[a:b].+numshifts + + #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts + @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] + @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] + rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + #rowval[colptrsortedcolumns[J+1]] + =# + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + +function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, 
Ti <: Integer} + nt = length(lnks) + + if keep_zeros + get_col! = get_column_keepzeros! + else + get_col! = get_column_removezeros! + end + lnkscols = vcat([lnks[i].collnk[1:lnks[i].colctr] for i=1:nt]...) + supersparsecolumns = vcat([gi[i][lnks[i].collnk[1:lnks[i].colctr]] for i=1:nt]...) + num_cols = sum([lnks[i].colctr for i=1:nt]) + tids = Vector{Ti}(undef, num_cols) + ctr = 0 + for i=1:nt + for j=1:lnks[i].colctr + ctr += 1 + tids[ctr] = i + end + end + + + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + coll = sum([lnks[i].nnz for i=1:nt]) + nnz_sum = length(csc.rowval) + coll + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + if csc.m < coll + coll = csc.m + end + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:coll] + numshifts = 0 + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + J = 1 + i0 = 0 + #lj_last = [] + #tid_last = [] + lj_last = Vector{Ti}(undef, nt) + tid_last = Vector{Ti}(undef, nt) + ctr_last = 1 + gj_last = 0 + for J=1:length(sortedcolumns)-1 + gj_now = sortedcolumns[J] + gj_next = sortedcolumns[J+1] + + lj_last[ctr_last] = lnkscols[sortedcolumnids[J]] + tid_last[ctr_last] = tids[sortedcolumnids[J]] + + if gj_now != gj_next + #@info typeof(lnks) + # do stuff from gj_last to gj_now / from last_lj to J + #@info lj_last, tid_last + coll = get_col!(col, lnks, lj_last, tid_last, ctr_last) + + nns = merge_into!(rowval, nzval, csc, col, gj_now, coll, colptr[gj_now]-one(Ti)) + numshifts += nns + + + colptr[gj_now+1:sortedcolumns[J+1]] = csc.colptr[gj_now+1:sortedcolumns[J+1]].+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + + rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.rowval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + #nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.nzval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1] + + + #for k=csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1 + # k2 = k+(-csc.colptr[gj_now]+colptr[gj_now] + nns) + # rowval[k2] = csc.rowval[k] + # nzval[k2] = csc.nzval[k] + #end + + gj_last = gj_now + ctr_last = 0 #tids[sortedcolumnids[J]]] + end + + ctr_last += 1 + + + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + #for ... 
+ # take many columns together if necessary in `get_column` + #end + + + +end + + + +function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer} + + #@info lnk.collnk[1:lnk.colctr] + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + sortedcolumns = vcat(sortedcolumns, [Ti(csc.n+1)]) + + #@info typeof(supersparsecolumns), typeof(sortedcolumns) + + + #@info supersparsecolumns + #@info sortedcolumns + #@info lnk.collnk[1:length(sortedcolumns)-1] + #@info lnk.collnk[sortedcolumnids[1:length(sortedcolumns)-1]] + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + + coll = get_column!(col, lnk, lnk.collnk[sortedcolumnids[J]] ) + #@info typeof(i), typeof(coll), typeof(colptr), typeof(colptr[i]), typeof(colptr[i]-1) + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-one(Ti)) + numshifts += nns + + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + #= + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + =# + end + + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + +end + + + + +function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, 
colptr[i]-1) + numshifts += nns + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1 + k2 = k+(-csc.colptr[i]+colptr[i] + nns) + rowval[k2] = csc.rowval[k] + nzval[k2] = csc.nzval[k] + end + + + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + + +end + + + +function twodisjointsets(n, k) + A = rand(1:n, k) + B = zeros(Int64, k) + done = false + ctr = 0 + while ctr != k + v = rand(1:n) + if !(v in A) + ctr += 1 + B[ctr] = v + end + end + + A, B +end + +function distinct(x, n) + y = zeros(typeof(x[1]), n) + ctr = 0 + while ctr != n + v = rand(x) + if !(v in y[1:ctr]) + ctr += 1 + y[ctr] = v + end + end + y +end + + +function mean(x) + sum(x)/length(x) +end + +function form(x) + [minimum(x), mean(x), maximum(x)] +end + + + + + + + + + + + From b0953fe2183eb896998df1bc8ea104fb0cfd3689 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 20 Feb 2024 17:58:45 +0100 Subject: [PATCH 05/44] add Mittal,Al-Kurdi ILU --- src/ExtendableSparse.jl | 2 + src/factorizations/factorizations.jl | 2 + src/factorizations/ilu_Al-Kurdi_Mittal.jl | 136 ++++++++++++++++++++++ src/factorizations/iluam.jl | 35 ++++++ 4 files changed, 175 insertions(+) create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal.jl create mode 100644 src/factorizations/iluam.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 372df82..2894027 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -39,6 +39,8 @@ include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light +include("factorizations/ilu_Al-Kurdi_Mittal.jl") +using .ILUAM include("factorizations/factorizations.jl") diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index d278d8b..ead23c5 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -157,6 +157,7 @@ end include("jacobi.jl") include("ilu0.jl") include("iluzero.jl") +include("iluam.jl") include("parallel_jacobi.jl") include("parallel_ilu0.jl") include("sparspak.jl") @@ -165,6 +166,7 @@ include("blockpreconditioner.jl") @eval begin @makefrommatrix ILU0Preconditioner @makefrommatrix ILUZeroPreconditioner + @makefrommatrix ILUAMPreconditioner @makefrommatrix PointBlockILUZeroPreconditioner @makefrommatrix JacobiPreconditioner @makefrommatrix ParallelJacobiPreconditioner diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..a47bd50 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,136 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + rowval::AbstractVector + colptr::AbstractVector + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) +end + +function iluAM!(LU::ILUAMPrecon{T,N}, 
A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + nzval = LU.nzval + diag = LU.diag + + colptr = LU.colptr + rowval = LU.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end +end + +function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + LU = ILUAMPrecon(A::SparseMatrixCSC{T,N}) + iluAM!(LU, A) + LU +end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl new file mode 100644 index 0000000..5d65e40 --- /dev/null +++ b/src/factorizations/iluam.jl @@ -0,0 +1,35 @@ +mutable struct ILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrix + factorization::ILUAM.ILUAMPrecon + phash::UInt64 + function ILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +ILUAMPreconditioner() +ILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). 
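+
+A minimal usage sketch (`A` and `b` are placeholder names for an assembled
+`ExtendableSparseMatrix` and a matching right hand side):
+```
+p = ILUAMPreconditioner(A)    # factorizes A on construction
+x = copy(b)
+ldiv!(x, p, b)                # apply the preconditioner to b
+update!(p)                    # refactorize after the values of A changed
+```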
+""" +function ILUAMPreconditioner end + +function update!(p::ILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = ILUAM.iluAM(p.A.cscmatrix) + p.phash=p.A.phash + else + ILUAM.ilu0!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::ILUAMPreconditioner)=true +allow_views(::Type{ILUAMPreconditioner})=true + From 5df873594a31584fda8d3cacbb352b6682a73b2a Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 23 Feb 2024 12:47:51 +0100 Subject: [PATCH 06/44] implement ILU (sequential and parallel) based on Mittal and Al-Kurdi --- Project.toml | 6 +- src/ExtendableSparse.jl | 33 ++- src/factorizations/factorizations.jl | 119 +++++--- src/factorizations/ilu_Al-Kurdi_Mittal.jl | 176 ++++++++---- src/factorizations/ilu_Al-Kurdi_Mittal_0.jl | 146 ++++++++++ src/factorizations/ilu_Al-Kurdi_Mittal_1.jl | 229 +++++++++++++++ src/factorizations/iluam.jl | 7 +- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 270 ++++++++++++++++++ src/factorizations/piluam.jl | 36 +++ .../ExtendableSparseParallel.jl | 34 ++- .../struct_flush.jl | 2 +- 11 files changed, 941 insertions(+), 117 deletions(-) create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_0.jl create mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_1.jl create mode 100644 src/factorizations/pilu_Al-Kurdi_Mittal.jl create mode 100644 src/factorizations/piluam.jl diff --git a/Project.toml b/Project.toml index 46d6e61..8776054 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,15 @@ name = "ExtendableSparse" uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" authors = ["Juergen Fuhrmann "] -version = "1.4" +version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -15,8 +17,6 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" -ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 2894027..285d0c2 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -28,25 +28,39 @@ include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") include("matrix/extendable.jl") -export SparseMatrixLNK, - ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse +export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet +@warn "ESMP!" 
include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") -export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK -export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light include("factorizations/ilu_Al-Kurdi_Mittal.jl") -using .ILUAM - +#using .ILUAM +include("factorizations/pilu_Al-Kurdi_Mittal.jl") +#using .PILUAM include("factorizations/factorizations.jl") +include("factorizations/simple_iteration.jl") +export simple, simple! + +include("matrix/sprand.jl") +export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark + + + + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light + + export JacobiPreconditioner, ILU0Preconditioner, ILUZeroPreconditioner, + ILUAMPreconditioner, + PILUAMPreconditioner, PointBlockILUZeroPreconditioner, ParallelJacobiPreconditioner, ParallelILU0Preconditioner, @@ -57,13 +71,6 @@ export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver export factorize!, update! -include("factorizations/simple_iteration.jl") -export simple, simple! - -include("matrix/sprand.jl") -export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark - - @static if !isdefined(Base, :get_extension) function __init__() @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" begin diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index ead23c5..2d56fce 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -51,6 +51,52 @@ Determine if factorization is a solver or not issolver(::AbstractLUFactorization) = true issolver(::AbstractPreconditioner) = false + + +"""" + @makefrommatrix(fact) + +For an AbstractFactorization `MyFact`, provide methods +``` + MyFact(A::ExtendableSparseMatrix; kwargs...) + MyFact(A::SparseMatrixCSC; kwargs...) +``` +""" +macro makefrommatrix(fact) + return quote + function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) + factorize!($(esc(fact))(;kwargs...), A) + end + function $(esc(fact))(A::SparseMatrixCSC; kwargs...) + $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) 
+ end + end +end + +include("ilu0.jl") +include("iluzero.jl") +include("iluam.jl") +include("piluam.jl") +include("parallel_jacobi.jl") +include("parallel_ilu0.jl") +include("sparspak.jl") +include("blockpreconditioner.jl") +include("jacobi.jl") + +@eval begin + @makefrommatrix ILU0Preconditioner + @makefrommatrix ILUZeroPreconditioner + @makefrommatrix ILUAMPreconditioner + @makefrommatrix PILUAMPreconditioner + @makefrommatrix PointBlockILUZeroPreconditioner + @makefrommatrix JacobiPreconditioner + @makefrommatrix ParallelJacobiPreconditioner + @makefrommatrix ParallelILU0Preconditioner + @makefrommatrix SparspakLU + @makefrommatrix UpdateteableBlockpreconditioner + @makefrommatrix BlockPreconditioner +end + """ ``` factorize!(factorization, matrix) @@ -65,8 +111,40 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) p end +function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) + p.A = A + update!(p) + p +end + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) +# p.A = A +# update!(p) +# p +#end + +#factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) + +#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) + +#function factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel) +# factorize!(p, A) +#end + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) +# factorize!(p, A.cscmatrix) +#end + + +#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) +# factorize!(p, A.cscmatrix) +#end + + +#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,A) + """ ``` lu!(factorization, matrix) @@ -134,47 +212,6 @@ LinearAlgebra.ldiv!(fact::AbstractFactorization, v) = ldiv!(fact.factorization, -"""" - @makefrommatrix(fact) - -For an AbstractFactorization `MyFact`, provide methods -``` - MyFact(A::ExtendableSparseMatrix; kwargs...) - MyFact(A::SparseMatrixCSC; kwargs...) -``` -""" -macro makefrommatrix(fact) - return quote - function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...) - factorize!($(esc(fact))(;kwargs...), A) - end - function $(esc(fact))(A::SparseMatrixCSC; kwargs...) - $(esc(fact))(ExtendableSparseMatrix(A); kwargs...) 
- end - end -end - -include("jacobi.jl") -include("ilu0.jl") -include("iluzero.jl") -include("iluam.jl") -include("parallel_jacobi.jl") -include("parallel_ilu0.jl") -include("sparspak.jl") -include("blockpreconditioner.jl") - -@eval begin - @makefrommatrix ILU0Preconditioner - @makefrommatrix ILUZeroPreconditioner - @makefrommatrix ILUAMPreconditioner - @makefrommatrix PointBlockILUZeroPreconditioner - @makefrommatrix JacobiPreconditioner - @makefrommatrix ParallelJacobiPreconditioner - @makefrommatrix ParallelILU0Preconditioner - @makefrommatrix SparspakLU - @makefrommatrix UpdateteableBlockpreconditioner - @makefrommatrix BlockPreconditioner -end if USE_GPL_LIBS #requires SuiteSparse which is not available in non-GPL builds diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index a47bd50..97bb9a8 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -1,34 +1,27 @@ -module ILUAM -using LinearAlgebra, SparseArrays +#module ILUAM +#using LinearAlgebra, SparseArrays import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz +@info "ILUAM" mutable struct ILUAMPrecon{T,N} diag::AbstractVector nzval::AbstractVector - rowval::AbstractVector - colptr::AbstractVector + A::AbstractMatrix end -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) -end - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - nzval = LU.nzval - diag = LU.diag - - colptr = LU.colptr - rowval = LU.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + @info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) # find diagonal entries for j=1:n @@ -64,25 +57,23 @@ function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integ for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) + point[rowval[v]] = zero(Ti) end end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) end -function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - LU = ILUAMPrecon(A::SparseMatrixCSC{T,N}) - iluAM!(LU, A) - LU -end - +function forward_subst_old!(y, v, nzval, diag, A) + n = A.n + colptr = A.colptr + rowval = A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - y .= 0 + #y .= 0 @inbounds for j=1:n y[j] += v[j] for v=diag[j]+1:colptr[j+1]-1 @@ -93,44 +84,119 @@ function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} end -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - wrk = copy(y) +function backward_subst_old!(x, y, nzval, diag, A) + n = A.n + colptr = A.colptr + rowval = A.rowval @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] + y[rowval[i]] -= nzval[i]*x[j] end + end - x + x end -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where 
{T,N<:Integer} - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x +function ldiv!(x, ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + x end -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b +function ldiv!(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b end function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} x = copy(b) ldiv!(x, ilu, b) + x end function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} length(ilu.nzval) end +#= +function forward_subst!(y, v, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "fw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + + for i in eachindex(y) + y[i] = zero(Float64) + end + + #y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + +function backward_subst!(x, y, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer} + @info "bw" + n = ilu.A.n + nzval = ilu.nzval + diag = ilu.diag + colptr = ilu.A.colptr + rowval = ilu.A.rowval + #wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + x +end + +function iluam_subst(ILU::ILUAMPrecon, b) + y = copy(b) + forward_subst!(y, b, ILU) + z = copy(b) + backward_subst!(z, y, ILU) + z +end + + + +function iluam_subst_old(ILU::ILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + z = copy(b) + backward_subst_old!(z, y, nzval, diag, A) + #backward_subst!(z, y, ILU) + z +end +=# + + -end \ No newline at end of file +#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl new file mode 100644 index 0000000..26f9788 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl @@ -0,0 +1,146 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + rowval::AbstractVector + colptr::AbstractVector + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + @info "ILUAMPrecon" + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) +end + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + @info "iluAM!" + nzval = LU.nzval + diag = LU.diag + + colptr = LU.colptr + rowval = LU.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + t = zeros(5) + + # find diagonal entries + t[1] = @elapsed for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
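+            # `point` records, for each stored row of column j, its position
+            # in the CSC arrays; the elimination loop below uses it for O(1)
+            # lookup of fill positions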
+ point[rowval[v]] = v + end + + t[3] += @elapsed for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end + t +end + +function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + t = zeros(6) + t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) + t[2:6] = iluAM!(LU, A) + @info t + LU +end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.colptr + rowval = ilu.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv1" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv2" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM bs " + x = copy(b) + ldiv!(x, ilu, b) +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl new file mode 100644 index 0000000..a599094 --- /dev/null +++ b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl @@ -0,0 +1,229 @@ +module ILUAM +using LinearAlgebra, SparseArrays + +#import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "ILUAM" + +mutable struct ILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + +function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} + @info "ILUAMPrecon" + n = A.n # number of columns + nzval = copy(A.nzval) + diag = Vector{N}(undef, n) + + ILUAMPrecon{T, N}(diag, copy(A.nzval), A) +end + + + +function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} + @info "iluAM!" + nzval = LU.nzval + diag = LU.diag + + colptr = LU.A.colptr + rowval = LU.A.rowval + n = A.n # number of columns + point = zeros(N, n) #Vector{N}(undef, n) + + t = zeros(5) + + # find diagonal entries + t[1] = @elapsed for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
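+            # as in the other variants: `point` maps the stored row indices of
+            # column j to their positions for O(1) lookup below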
+ point[rowval[v]] = v + end + + t[3] += @elapsed for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(N) + end + end + t +end + + +function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} + @info "iluAM" + nzval = copy(A.nzval) + colptr = A.colptr + rowval = A.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + ILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + +#function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} +# t = zeros(6) +# t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) +# t[2:6] = iluAM!(LU, A) +# @info t +# LU +#end + + +function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + y .= 0 + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + y +end + + +function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} + n = ilu.A.n + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv_new!(x, ilu, v) + + n = ilu.A.n + y = Vector{Float64}(undef, n) + y .= 0 + nzval = ilu.nzval + colptr = ilu.A.colptr + rowval = ilu.A.rowval + diag = ilu.diag + #forward + @inbounds for j=1:n + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + + #backward + wrk = copy(y) + @inbounds for j=n:-1:1 + x[j] = wrk[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + wrk[rowval[i]] -= nzval[i]*x[j] + end + end + x +end + +function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + #@info "AM ldiv1" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(x, ilu, y) + x +end + +function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM ldiv2" + y = copy(b) + forward_substitution!(y, ilu, b) + backward_substitution!(b, ilu, y) + b +end + +function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} + @info "AM bs " + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + + +end \ No newline at end of file diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl index 5d65e40..a4aed06 100644 --- a/src/factorizations/iluam.jl +++ b/src/factorizations/iluam.jl @@ -1,6 +1,6 @@ mutable struct ILUAMPreconditioner <: AbstractPreconditioner 
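+    # holds the matrix to be preconditioned, the Al-Kurdi/Mittal
+    # factorization, and the pattern hash which update! uses to choose
+    # between a fresh factorization and an in-place value update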
A::ExtendableSparseMatrix - factorization::ILUAM.ILUAMPrecon + factorization::ILUAMPrecon phash::UInt64 function ILUAMPreconditioner() p = new() @@ -22,10 +22,11 @@ function ILUAMPreconditioner end function update!(p::ILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash - p.factorization = ILUAM.iluAM(p.A.cscmatrix) + p.factorization = iluAM(p.A.cscmatrix) p.phash=p.A.phash else - ILUAM.ilu0!(p.factorization, p.A.cscmatrix) + @warn "fuck?" + ilu0!(p.factorization, p.A.cscmatrix) end p end diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..a1ef818 --- /dev/null +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -0,0 +1,270 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + start::AbstractVector + nt::Integer + depth::Integer + +end + +function use_vector_par(n, nt, Ti) + point = [Vector{Ti}(undef, n) for tid=1:nt] + @threads for tid=1:nt + point[tid] = zeros(Ti, n) + end + point +end + +function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) + for j=j0:j1-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + end + end +end + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + start = A.start + nt = A.nt + depth = A.depth + + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + nzval = Vector{Tv}(undef, length(rowval)) #copy(A.nzval) + n = A.cscmatrix.n # number of columns + diag = Vector{Ti}(undef, n) + point = use_vector_par(n, A.nt, Int32) + + # find diagonal entries + # + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + nzval[v] = A.cscmatrix.nzval[v] + if rowval[v] == j + diag[j] = v + end + #elseif rowval[v] + end + end + end + + #= + @info "piluAM" + nzval = copy(A.cscmatrix.nzval) + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + #nzval = ILU.nzval + n = A.n # number of columns + diag = Vector{Ti}(undef, n) + start = A.start + nt = A.nt + depth = A.depth + point = use_vector_par(n, nt, Ti) + + # find diagonal entries + @threads for tid=1:depth*nt+1 + for j=start[tid]:start[tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + end + + # compute L and U + for level=1:depth + @threads for tid=1:nt + compute_lu!(nzval, point, start[(level-1)*nt+tid], start[(level-1)*nt+tid+1], tid, rowval, colptr, diag, Ti) + end + end + + compute_lu!(nzval, point, start[depth*nt+1], start[depth*nt+2], 1, rowval, colptr, diag, Ti) + =# + + for level=1:depth + @threads for tid=1:nt + for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[tid][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[tid][rowval[v]] = zero(Ti) + 
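+                        # reset the thread-local scratch vector, so point[tid]
+                        # is all zero again for the next column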
end + end + end + end + + #point = zeros(Ti, n) #Vector{Ti}(undef, n) + for j=start[depth*nt+1]:start[depth*nt+2]-1 + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + for w=diag[i]+1:colptr[i+1]-1 + k = point[1][rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + for v=colptr[j]:colptr[j+1]-1 + point[1][rowval[v]] = zero(Ti) + end + end + + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A.cscmatrix, start, nt, depth) +end + +function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A) + #@info "fwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + + y .= 0 + + for level=1:depth + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + end + end + + @inbounds for j=start[depth*nt+1]:start[depth*nt+2]-1 + y[j] += v[j] + for v=diag[j]+1:colptr[j+1]-1 + y[rowval[v]] -= nzval[v]*y[j] + end + end + +end + + +function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "bwo" + n = A.n + colptr = A.colptr + rowval = A.rowval + #wrk = copy(y) + + + @inbounds for j=start[depth*nt+2]-1:-1:start[depth*nt+1] + x[j] = y[j] / nzval[diag[j]] + + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + + end + + for level=depth:-1:1 + @threads for tid=1:nt + @inbounds for j=start[(level-1)*nt+tid+1]-1:-1:start[(level-1)*nt+tid] + x[j] = y[j] / nzval[diag[j]] + for i=colptr[j]:diag[j]-1 + y[rowval[i]] -= nzval[i]*x[j] + end + end + end + end + +end + +function ldiv!(x, ILU::PILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + x +end + +function ldiv!(ILU::PILUAMPrecon, b) + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A + start = ILU.start + nt = ILU.nt + depth = ILU.depth + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) + backward_subst_old!(b, y, nzval, diag, start, nt, depth, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl new file mode 100644 index 0000000..4a5fcdc --- /dev/null +++ b/src/factorizations/piluam.jl @@ -0,0 +1,36 @@ +mutable struct PILUAMPreconditioner <: AbstractPreconditioner + A::ExtendableSparseMatrixParallel + factorization::PILUAMPrecon + phash::UInt64 + function PILUAMPreconditioner() + p = new() + p.phash = 0 + p + end +end + +""" +``` +PILUAMPreconditioner() +PILUAMPreconditioner(matrix) +``` +Incomplete LU preconditioner with zero fill-in using ... . This preconditioner +also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than the [`ILU0Preconditioner`](@ref). +""" +function PILUAMPreconditioner end + +function update!(p::PILUAMPreconditioner) + flush!(p.A) + if p.A.phash != p.phash + p.factorization = piluAM(p.A) + p.phash=p.A.phash + else + @warn "fuck?" 
+ ilu0!(p.factorization, p.A.cscmatrix) + end + p +end + +allow_views(::PILUAMPreconditioner)=true +allow_views(::Type{PILUAMPreconditioner})=true + diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 68dace8..9e63c37 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -36,6 +36,12 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar nt::Ti depth::Ti + + phash::UInt64 + + n::Ti + + m::Ti end @@ -46,7 +52,7 @@ function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) w grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] - ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth) + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) end @@ -253,6 +259,32 @@ function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParall end end +""" +`function entryexists2(CSC, i, j)` + +Find out if CSC already has an nonzero entry at i,j without any allocations +""" +function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry at i,j + #vals = + #ids = CSC.colptr[j]:(CSC.colptr[j+1]-1) + i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1)) +end + + +function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer} + p1 = CSC.colptr[j] + p2 = CSC.colptr[j+1]-1 + + searchk = searchsortedfirst(view(CSC.rowval, p1:p2), i) + p1 - 1 + + if (searchk <= p2) && (CSC.rowval[searchk] == i) + CSC.nzval[searchk] += v + return true + else + return false + end +end + Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) include("struct_flush.jl") diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index c27aab0..38608ad 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -11,7 +11,7 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=tr A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices) end end - + A.phash = phash(A.cscmatrix) A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] end From 3ed83faafb08e428f47ab032bf7c08004c1b9690 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 23 Feb 2024 15:38:05 +0100 Subject: [PATCH 07/44] ColEntry from struct to mutable struct --- src/matrix/sparsematrixlnk.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl index 2976e88..b00c6fc 100644 --- a/src/matrix/sparsematrixlnk.jl +++ b/src/matrix/sparsematrixlnk.jl @@ -278,7 +278,7 @@ end # Struct holding pair of value and row # number, for sorting -struct ColEntry{Tv, Ti <: Integer} +mutable struct ColEntry{Tv, Ti <: Integer} rowval::Ti nzval::Tv end From f78ed1df028b0fb3f009f10e78c3abf3e5e8fdc9 
Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Mon, 26 Feb 2024 14:10:38 +0100 Subject: [PATCH 08/44] enable deeper partitioning / fixing some preparatory functions --- src/factorizations/ilu_Al-Kurdi_Mittal.jl | 4 +- src/factorizations/iluam.jl | 1 + src/factorizations/pilu_Al-Kurdi_Mittal.jl | 2 + src/factorizations/piluam.jl | 1 + .../ExtendableSparseParallel.jl | 2 +- .../preparatory.jl | 416 ++++++++++++++---- 6 files changed, 336 insertions(+), 90 deletions(-) diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index 97bb9a8..0b6b1b2 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -14,7 +14,7 @@ mutable struct ILUAMPrecon{T,N} end function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} - @info "iluAM" + #@info "iluAM" nzval = copy(A.nzval) colptr = A.colptr rowval = A.rowval @@ -100,6 +100,7 @@ function backward_subst_old!(x, y, nzval, diag, A) end function ldiv!(x, ILU::ILUAMPrecon, b) + #@info "iluam ldiv 1" nzval = ILU.nzval diag = ILU.diag A = ILU.A @@ -111,6 +112,7 @@ function ldiv!(x, ILU::ILUAMPrecon, b) end function ldiv!(ILU::ILUAMPrecon, b) + #@info "iluam ldiv 2" nzval = ILU.nzval diag = ILU.diag A = ILU.A diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl index a4aed06..6d061b0 100644 --- a/src/factorizations/iluam.jl +++ b/src/factorizations/iluam.jl @@ -22,6 +22,7 @@ function ILUAMPreconditioner end function update!(p::ILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash + @warn "p.A.phash != p.phash" p.factorization = iluAM(p.A.cscmatrix) p.phash=p.A.phash else diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl index a1ef818..15a8b23 100644 --- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -230,6 +230,7 @@ function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) end function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "piluam ldiv 1" nzval = ILU.nzval diag = ILU.diag A = ILU.A @@ -244,6 +245,7 @@ function ldiv!(x, ILU::PILUAMPrecon, b) end function ldiv!(ILU::PILUAMPrecon, b) + #@info "piluam ldiv 2" nzval = ILU.nzval diag = ILU.diag A = ILU.A diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl index 4a5fcdc..075f73f 100644 --- a/src/factorizations/piluam.jl +++ b/src/factorizations/piluam.jl @@ -22,6 +22,7 @@ function PILUAMPreconditioner end function update!(p::PILUAMPreconditioner) flush!(p.A) if p.A.phash != p.phash + @warn "p.A.phash != p.phash" p.factorization = piluAM(p.A) p.phash=p.A.phash else diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 9e63c37..b635a33 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -49,7 +49,7 @@ end function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} - grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, 
nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index e14a066..a29356c 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -6,23 +6,36 @@ `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. """ -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0) +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) grid = getgrid(nm; x0, x1) - + adepth = 0 if sequential - (allcells, start, cellparts) = grid_to_graph_ps_multi!(grid, nt, depth)#) + (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts) = grid_to_graph_ps_multi_par!(grid, nt, depth) + (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa, do_print) + end + + if (adepth != depth) && do_print + @info "The requested depth of partitioning is too high. The depth is set to $adepth." end + depth = adepth + cfp = bettercellsforpart(cellparts, depth*nt+1) + + if check_partition + validate_partition(grid, cellparts, start, allcells, nt, depth) + end + + @info length.(cfp) + @info minimum(cellparts), maximum(cellparts), nt, depth + (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( - cellparts, allcells, start, num_nodes(grid), Ti, nt + cellparts, allcells, start, num_nodes(grid), Ti, nt, depth ) - cfp = bettercellsforpart(cellparts, depth*nt+1) - return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts -end + return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, adepth +end """ `function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` @@ -35,10 +48,10 @@ Furthermore, `nnts` (number of nodes of the threads) is computed, which contain `Ti` is the type (Int64,...) of the elements in the created arrays. `nt` is the number of threads. 
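+Note: by construction, each node should be counted in `nnts` at most once per
+partition level, i.e. at most `depth+1` times in total; the `tmpctr > depth+1`
+check in the body reports violations of this invariant.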
""" -function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt) +function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth) - num_matrices = maximum(cellregs) - depth = Int(floor((num_matrices-1)/nt)) + #num_matrices = maximum(cellregs) + #depth = Int(floor((num_matrices-1)/nt)) #loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector #nnts = [zeros(Ti, nt+1) for i=1:depth+1] @@ -62,6 +75,11 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r nnts[crmod] += 1 #sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr] #push!(tmp, crmod) + if tmpctr > depth+1 + @info "Cellregs: ", sortedcellregs + @info "Levels : ", Int.(ceil.(sortedcellregs/nt)) + @info "PartsMod: ", ((sortedcellregs.-1).%nt).+1 + end tmp[tmpctr] = crmod tmpctr += 1 end @@ -127,9 +145,6 @@ end - - - """ `function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)` @@ -141,47 +156,77 @@ This function partitons the separator, which is done if `depth`>1 (see `grid_to_ `level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2... `preparatory_multi_ps` is the number of separator-cells. """ -function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes) - sepanodes = findall(x->x==nt+1, cellregs) +function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print) + # current number of cells treated + nc2 = size(ACSC, 1) - indptr = collect(1:nc+1) - indices = zeros(Int64, nc) - rowval = zeros(Int64, nc) + indptr = collect(1:nc2+1) + indices = zeros(Int64, nc2) + rowval = zeros(Int64, nc2) indptrT = collect(1:ctr_sepanodes+1) indicesT = zeros(Int64, ctr_sepanodes) rowvalT = zeros(Int64, ctr_sepanodes) - for (i,j) in enumerate(sepanodes) - indices[j] = i + for i=1:ctr_sepanodes + j = ri[i] + indices[j] = i indicesT[i] = j rowval[j] = 1 rowvalT[i] = 1 end - R = SparseMatrixCSC(ctr_sepanodes, nc, indptr, indices, rowval) - RT = SparseMatrixCSC(nc, ctr_sepanodes, indptrT, indicesT, rowvalT) - prod = ACSC*dropzeros(RT) + + + R = SparseMatrixCSC(ctr_sepanodes, nc2, indptr, indices, rowval) + RT = SparseMatrixCSC(nc2, ctr_sepanodes, indptrT, indicesT, rowvalT) + # current adjacency matrix, taken as a part of the given one ACSC RART = dropzeros(R)*ACSC*dropzeros(RT) - partition2 = Metis.partition(RART, nt) - cellregs2 = copy(partition2) - - ctr_sepanodes = 0 - for (i,j) in enumerate(sepanodes) - rows = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] - cellregs[j] = level0*nt + cellregs2[i] - if minimum(partition2[rows]) != maximum(partition2[rows]) - cellregs[j] = (level0+1)*nt+1 - ctr_sepanodes += 1 - end - end - - RART, ctr_sepanodes + cellregs2 = Metis.partition(RART, nt) + + + for i=1:ctr_sepanodes + if cellregs[gi[i]] < level0*nt+1 + @warn "cell treated in this iteration was not a separator-cell last iteration" + end + cellregs[gi[i]] = level0*nt + cellregs2[i] + end + + # how many cells are in the separator of the new partiton (which is only computed on the separator of the old partition) + new_ctr_sepanodes = 0 + ri2 = Vector{Int64}(undef, ctr_sepanodes) + gi2 = Vector{Int64}(undef, ctr_sepanodes) + + for tid=1:nt + for i=1:ctr_sepanodes + if cellregs2[i] == tid + neighbors = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)] + rows = 
gi[vcat(neighbors, [i])] + #counts how many different regions (besides) the separator are adjacent to the current cell + x = how_many_different_below(cellregs[rows], (level0+1)*nt+1) + if x > 1 + cellregs[gi[i]] = (level0+1)*nt+1 + new_ctr_sepanodes += 1 + gi2[new_ctr_sepanodes] = gi[i] + ri2[new_ctr_sepanodes] = i + end + end + end + end + + + ri2 = ri2[1:new_ctr_sepanodes] + gi2 = gi2[1:new_ctr_sepanodes] + + if do_print + @info "At level $(level0+1), we found $new_ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART, new_ctr_sepanodes, ri2, gi2 end - """ `function grid_to_graph_ps_multi!(grid, nt, depth)` @@ -190,7 +235,7 @@ The function assigns colors/partitons to each cell in the `grid`. First, the gri `nt` is the number of threads. `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... """ -function grid_to_graph_ps_multi!(grid, nt, depth) +function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=false) A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) number_cells_per_node = zeros(Int64, num_nodes(grid)) for j=1:num_cells(grid) @@ -224,27 +269,46 @@ function grid_to_graph_ps_multi!(grid, nt, depth) partition = Metis.partition(ACSC, nt) cellregs = copy(partition) + sn = Vector{Int64}(undef, num_cells(grid)) + gi = Vector{Int64}(undef, num_cells(grid)) ctr_sepanodes = 0 - for j=1:num_cells(grid) - rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes += 1 - end + + for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end end - RART = ACSC + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" 
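+        # (these are the cells whose neighborhood touches more than one of the
+        # nt partitions; they were reassigned to separator region nt+1 above)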
+ end + + RART = copy(ACSC) + actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end end - - - return allcells, start, cellregs + + return allcells, start, cellregs, actual_depth, ACSC end - -function grid_to_graph_ps_multi_par!(grid, nt, depth) - time = zeros(12) +function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print=false) As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] number_cells_per_node = zeros(Int64, num_nodes(grid)) @@ -288,54 +352,64 @@ function grid_to_graph_ps_multi_par!(grid, nt, depth) end ACSC = add_all_par!(As).cscmatrix - - #SparseArrays.SparseMatrixCSC(A)) - - - partition = Metis.partition(ACSC, nt) - cellregs = copy(partition) - ctr_sepanodes_a = zeros(Int64, nt) + cellregs = Metis.partition(ACSC, nt) - cell_range = get_starts(num_cells(grid), nt) - Threads.@threads :static for tid=1:nt - for j in cell_range[tid]:cell_range[tid+1]-1 - rows = @view ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes_a[tid] += 1 - end - end + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end end - - ctr_sepanodes = sum(ctr_sepanodes_a) - - #= - time[10] = @elapsed for j=1:num_cells(grid) - rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] - if minimum(partition[rows]) != maximum(partition[rows]) - cellregs[j] = nt+1 - ctr_sepanodes += 1 - end - end - =# - RART = ACSC + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = ACSC + actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end end - - - return allcells, start, cellregs + + #grid[CellRegions] = cellregs + #grid + return allcells, start, cellregs, actual_depth end +""" +`function add_all_par!(As)` +Add LNK matrices (stored in a vector) parallely (tree structure). +The result is stored in the first LNK matrix. 
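+
+Illustration (for `nt = 4`, hence `depth = 2`, assuming the loop body adds
+`As[start]` into `As[tid]`): level 1 adds `As[3]` into `As[1]` and `As[4]`
+into `As[2]` in parallel; level 2 adds `As[2]` into `As[1]`, so the total
+ends up in `As[1]`.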
+""" function add_all_par!(As) nt = length(As) depth = Int(floor(log2(nt))) ende = nt for level=1:depth - @threads :static for tid=1:2^(depth-level) + @threads for tid=1:2^(depth-level) #@info "$level, $tid" start = tid+2^(depth-level) while start <= ende @@ -425,3 +499,169 @@ function last_nz(x) end end + +function how_many_different_below(x0, y; u=0) + x = copy(x0) + z = unique(x) + t = findall(w->ww>u,z[t]) + length(t) +end + + + +function lookat_grid_to_graph_ps_multi!(nm, nt, depth) + grid = getgrid(nm) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = [] + gi = [] + ctr_sepanodes = 0 + for j=1:num_cells(grid) + rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)] + if minimum(partition[rows]) != maximum(partition[rows]) + cellregs[j] = nt+1 + ctr_sepanodes += 1 + push!(sn, j) + push!(gi, j) + end + end + RART = ACSC + #sn = 1:num_cells(grid) + #gi = 1:num_cells(grid) + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate_careful!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi) + if ctr_sepanodes == 0 + return RART + end + end + + + #return allcells, start, cellregs + RART +end + + +function adjacencies(grid) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + allcells, start, SparseArrays.SparseMatrixCSC(A) +end + +function check_adjacencies(nm) + grid = getgrid(nm) + allcells, start, A = adjacencies(grid) + + i = 1 + cells1 = sort(vcat([i], A.rowval[A.colptr[i]:(A.colptr[i+1]-1)])) #adjacent cells + nodes2 = grid[CellNodes][:,i] + cells2 = sort(unique(vcat([allcells[start[j]:start[j+1]-1] for j in nodes2]...))) + + @info cells1 + @info cells2 + @info maximum(abs.(cells1-cells2)) + + +end + +#= +function check_partition(nm, nt, depth) + grid = getgrid(nm) + + (allcells, start, cellregs, adepth, ACSC) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=true)#) + + if (adepth != depth) + @info "The requested depth of partitioning is too high. The depth is set to $adepth." 
+ end + depth = adepth + + validate_partition(num_nodes(grid), num_cells(grid), grid, cellregs, start, allcells, nt, depth, ACSC) +end +=# + +function validate_partition(grid, cellregs, start, allcells, nt, depth) + @info "Node based validation" + violation_ctr = 0 + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + sortedcellregs = unique(sort(cellregs[cells])) + levels = Int.(ceil.(sortedcellregs/nt)) + + for i=1:depth+1 + ids_lev = findall(x->x==i, levels) + if length(ids_lev) > 1 + violation_ctr += 1 + + if violation_ctr == 1 + @info "Node Id : ", j + @info "Cellregs: ", sortedcellregs + @info "Levels : ", levels + + loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) + cells_at_level4 = allcells[loc.+(start[j]-1)] + @info cells_at_level4, cellregs[cells_at_level4] + @info grid[CellNodes][:,cells_at_level4[1]], grid[CellNodes][:,cells_at_level4[2]] + end + end + end + end + @info "We found $violation_ctr violation(s)" +end \ No newline at end of file From 9e6dd1db5607e2fdddb3b3663a65d41169decddf Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 17 Mar 2024 10:57:59 +0100 Subject: [PATCH 09/44] add parallel matrix vector product --- src/ExtendableSparse.jl | 1 + src/factorizations/filu_Al-Kurdi_Mittal.jl | 160 ++++++++++++++++++ src/factorizations/ilu_Al-Kurdi_Mittal.jl | 65 ++++++- src/factorizations/iluam.jl | 4 +- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 149 ++++++++++++---- src/factorizations/piluam.jl | 5 +- .../ExtendableSparseParallel.jl | 63 ++++++- .../preparatory.jl | 4 +- .../struct_flush.jl | 4 + 9 files changed, 410 insertions(+), 45 deletions(-) create mode 100644 src/factorizations/filu_Al-Kurdi_Mittal.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 285d0c2..bcf85e6 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -37,6 +37,7 @@ export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") + include("factorizations/ilu_Al-Kurdi_Mittal.jl") #using .ILUAM include("factorizations/pilu_Al-Kurdi_Mittal.jl") diff --git a/src/factorizations/filu_Al-Kurdi_Mittal.jl b/src/factorizations/filu_Al-Kurdi_Mittal.jl new file mode 100644 index 0000000..2099208 --- /dev/null +++ b/src/factorizations/filu_Al-Kurdi_Mittal.jl @@ -0,0 +1,160 @@ +#module PILUAM +#using Base.Threads +#using LinearAlgebra, SparseArrays + +import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz + +@info "PILUAM" + +mutable struct PILUAMPrecon{T,N} + + diag::AbstractVector + nzval::AbstractVector + A::AbstractMatrix + +end + +function iluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <:Integer} + @info "filuAM!" + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.cscmatrix.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
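+        # `point` again maps the stored row indices of column j to positions
+        # in the CSC arrays for O(1) lookup during elimination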
+ point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + +end + + +function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} + @info "filuAM, $(A[1,1])" + nzval = copy(A.cscmatrix.nzval) + colptr = A.cscmatrix.colptr + rowval = A.cscmatrix.rowval + #nzval = ILU.nzval + n = A.n # number of columns + point = zeros(Ti, n) #Vector{Ti}(undef, n) + diag = Vector{Ti}(undef, n) + + # find diagonal entries + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? + point[rowval[v]] = v + end + + for v=colptr[j]:diag[j]-1 + i = rowval[v] + #nzval[v] /= nzval[diag[i]] + for w=diag[i]+1:colptr[i+1]-1 + k = point[rowval[w]] + if k>0 + nzval[k] -= nzval[v]*nzval[w] + end + end + end + + for v=diag[j]+1:colptr[j+1]-1 + nzval[v] /= nzval[diag[j]] + end + + + for v=colptr[j]:colptr[j+1]-1 + point[rowval[v]] = zero(Ti) + end + end + #nzval, diag + PILUAMPrecon{Tv,Ti}(diag, nzval, A) +end + + + +function ldiv!(x, ILU::PILUAMPrecon, b) + #@info "iluam ldiv 1" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A.cscmatrix + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(x, y, nzval, diag, A) + @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)) + #, maximum(abs.(b-A*x)), b[1], x[1], y[1] + x +end + + +function ldiv!(ILU::PILUAMPrecon, b) + #@info "iluam ldiv 2" + nzval = ILU.nzval + diag = ILU.diag + A = ILU.A.cscmatrix + y = copy(b) + #forward_subst!(y, b, ILU) + forward_subst_old!(y, b, nzval, diag, A) + backward_subst_old!(b, y, nzval, diag, A) + b +end + +function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} + x = copy(b) + ldiv!(x, ilu, b) + x +end + +function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} + length(ilu.nzval) +end + +#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl index 0b6b1b2..ad2207d 100644 --- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl @@ -3,7 +3,7 @@ import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz -@info "ILUAM" +#@info "ILUAM" mutable struct ILUAMPrecon{T,N} @@ -13,6 +13,58 @@ mutable struct ILUAMPrecon{T,N} end + +function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer} + diag = ILU.diag + nzval = ILU.nzval + + nzval = copy(A.nzval) + diag = Vector{Ti}(undef, n) + ILU.A = A + colptr = A.colptr + rowval = A.rowval + n = A.n + point = zeros(Ti, n) + + for j=1:n + for v=colptr[j]:colptr[j+1]-1 + if rowval[v] == j + diag[j] = v + break + end + #elseif rowval[v] + end + end + + # compute L and U + for j=1:n + for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
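+            # same column scatter as in iluAM! above; see the comment there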
+            point[rowval[v]] = v
+        end
+
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            #nzval[v] /= nzval[diag[i]]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+
+
+        for v=colptr[j]:colptr[j+1]-1
+            point[rowval[v]] = zero(Ti)
+        end
+    end
+    #nzval, diag
+    PILUAMPrecon{Tv,Ti}(diag, nzval, A)
+end
+
+
+
+function ldiv!(x, ILU::PILUAMPrecon, b)
+    #@info "iluam ldiv 1"
+    nzval = ILU.nzval
+    diag = ILU.diag
+    A = ILU.A.cscmatrix
+    y = copy(b)
+    #forward_subst!(y, b, ILU)
+    forward_subst_old!(y, b, nzval, diag, A)
+    backward_subst_old!(x, y, nzval, diag, A)
+    @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x))
+    #, maximum(abs.(b-A*x)), b[1], x[1], y[1]
+    x
+end
+
+
+function ldiv!(ILU::PILUAMPrecon, b)
+    #@info "iluam ldiv 2"
+    nzval = ILU.nzval
+    diag = ILU.diag
+    A = ILU.A.cscmatrix
+    y = copy(b)
+    #forward_subst!(y, b, ILU)
+    forward_subst_old!(y, b, nzval, diag, A)
+    backward_subst_old!(b, y, nzval, diag, A)
+    b
+end
+
+function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer}
+    x = copy(b)
+    ldiv!(x, ilu, b)
+    x
+end
+
+function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer}
+    length(ilu.nzval)
+end
+
+#end
\ No newline at end of file
diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/factorizations/ilu_Al-Kurdi_Mittal.jl
index 0b6b1b2..ad2207d 100644
--- a/src/factorizations/ilu_Al-Kurdi_Mittal.jl
+++ b/src/factorizations/ilu_Al-Kurdi_Mittal.jl
@@ -3,7 +3,7 @@
 import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz
 
-@info "ILUAM"
+#@info "ILUAM"
 
 mutable struct ILUAMPrecon{T,N}
 
@@ -13,6 +13,58 @@ mutable struct ILUAMPrecon{T,N}
 
 end
 
+
+function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer}
+    diag = ILU.diag
+    nzval = ILU.nzval
+    ILU.A = A
+    colptr = A.colptr
+    rowval = A.rowval
+    n = A.n
+    # refill the stored factorization in place: the sparsity pattern is assumed
+    # unchanged, so ILU.nzval and ILU.diag keep their lengths and are reused
+    nzval .= A.nzval
+    point = zeros(Ti, n)
+
+    for j=1:n
+        for v=colptr[j]:colptr[j+1]-1
+            if rowval[v] == j
+                diag[j] = v
+                break
+            end
+            #elseif rowval[v]
+        end
+    end
+
+    # compute L and U
+    for j=1:n
+        for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ??
+            point[rowval[v]] = v
+        end
+
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            #nzval[v] /= nzval[diag[i]]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+
+
+        for v=colptr[j]:colptr[j+1]-1
+            point[rowval[v]] = zero(Ti)
+        end
+    end
+
+end
+
 function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
     #@info "iluAM"
     nzval = copy(A.nzval)
@@ -33,6 +85,9 @@ function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
             #elseif rowval[v]
         end
     end
+
+    #@info diag[1:20]'
+    #@info diag[end-20:end]'
 
     # compute L and U
     for j=1:n
@@ -65,6 +120,7 @@ function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
 end
 
 function forward_subst_old!(y, v, nzval, diag, A)
+    #@info "fso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
     n = A.n
     colptr = A.colptr
     rowval = A.rowval
@@ -85,6 +141,7 @@ end
 
 
 function backward_subst_old!(x, y, nzval, diag, A)
+    #@info "bso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
     n = A.n
     colptr = A.colptr
     rowval = A.rowval
@@ -99,7 +156,9 @@ function backward_subst_old!(x, y, nzval, diag, A)
     x
 end
 
+
 function ldiv!(x, ILU::ILUAMPrecon, b)
+    #t = @elapsed begin
     #@info "iluam ldiv 1"
     nzval = ILU.nzval
     diag = ILU.diag
@@ -108,6 +167,10 @@ function ldiv!(x, ILU::ILUAMPrecon, b)
     #forward_subst!(y, b, ILU)
     forward_subst_old!(y, b, nzval, diag, A)
     backward_subst_old!(x, y, nzval, diag, A)
+    #@info "ILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10]
+    #, b[1], x[1], y[1]#maximum(abs.(b)), maximum(abs.(x))
+    #end
+    #println("$t") #@info t
     x
 end
 
diff --git a/src/factorizations/iluam.jl b/src/factorizations/iluam.jl
index 6d061b0..24b75be 100644
--- a/src/factorizations/iluam.jl
+++ b/src/factorizations/iluam.jl
@@ -22,12 +22,10 @@ function ILUAMPreconditioner end
 function update!(p::ILUAMPreconditioner)
     flush!(p.A)
     if p.A.phash != p.phash
-        @warn "p.A.phash != p.phash"
         p.factorization = iluAM(p.A.cscmatrix)
         p.phash=p.A.phash
     else
-        @warn "fuck?"
-        ilu0!(p.factorization, p.A.cscmatrix)
+        iluAM!(p.factorization, p.A.cscmatrix)
     end
     p
 end
diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl
index 15a8b23..f2861ed 100644
--- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl
+++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl
@@ -4,7 +4,7 @@
 import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz
 
-@info "PILUAM"
+#@info "PILUAM"
 
 mutable struct PILUAMPrecon{T,N}
 
@@ -51,6 +51,95 @@ function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti)
 end
 
+function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
+    @info "piluAM!"
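+    # parallel in-place refactorization: the block structure recorded in ILU
+    # (start, nt, depth) lets all diagonal blocks of one level be factorized
+    # concurrently; the trailing separator block is processed sequentially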
+    diag = ILU.diag
+    nzval = ILU.nzval
+    ILU.A = A
+    start = ILU.start
+
+    ILU.nt = A.nt
+    nt = A.nt
+
+    ILU.depth = A.depth
+    depth = A.depth
+
+
+    colptr = A.cscmatrix.colptr
+    rowval = A.cscmatrix.rowval
+    n = A.cscmatrix.n # number of columns
+    # ILU.diag and ILU.nzval are refilled completely below; allocating fresh
+    # local vectors here would leave the stored factorization unchanged
+    point = use_vector_par(n, A.nt, Int32)
+
+
+    @threads for tid=1:depth*nt+1
+        for j=start[tid]:start[tid+1]-1
+            for v=colptr[j]:colptr[j+1]-1
+                nzval[v] = A.cscmatrix.nzval[v]
+                if rowval[v] == j
+                    diag[j] = v
+                end
+                #elseif rowval[v]
+            end
+        end
+    end
+
+    for level=1:depth
+        @threads for tid=1:nt
+            for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1
+                for v=colptr[j]:colptr[j+1]-1
+                    point[tid][rowval[v]] = v
+                end
+
+                for v=colptr[j]:diag[j]-1
+                    i = rowval[v]
+                    for w=diag[i]+1:colptr[i+1]-1
+                        k = point[tid][rowval[w]]
+                        if k>0
+                            nzval[k] -= nzval[v]*nzval[w]
+                        end
+                    end
+                end
+
+                for v=diag[j]+1:colptr[j+1]-1
+                    nzval[v] /= nzval[diag[j]]
+                end
+
+                for v=colptr[j]:colptr[j+1]-1
+                    point[tid][rowval[v]] = zero(Ti)
+                end
+            end
+        end
+    end
+
+    #point = zeros(Ti, n) #Vector{Ti}(undef, n)
+    for j=start[depth*nt+1]:start[depth*nt+2]-1
+        for v=colptr[j]:colptr[j+1]-1
+            point[1][rowval[v]] = v
+        end
+
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[1][rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+
+        for v=colptr[j]:colptr[j+1]-1
+            point[1][rowval[v]] = zero(Ti)
+        end
+    end
+
+end
+
 function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
     start = A.start
     nt = A.nt
@@ -65,6 +154,22 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege
 
     # find diagonal entries
     #
+
+    #=
+    for j=1:n
+        for v=colptr[j]:colptr[j+1]-1
+            nzval[v] = A.cscmatrix.nzval[v]
+            if rowval[v] == j
+                diag[j] = v
+                #break
+            end
+            #elseif rowval[v]
+        end
+    end
+    =#
+
+
+
     @threads for tid=1:depth*nt+1
         for j=start[tid]:start[tid+1]-1
             for v=colptr[j]:colptr[j+1]-1
@@ -77,41 +182,9 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege
         end
     end
 
-    #=
-    @info "piluAM"
-    nzval = copy(A.cscmatrix.nzval)
-    colptr = A.cscmatrix.colptr
-    rowval = A.cscmatrix.rowval
-    #nzval = ILU.nzval
-    n = A.n # number of columns
-    diag = Vector{Ti}(undef, n)
-    start = A.start
-    nt = A.nt
-    depth = A.depth
-    point = use_vector_par(n, nt, Ti)
-    # find diagonal entries
-    @threads for tid=1:depth*nt+1
-        for j=start[tid]:start[tid+1]-1
-            for v=colptr[j]:colptr[j+1]-1
-                if rowval[v] == j
-                    diag[j] = v
-                    break
-                end
-                #elseif rowval[v]
-            end
-        end
-    end
-
-    # compute L and U
-    for level=1:depth
-        @threads for tid=1:nt
-            compute_lu!(nzval, point, start[(level-1)*nt+tid], start[(level-1)*nt+tid+1], tid, rowval, colptr, diag, Ti)
-        end
-    end
-
-    compute_lu!(nzval, point, start[depth*nt+1], start[depth*nt+2], 1, rowval, colptr, diag, Ti)
-    =#
+
+    #@info diag[1:20]'
+    #@info diag[end-20:end]'
 
     for level=1:depth
         @threads for tid=1:nt
@@ -171,6 +244,7 @@ function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Intege
 end
 
 function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A)
+    #@info "pfso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
     #@info "fwo"
     n = A.n
     colptr = A.colptr
@@ -200,6 +274,8 @@ end
 
 
 function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A)
+    #@info "pbso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
+
     #@info "bwo"
     n = A.n
     colptr = A.colptr
@@ -229,6
+305,7 @@ function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) end + function ldiv!(x, ILU::PILUAMPrecon, b) #@info "piluam ldiv 1" nzval = ILU.nzval @@ -241,6 +318,8 @@ function ldiv!(x, ILU::PILUAMPrecon, b) #forward_subst!(y, b, ILU) forward_subst_old!(y, b, nzval, diag, start, nt, depth, A) backward_subst_old!(x, y, nzval, diag, start, nt, depth, A) + #@info "PILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10] + #@info "PILUAM:", maximum(abs.(b-A*x)), b[1], x[1], maximum(abs.(b)), maximum(abs.(x)) x end diff --git a/src/factorizations/piluam.jl b/src/factorizations/piluam.jl index 075f73f..50a46fd 100644 --- a/src/factorizations/piluam.jl +++ b/src/factorizations/piluam.jl @@ -20,14 +20,13 @@ also calculates and stores updates to the off-diagonal entries and thus delivers function PILUAMPreconditioner end function update!(p::PILUAMPreconditioner) + #@warn "Should flush now", nnz_noflush(p.A) flush!(p.A) if p.A.phash != p.phash - @warn "p.A.phash != p.phash" p.factorization = piluAM(p.A) p.phash=p.A.phash else - @warn "fuck?" - ilu0!(p.factorization, p.A.cscmatrix) + piluAM!(p.factorization, p.A) end p end diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index b635a33..2c91a12 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -285,6 +285,67 @@ function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, end end -Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) + + +Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n) include("struct_flush.jl") + + + +import LinearAlgebra.mul! + +""" +```function LinearAlgebra.mul!(y, A, x)``` + +This overwrites the mul! 
function for A::ExtendableSparseMatrixParallel +""" +function LinearAlgebra.mul!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv, Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #@info "my matvec" + _, nnzLNK = nnz_noflush(A) + @assert nnzLNK == 0 + #mul!(y, A.cscmatrix, x) + matvec!(y, A, x) +end + + +""" +```function matvec!(y, A, x)``` + +y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel +this computation is done in parallel, it has the same result as y = A.cscmatrix*x +""" +function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} + #a1 = @allocated begin + nt = A.nt + depth = A.depth + colptr = A.cscmatrix.colptr + nzv = A.cscmatrix.nzval + rv = A.cscmatrix.rowval + + LinearAlgebra._rmul_or_fill!(y, 0.0) + + #end + #a2 = @allocated + for level=1:depth + @threads for tid::Int64=1:nt + for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 # in nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + end + + @threads for tid=1:1 + #a3 = @allocated + for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 + for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) + y[rv[row]] += nzv[row]*x[col] + end + end + end + + #println(a1,a2,a3) + y +end diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index a29356c..7eeb3d3 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -26,8 +26,8 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, validate_partition(grid, cellparts, start, allcells, nt, depth) end - @info length.(cfp) - @info minimum(cellparts), maximum(cellparts), nt, depth + #@info length.(cfp) + #@info minimum(cellparts), maximum(cellparts), nt, depth (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( cellparts, allcells, start, num_nodes(grid), Ti, nt, depth diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 38608ad..73471dc 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -1,5 +1,9 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true) + _, nnzLNK = nnz_noflush(A) + if nnzLNK == 0 + return + end if !do_dense A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros) From 0cc56e87cd29634014af8e53d33f284327b464e9 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 17 Mar 2024 11:03:16 +0100 Subject: [PATCH 10/44] remove old code --- src/ExtendableSparse.jl | 2 +- src/factorizations/filu_Al-Kurdi_Mittal.jl | 160 -------------- src/factorizations/ilu_Al-Kurdi_Mittal_0.jl | 146 ------------- src/factorizations/ilu_Al-Kurdi_Mittal_1.jl | 229 -------------------- 4 files changed, 1 insertion(+), 536 deletions(-) delete mode 100644 src/factorizations/filu_Al-Kurdi_Mittal.jl delete mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_0.jl delete mode 100644 src/factorizations/ilu_Al-Kurdi_Mittal_1.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index bcf85e6..9c490ca 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -33,7 +33,7 @@ export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, 
updateindex!, rawup export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet -@warn "ESMP!" +#@warn "ESMP!" include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") diff --git a/src/factorizations/filu_Al-Kurdi_Mittal.jl b/src/factorizations/filu_Al-Kurdi_Mittal.jl deleted file mode 100644 index 2099208..0000000 --- a/src/factorizations/filu_Al-Kurdi_Mittal.jl +++ /dev/null @@ -1,160 +0,0 @@ -#module PILUAM -#using Base.Threads -#using LinearAlgebra, SparseArrays - -import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - -@info "PILUAM" - -mutable struct PILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - A::AbstractMatrix - -end - -function iluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <:Integer} - @info "filuAM!" - diag = ILU.diag - nzval = ILU.nzval - - nzval = copy(A.cscmatrix.nzval) - diag = Vector{Ti}(undef, n) - ILU.A = A - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - n = A.n - point = zeros(Ti, n) - - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? - point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - -end - - -function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} - @info "filuAM, $(A[1,1])" - nzval = copy(A.cscmatrix.nzval) - colptr = A.cscmatrix.colptr - rowval = A.cscmatrix.rowval - #nzval = ILU.nzval - n = A.n # number of columns - point = zeros(Ti, n) #Vector{Ti}(undef, n) - diag = Vector{Ti}(undef, n) - - # find diagonal entries - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - #nzval, diag - PILUAMPrecon{Tv,Ti}(diag, nzval, A) -end - - - -function ldiv!(x, ILU::PILUAMPrecon, b) - #@info "iluam ldiv 1" - nzval = ILU.nzval - diag = ILU.diag - A = ILU.A.cscmatrix - y = copy(b) - #forward_subst!(y, b, ILU) - forward_subst_old!(y, b, nzval, diag, A) - backward_subst_old!(x, y, nzval, diag, A) - @info "FILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)) - #, maximum(abs.(b-A*x)), b[1], x[1], y[1] - x -end - - -function ldiv!(ILU::PILUAMPrecon, b) - #@info "iluam ldiv 2" - nzval = ILU.nzval - diag = ILU.diag - A = ILU.A.cscmatrix - y = copy(b) - #forward_subst!(y, b, ILU) - forward_subst_old!(y, b, nzval, diag, A) - backward_subst_old!(b, y, nzval, diag, A) - b -end - -function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer} - x = copy(b) - ldiv!(x, ilu, b) - x -end - -function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - -#end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl deleted file mode 100644 index 26f9788..0000000 --- a/src/factorizations/ilu_Al-Kurdi_Mittal_0.jl +++ /dev/null @@ -1,146 +0,0 @@ -module ILUAM -using LinearAlgebra, SparseArrays - -import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - - -mutable struct ILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - rowval::AbstractVector - colptr::AbstractVector - -end - -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - @info "ILUAMPrecon" - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), copy(A.rowval), copy(A.colptr)) -end - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - @info "iluAM!" - nzval = LU.nzval - diag = LU.diag - - colptr = LU.colptr - rowval = LU.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) - - t = zeros(5) - - # find diagonal entries - t[1] = @elapsed for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - t[3] += @elapsed for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) - end - end - t -end - -function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - t = zeros(6) - t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) - t[2:6] = iluAM!(LU, A) - @info t - LU -end - - -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - y .= 0 - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - y -end - - -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.colptr - rowval = ilu.rowval - diag = ilu.diag - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv1" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x -end - -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv2" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b -end - -function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM bs " - x = copy(b) - ldiv!(x, ilu, b) -end - -function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - - -end \ No newline at end of file diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl b/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl deleted file mode 100644 index a599094..0000000 --- a/src/factorizations/ilu_Al-Kurdi_Mittal_1.jl +++ /dev/null @@ -1,229 +0,0 @@ -module ILUAM -using LinearAlgebra, SparseArrays - -#import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz - -@info "ILUAM" - -mutable struct ILUAMPrecon{T,N} - - diag::AbstractVector - nzval::AbstractVector - A::AbstractMatrix - -end - -function ILUAMPrecon(A::SparseMatrixCSC{T,N}, b_type=T) where {T,N<:Integer} - @info "ILUAMPrecon" - n = A.n # number of columns - nzval = copy(A.nzval) - diag = Vector{N}(undef, n) - - ILUAMPrecon{T, N}(diag, copy(A.nzval), A) -end - - - -function iluAM!(LU::ILUAMPrecon{T,N}, A::SparseMatrixCSC{T,N}) where {T,N<:Integer} - @info "iluAM!" - nzval = LU.nzval - diag = LU.diag - - colptr = LU.A.colptr - rowval = LU.A.rowval - n = A.n # number of columns - point = zeros(N, n) #Vector{N}(undef, n) - - t = zeros(5) - - # find diagonal entries - t[1] = @elapsed for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - t[2] += @elapsed for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? 
- point[rowval[v]] = v - end - - t[3] += @elapsed for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - t[4] += @elapsed for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - t[5] += @elapsed for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(N) - end - end - t -end - - -function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer} - @info "iluAM" - nzval = copy(A.nzval) - colptr = A.colptr - rowval = A.rowval - #nzval = ILU.nzval - n = A.n # number of columns - point = zeros(Ti, n) #Vector{Ti}(undef, n) - diag = Vector{Ti}(undef, n) - - # find diagonal entries - for j=1:n - for v=colptr[j]:colptr[j+1]-1 - if rowval[v] == j - diag[j] = v - break - end - #elseif rowval[v] - end - end - - # compute L and U - for j=1:n - for v=colptr[j]:colptr[j+1]-1 ## start at colptr[j]+1 ?? - point[rowval[v]] = v - end - - for v=colptr[j]:diag[j]-1 - i = rowval[v] - #nzval[v] /= nzval[diag[i]] - for w=diag[i]+1:colptr[i+1]-1 - k = point[rowval[w]] - if k>0 - nzval[k] -= nzval[v]*nzval[w] - end - end - end - - for v=diag[j]+1:colptr[j+1]-1 - nzval[v] /= nzval[diag[j]] - end - - - for v=colptr[j]:colptr[j+1]-1 - point[rowval[v]] = zero(Ti) - end - end - #nzval, diag - ILUAMPrecon{Tv,Ti}(diag, nzval, A) -end - -#function iluAM(A::SparseMatrixCSC{T,N}) where {T,N<:Integer} -# t = zeros(6) -# t[1] = @elapsed (LU = ILUAMPrecon(A::SparseMatrixCSC{T,N})) -# t[2:6] = iluAM!(LU, A) -# @info t -# LU -#end - - -function forward_substitution!(y, ilu::ILUAMPrecon{T,N}, v) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - y .= 0 - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - y -end - - -function backward_substitution!(x, ilu::ILUAMPrecon{T,N}, y) where {T,N<:Integer} - n = ilu.A.n - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv_new!(x, ilu, v) - - n = ilu.A.n - y = Vector{Float64}(undef, n) - y .= 0 - nzval = ilu.nzval - colptr = ilu.A.colptr - rowval = ilu.A.rowval - diag = ilu.diag - #forward - @inbounds for j=1:n - y[j] += v[j] - for v=diag[j]+1:colptr[j+1]-1 - y[rowval[v]] -= nzval[v]*y[j] - end - end - - #backward - wrk = copy(y) - @inbounds for j=n:-1:1 - x[j] = wrk[j] / nzval[diag[j]] - for i=colptr[j]:diag[j]-1 - wrk[rowval[i]] -= nzval[i]*x[j] - end - end - x -end - -function ldiv!(x, ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - #@info "AM ldiv1" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(x, ilu, y) - x -end - -function ldiv!(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM ldiv2" - y = copy(b) - forward_substitution!(y, ilu, b) - backward_substitution!(b, ilu, y) - b -end - -function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer} - @info "AM bs " - x = copy(b) - ldiv!(x, ilu, b) - x -end - -function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer} - length(ilu.nzval) -end - - -end \ No newline at end of file From 0f282c6f37b45146816a0243e5f6ff7f24e2d84e Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 24 Mar 2024 17:40:05 +0100 Subject: [PATCH 11/44] added preparation for edgewise assembly --- src/factorizations/pilu_Al-Kurdi_Mittal.jl | 2 +- 
.../ExtendableSparseParallel.jl | 9 +- .../preparatory.jl | 234 ++++++++++++++++-- .../struct_flush.jl | 14 +- 4 files changed, 231 insertions(+), 28 deletions(-) diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/factorizations/pilu_Al-Kurdi_Mittal.jl index f2861ed..ad9529b 100644 --- a/src/factorizations/pilu_Al-Kurdi_Mittal.jl +++ b/src/factorizations/pilu_Al-Kurdi_Mittal.jl @@ -52,7 +52,7 @@ function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti) end function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer} - @info "piluAM!" + #@info "piluAM!" diag = ILU.diag nzval = ILU.nzval ILU.A = A diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 2c91a12..b413c5c 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -103,7 +103,7 @@ function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_t if updatentryCSC2!(A.cscmatrix, i, j, v) else - level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) + _, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v end end @@ -316,7 +316,6 @@ y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel this computation is done in parallel, it has the same result as y = A.cscmatrix*x """ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer} - #a1 = @allocated begin nt = A.nt depth = A.depth colptr = A.cscmatrix.colptr @@ -325,8 +324,6 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} LinearAlgebra._rmul_or_fill!(y, 0.0) - #end - #a2 = @allocated for level=1:depth @threads for tid::Int64=1:nt for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1 @@ -337,8 +334,9 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} end end + + @threads for tid=1:1 - #a3 = @allocated for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1 for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col) y[rv[row]] += nzv[row]*x[col] @@ -346,6 +344,5 @@ function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti} end end - #println(a1,a2,a3) y end diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 7eeb3d3..033a2fa 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -6,24 +6,31 @@ `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here. 
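
A sketch of a typical call (the grid size tuple, thread count and depth are example values only):

    preparatory_multi_ps_less_reverse((100,100), 4, 2, Int64)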
""" -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) +function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) grid = getgrid(nm; x0, x1) adepth = 0 if sequential - (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa, do_print)#) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts, adepth) = grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa, do_print) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa, do_print) end if (adepth != depth) && do_print @info "The requested depth of partitioning is too high. The depth is set to $adepth." end - depth = adepth - cfp = bettercellsforpart(cellparts, depth*nt+1) + + if assembly == :cellwise + cfp = bettercellsforpart(cellparts, depth*nt+1) + + else + edgeparts = edgewise_partition_from_cellwise_partition(grid, cellparts) + cfp = bettercellsforpart(edgeparts, depth*nt+1) + end + if check_partition - validate_partition(grid, cellparts, start, allcells, nt, depth) + validate_partition(grid, cellparts, start, allcells, nt, depth, assembly) end #@info length.(cfp) @@ -126,7 +133,7 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r tmpctr = 1 for cr in sortedcellregs crmod = (cr-1)%nt+1 - level = Int(ceil(cr/nt)) + #level = Int(ceil(cr/nt)) if !(crmod in tmp[1:tmpctr-1]) gictrs[crmod] += 1 # , level] += 1 sortednodesperthread[crmod,nj] = gictrs[crmod] @@ -235,7 +242,7 @@ The function assigns colors/partitons to each cell in the `grid`. First, the gri `nt` is the number of threads. `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices... 
""" -function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=false) +function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) number_cells_per_node = zeros(Int64, num_nodes(grid)) for j=1:num_cells(grid) @@ -304,11 +311,10 @@ function grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=fals end end - return allcells, start, cellregs, actual_depth, ACSC + return allcells, start, cellregs, actual_depth end - -function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print=false) +function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] number_cells_per_node = zeros(Int64, num_nodes(grid)) @@ -397,6 +403,195 @@ function grid_to_graph_ps_multi_par!(grid, nt, depth; minsize_sepa=10, do_print= return allcells, start, cellregs, actual_depth end +function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid)) + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + for i=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,i] + number_edges_per_node[node_id] += 1 + end + end + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + for node_id in grid[EdgeNodes][:,j] + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + edges = @view alledges[start[j]:start[j+1]-1] + for (i,id1) in enumerate(edges) + for id2 in edges[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + + sn = Vector{Int64}(undef, num_edges(grid)) + gi = Vector{Int64}(undef, num_edges(grid)) + ctr_sepanodes = 0 + + edgeregs = copy(partition) + for tid=1:nt + for j=1:num_edges(grid) + if edgeregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(edgeregs[rows], nt+1) > 1 + edgeregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" 
+ end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(edgeregs, num_edges(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return alledges, start, edgeregs, actual_depth +end + +function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) + ce = grid[CellEdges] + cn = grid[EdgeNodes] + + As = [ExtendableSparseMatrix{Int64, Int64}(num_edges(grid), num_edges(grid)) for tid=1:nt] + number_edges_per_node = zeros(Int64, num_nodes(grid)) + + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + number_edges_per_node[node_id] += 1 + end + end + + + alledges = zeros(Int64, sum(number_edges_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_edges_per_node) + number_edges_per_node .= 0 + + for j=1:num_edges(grid) + tmp = view(cn, :, j) + for node_id in tmp + alledges[start[node_id] + number_edges_per_node[node_id]] = j + number_edges_per_node[node_id] += 1 + end + end + + node_range = get_starts(num_nodes(grid), nt) + Threads.@threads for tid=1:nt + for j in node_range[tid]:node_range[tid+1]-1 + edges = @view alledges[start[j]:start[j+1]-1] + l = length(edges) + for (i,id1) in enumerate(edges) + ce = view(edges, i+1:l) + for id2 in ce + As[tid][id1,id2] = 1 + As[tid][id2,id1] = 1 + + end + end + end + ExtendableSparse.flush!(As[tid]) + end + + ACSC = add_all_par!(As).cscmatrix + + cellregs = Metis.partition(ACSC, nt) + + sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + ctr_sepanodess = zeros(Int64, nt) + + @threads for tid=1:nt + for j=1:num_edges(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodess[tid] += 1 + sn[tid][ctr_sepanodess[tid]] = j + end + end + end + end + + for tid=1:nt + sn[tid] = sn[tid][1:ctr_sepanodess[tid]] + end + ctr_sepanodes = sum(ctr_sepanodess) + sn = vcat(sn...) + gi = copy(sn) + + if do_print + @info "At level $(1), we found $ctr_sepanodes edges that have to be treated in the next iteration!" 
+ end + + RART = ACSC + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + #grid[CellRegions] = cellregs + #grid + return alledges, start, cellregs, actual_depth +end + + +function edgewise_partition_from_cellwise_partition(grid, cellregs) + ce = grid[CellEdges] + if num_edges(grid) == 0 + grid[EdgeNodes] + end + + edgeregs = maximum(cellregs)*ones(Int64, num_edges(grid)) + + for icell=1:num_cells(grid) + tmp = cellregs[icell] + for iedge in ce[:,icell] + if tmp < edgeregs[iedge] + edgeregs[iedge] = tmp + end + end + end + + edgeregs +end + """ `function add_all_par!(As)` @@ -636,10 +831,15 @@ function check_partition(nm, nt, depth) end =# -function validate_partition(grid, cellregs, start, allcells, nt, depth) - @info "Node based validation" +function validate_partition(grid, cellregs, start, allcells, nt, depth, assemblytype) violation_ctr = 0 + if assemblytype == :cellwise + key = CellNodes + else + key = EdgeNodes + end + for j=1:num_nodes(grid) cells = @view allcells[start[j]:start[j+1]-1] sortedcellregs = unique(sort(cellregs[cells])) @@ -651,14 +851,14 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth) violation_ctr += 1 if violation_ctr == 1 - @info "Node Id : ", j - @info "Cellregs: ", sortedcellregs - @info "Levels : ", levels + @info "Node Id : $j (we only show one violation)" + @info "Cellregs: $sortedcellregs" + @info "Levels : $levels" loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) cells_at_level4 = allcells[loc.+(start[j]-1)] @info cells_at_level4, cellregs[cells_at_level4] - @info grid[CellNodes][:,cells_at_level4[1]], grid[CellNodes][:,cells_at_level4[2]] + @info grid[key][:,cells_at_level4[1]], grid[key][:,cells_at_level4[2]] end end end diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 73471dc..1b8fc48 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -49,6 +49,9 @@ function dense_flush_keepzeros!( eqctr = 0 tmp = zeros(Ti, size(onr)[1]) + #@warn [As[i].nnz for i=1:nt], [As[i].n for i=1:nt], [As[i].m for i=1:nt] + #@info maximum.([As[i].colptr for i=1:nt]) + for nj=1:As[1].m indptr[nj] = ctr oj = rni[nj] @@ -62,7 +65,10 @@ function dense_flush_keepzeros!( k = s[regmod, nj] if regionctr == 1 while k>0 - #if As[regmod].nzval[k] != 0.0 + if As[regmod].rowval[k] != 0 + if ctr > nnz + @info "ctr > nnz, $nj, $oj" + end indices[ctr] = As[regmod].rowval[k] data[ctr] = As[regmod].nzval[k] @@ -82,12 +88,12 @@ function dense_flush_keepzeros!( ctr += 1 jc += 1 - #end + end k = As[regmod].colptr[k] end else while k>0 - #if As[regmod].nzval[k] != 0.0 + if As[regmod].rowval[k] != 0 indices[ctr] = As[regmod].rowval[k] data[ctr] = As[regmod].nzval[k] @@ -120,7 +126,7 @@ function dense_flush_keepzeros!( ctr += 1 jc += 1 - #end + end k = As[regmod].colptr[k] end From d5b9bacb79e9478d0792d5b847d4da71ccbd12a4 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Fri, 10 May 2024 15:58:05 +0200 Subject: [PATCH 12/44] added comment --- src/matrix/ExtendableSparseMatrixParallel/preparatory.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 
033a2fa..e73c7d1 100644
--- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
+++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl
@@ -5,6 +5,8 @@
 `nt` is the number of threads.
 `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices...
 To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here.
+
+This should live somewhere else, long-term.
 """
 function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false)
     grid = getgrid(nm; x0, x1)
@@ -864,4 +866,4 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth, assembly
         end
     end
     @info "We found $violation_ctr violation(s)"
-end
\ No newline at end of file
+end

From d0b6c63d8b62dfc6ae03b8ad5e39c394a1e47fc3 Mon Sep 17 00:00:00 2001
From: Johannes Taraz
Date: Sat, 11 May 2024 19:31:01 +0200
Subject: [PATCH 13/44] removed grid dependency of ESMP

---
 .../ExtendableSparseParallel.jl               | 63 +++++++++++++++++--
 .../struct_flush.jl                           |  2 +-
 .../supersparse.jl                            |  7 ++-
 3 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
index b413c5c..5126f8d 100644
--- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
+++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
@@ -9,38 +9,89 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar
     cscmatrix::SparseMatrixCSC{Tv, Ti}
 
     """
-    Linked list structure holding data of extension
+    Linked list structures holding data of extension, one for each thread
     """
     lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}}
 
-    grid::ExtendableGrid
+    """
+    this is the grid on which the PDE lives
+    (We do not want this dependency)
+    """
+    #grid::ExtendableGrid
 
+    """
+    Number of nodes per thread
+    """
     nnts::Vector{Ti}
 
+    """
+    sortednodesperthread[i,j] = local index of the j-th global column in the i-th LNK matrix
+    (this is used e.g. when assembling the matrix)
+    """
     sortednodesperthread::Matrix{Ti}
 
+    """
+    depth+1 x nn matrix,
+    old_noderegions[i,j] = region in which node j is, in level i
+    old refers to the fact that j is the 'old index' (i.e. grid index, not matrix index, see 'new_indices')
+    """
     old_noderegions::Matrix{Ti}
 
+    """
+    cellsforpart[i] is a vector containing all cells in the i-th region
+    cellsforpart has length nt*depth + 1
+    """
     cellsforpart::Vector{Vector{Ti}}
 
+    """
+    globalindices[i][j] = index in the global (ESMP & CSC) matrix of the j-th column of the i-th LNK matrix
+    (this maps the local indices (in the LNKs) to the global indices (ESMP & CSC))
+    """
     globalindices::Vector{Vector{Ti}}
 
+    """
+    For some applications such as the parallel ILU preconditioner, a block form is necessary.
+    Thus, the columns are reordered and A[i,i] does not correspond to the i-th node of the grid,
+    but A[new_indices[i], new_indices[i]] does
+    """
     new_indices::Vector{Ti}
 
+    """
+    Reverse: rev_new_indices[new_indices[i]] = i, for all i
+    """
    rev_new_indices::Vector{Ti}
 
+    """
+    starts[i] gives the first column of the i-th region, i.e.
starts[1] = 1 + starts has length nt*depth + 1 + """ start::Vector{Ti} - + + """ + cellparts[i] = region of the i-th cell + """ cellparts::Vector{Ti} + """ + Number of threads + """ nt::Ti + """ + How often is the separator partitioned? (if never: depth = 1) + """ depth::Ti phash::UInt64 + """ + Number of rows / number of nodes in grid + """ n::Ti + """ + Number of columns / number of nodes in grid (only works for square matrices) + """ m::Ti @@ -52,7 +103,7 @@ function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) w grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] - ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) + ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) end @@ -206,8 +257,8 @@ end #------------------------------------ function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} - A.cscmatrix = spzeros(Tv, Ti, num_nodes(A.grid), num_nodes(A.grid)) - A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + A.cscmatrix = spzeros(Tv, Ti, A.n, A.m) + A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(A.n, A.nnts[tid]) for tid=1:A.nt] end function nnz_flush(ext::ExtendableSparseMatrixParallel) diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl index 1b8fc48..3169d4b 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl @@ -16,7 +16,7 @@ function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=tr end end A.phash = phash(A.cscmatrix) - A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(num_nodes(A.grid), A.nnts[tid]) for tid=1:A.nt] + A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(A.n, A.nnts[tid]) for tid=1:A.nt] end diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index ae52f60..00b397d 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -43,16 +43,17 @@ mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{T rowval::Vector{Ti} """ - Nonzero entry values correspondin to each pair + Nonzero entry values corresponding to each pair (colptr[index],rowval[index]) - Initial length is n, it grows with each new entry. + Initial length is n, it grows with each new entry. 
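+
+    (Entries of one column form a linked list: colptr[index] points to the
+    next stored entry of the same column, and a zero value ends the list, as
+    traversed in struct_flush.jl.)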
""" nzval::Vector{Tv} - + collnk::Vector{Ti} + # counts the number of columns in use colctr::Ti end From 6d182951d16b4e14b30012ce26a2e8692f9e431a Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Sun, 12 May 2024 12:21:45 +0200 Subject: [PATCH 14/44] minor change: no globalcounter ouutput in constructor anymore --- .../ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl | 2 +- src/matrix/ExtendableSparseMatrixParallel/preparatory.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl index 5126f8d..737660d 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl @@ -100,7 +100,7 @@ end function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer} - grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) + grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1) csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid)) lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt] ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m) diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index e73c7d1..6e9eee3 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -43,7 +43,7 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, ) - return grid, nnts, s, onr, cfp, gi, gc, ni, rni, starts, cellparts, adepth + return grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth end """ From 01cc1ee8037230c34a02b59cadc837cb18b428c2 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Mon, 13 May 2024 04:14:40 +0200 Subject: [PATCH 15/44] remove grid dependency of ExtendableSparse.jl completely + minor restructuring --- Project.toml | 1 - src/ExtendableSparse.jl | 3 +- .../ExtendableSparseParallel.jl | 123 +++++++--- .../preparatory.jl | 222 ++++++++++++------ .../supersparse.jl | 204 ++++++++-------- src/matrix/extendable.jl | 9 + test/rect.jl | 182 ++++++++++++++ 7 files changed, 540 insertions(+), 204 deletions(-) create mode 100644 test/rect.jl diff --git a/Project.toml b/Project.toml index 8776054..36d1e03 100644 --- a/Project.toml +++ b/Project.toml @@ -6,7 +6,6 @@ version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 9c490ca..f927622 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -6,7 +6,6 @@ using ILUZero using Metis using Base.Threads -using ExtendableGrids if !isdefined(Base, :get_extension) using Requires @@ -28,7 +27,7 @@ include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") include("matrix/extendable.jl") -export SparseMatrixLNK, ExtendableSparseMatrix, flush!, 
nnz, updateindex!, rawupdateindex!, colptrs, sparse
+export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset!
 
 export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet
 
diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
index 737660d..08268ee 100644
--- a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
+++ b/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
@@ -13,12 +13,6 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar
     """
     lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}}
 
-    """
-    this is the grid on which the PDE lives
-    (We do not want this dependency)
-    """
-    #grid::ExtendableGrid
-
     """
     Number of nodes per thread
     """
@@ -98,52 +92,57 @@ mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSpar
 end
 
+"""
+$(SIGNATURES)
 
-function ExtendableSparseMatrixParallel{Tv, Ti}(nm, nt, depth; x0=0.0, x1=1.0) where {Tv, Ti <: Integer}
-    grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; x0, x1)
-    csc = spzeros(Tv, Ti, num_nodes(grid), num_nodes(grid))
-    lnk = [SuperSparseMatrixLNK{Tv, Ti}(num_nodes(grid), nnts[tid]) for tid=1:nt]
+`ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}`
+
+Create an ExtendableSparseMatrixParallel based on a grid.
+The grid is specified by nc (number of cells), nn (number of nodes) and the `mat_cell_node` (i.e. grid[CellNodes] if ExtendableGrids is used).
+Here, `mat_cell_node[k,i]` is the i-th node in the k-th cell.
+The matrix structure is made for parallel computations with `nt` threads.
+`depth` is the number of partition layers; for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again.
+`block_struct=true` means the matrix is reordered to have a block structure, which is necessary for the parallel ILU; for `false`, the matrix is not reordered.
+"""
+function ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}
+    nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; block_struct)
+    csc = spzeros(Tv, Ti, nn, nn)
+    lnk = [SuperSparseMatrixLNK{Tv, Ti}(nn, nnts[tid]) for tid=1:nt]
     ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m)
 end
 
+"""
+$(SIGNATURES)
 
-function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}
-    if known_that_unknown
-        A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
-        return
-    end
-
-    if updatentryCSC2!(A.cscmatrix, i, j, v)
-    else
-        A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
-    end
-end
-
+`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`
 
-#=
-function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}
+`A[i,j] += v`
+This function should be used if the thread in which the entry appears is known (`tid`).
+If the thread is not known, use `addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false)`, this function calculates `tid`. +If you know that the entry is not yet known to the CSC structure, set `known_that_unknown=true`. +""" +function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer} if known_that_unknown - level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v return end if updatentryCSC2!(A.cscmatrix, i, j, v) else - level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]]) A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v end end -=# """ -`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=true) where {Tv, Ti <: Integer}` +$(SIGNATURES) + +`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}` A[i,j] += v, using any partition. If the partition should be specified (for parallel use), use -`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=true) where {Tv, Ti <: Integer}`. +`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`. """ function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer} if known_that_unknown @@ -161,7 +160,13 @@ end #--------------------------------- +""" +$(SIGNATURES) +`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer` +Update element of the matrix with operation `op`. +Use this method if the 'thread of the element' is not known, otherwise use `updateindex!(ext, op, v, i, j, tid)`. +""" function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -178,6 +183,13 @@ function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer` + +Update element of the matrix with operation `op`. +Use this method if the 'thread of the element' is known, otherwise use `updateindex!(ext, op, v, i, j)`. +""" function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -194,6 +206,13 @@ function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer}` + +Like [`updateindex!`](@ref) but without checking if v is zero. +Use this method if the 'thread of the element' is not known. +""" function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -209,6 +228,13 @@ function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer}` + +Like [`updateindex!`](@ref) but without checking if v is zero. 
+Use this method if the 'thread of the element' is known +""" function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, @@ -224,6 +250,13 @@ function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, ext end +""" +$(SIGNATURES) +``Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer` + +Find index in CSC matrix and return value, if it exists. +Otherwise, return value from extension. +""" function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} @@ -237,6 +270,13 @@ function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, end +""" +$(SIGNATURES) +`Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti}` + +Find index in CSC matrix and set value if it exists. Otherwise, +set index in extension if `v` is nonzero. +""" function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, @@ -256,16 +296,31 @@ end #------------------------------------ +""" +$(SIGNATURES) + +Reset matrix, such that CSC and LNK have no non-zero entries. +""" function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer} A.cscmatrix = spzeros(Tv, Ti, A.n, A.m) A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(A.n, A.nnts[tid]) for tid=1:A.nt] end +""" +$(SIGNATURES) + +Compute number of non-zero elements, after flush. +""" function nnz_flush(ext::ExtendableSparseMatrixParallel) flush!(ext) return nnz(ext.cscmatrix) end +""" +$(SIGNATURES) + +Compute number of non-zero elements, without flush. +""" function nnz_noflush(ext::ExtendableSparseMatrixParallel) return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt]) end @@ -279,7 +334,11 @@ function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, T end +""" +$(SIGNATURES) +Show matrix, without flushing +""" function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel) #flush!(ext) xnnzCSC, xnnzLNK = nnz_noflush(ext) @@ -321,7 +380,11 @@ function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry a i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1)) end +""" +$(SIGNATURES) +Find out if i,j is non-zero entry in CSC, if yes, update entry with += v and return `true`, if not return `false` +""" function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer} p1 = CSC.colptr[j] p2 = CSC.colptr[j+1]-1 @@ -347,6 +410,7 @@ include("struct_flush.jl") import LinearAlgebra.mul! """ +$(SIGNATURES) ```function LinearAlgebra.mul!(y, A, x)``` This overwrites the mul! function for A::ExtendableSparseMatrixParallel @@ -361,6 +425,7 @@ end """ +$(SIGNATURES) ```function matvec!(y, A, x)``` y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl index 6e9eee3..fe5686f 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl @@ -1,5 +1,5 @@ """ -`function preparatory_multi_ps_less_reverse(nm, nt, depth)` +`function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth)` `nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
`nt` is the number of threads. @@ -8,13 +8,15 @@ To assemble the system matrix parallely, things such as `cellsforpart` (= which This should be somewhere else, longterm """ -function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, assembly=:cellwise, x0=0.0, x1=1.0, minsize_sepa=10, do_print=false, check_partition=false) - grid = getgrid(nm; x0, x1) +function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; + sequential=false, assembly=:cellwise, + minsize_sepa=10, do_print=false, check_partition=false, ne=0, ce=[], mat_edge_node=[], block_struct=true) + #grid = getgrid(nm; x0, x1) adepth = 0 if sequential - (allcells, start, cellparts, adepth) = grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa, do_print)#) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print)#) else - (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa, do_print) + (allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print) end if (adepth != depth) && do_print @@ -26,26 +28,32 @@ function preparatory_multi_ps_less_reverse(nm, nt, depth, Ti; sequential=false, cfp = bettercellsforpart(cellparts, depth*nt+1) else - edgeparts = edgewise_partition_from_cellwise_partition(grid, cellparts) + edgeparts = edgewise_partition_from_cellwise_partition(nc, ne, ce, cellparts) cfp = bettercellsforpart(edgeparts, depth*nt+1) end if check_partition - validate_partition(grid, cellparts, start, allcells, nt, depth, assembly) + if assembly == :cellwise + validate_partition(nn, mat_cell_node, cellparts, start, allcells, nt, depth, assembly) + else + validate_partition(nn, mat_edge_node, cellparts, start, allcells, nt, depth, assembly) + end end #@info length.(cfp) #@info minimum(cellparts), maximum(cellparts), nt, depth - (nnts, s, onr, gi, gc, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( - cellparts, allcells, start, num_nodes(grid), Ti, nt, depth + (nnts, s, onr, gi, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush( + cellparts, allcells, start, nn, Ti, nt, depth; block_struct ) - return grid, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth + return nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth end + + """ `function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)` @@ -56,8 +64,9 @@ Furthermore, `nnts` (number of nodes of the threads) is computed, which contain `nn` is the number of nodes in the grid. `Ti` is the type (Int64,...) of the elements in the created arrays. `nt` is the number of threads. 
+`block_struct=true` means that the matrix is reordered to have a block structure; this is necessary for parallel ILU. With `block_struct=false`, the matrix is not reordered.
 """
-function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth)
+function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth; block_struct = true)
 
 	#num_matrices = maximum(cellregs)
 	#depth = Int(floor((num_matrices-1)/nt))
@@ -119,6 +128,12 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r
 	end
 	starts .+= 1
 
+	if !block_struct
+		new_indices = collect(1:nn)
+		rev_new_indices = collect(1:nn)
+		starts = []
+	end
+
 	# Build sortednodesperthread and globalindices array:
 	# They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j
 	# Note that j has to be a `new index`
@@ -147,7 +162,7 @@ function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_r
 		end
 	end
 
-	nnts, sortednodesperthread, old_noderegions, globalindices, gictrs, new_indices, rev_new_indices, starts
+	nnts, sortednodesperthread, old_noderegions, globalindices, new_indices, rev_new_indices, starts
 end
 
@@ -165,7 +180,7 @@ This function partitons the separator, which is done if `depth`>1 (see `grid_to_
 `level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2...
 `preparatory_multi_ps` is the number of separator-cells.
 """
-function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print)
+function separate!(cellregs, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print)
 
 	# current number of cells treated
 	nc2 = size(ACSC, 1)
@@ -236,34 +251,36 @@ end
 
+
 """
-`function grid_to_graph_ps_multi!(grid, nt, depth)`
+`function grid_to_graph_cellwise_nogrid!(nc, nn, mat_cell_node, nt, depth)`
 
 The function assigns colors/partitons to each cell in the `grid`.
 First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again...
-`grid` is a simplexgrid.
+The grid is specified by `nc` (the number of cells), `nn` (the number of nodes) and `mat_cell_node` (i.e. `grid[CellNodes]` if ExtendableGrids is used).
+Here, `mat_cell_node[k,i]` is the k-th node of the i-th cell.
 `nt` is the number of threads.
 `depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices...
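+For example, `nt=4` and `depth=2` yield 4 partitions, 4 separator parts and one remaining separator, i.e. 2*4+1 = 9 submatrices.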
""" -function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) - A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) - number_cells_per_node = zeros(Int64, num_nodes(grid)) - for j=1:num_cells(grid) - for node_id in grid[CellNodes][:,j] +function grid_to_graph_cellwise_nogrid!(nc, nn, mat_cell_node, nt, depth; minsize_sepa=10, do_print=false) + A = SparseMatrixLNK{Int64, Int64}(nc, nc) + number_cells_per_node = zeros(Int64, nn) + for j=1:nc + for node_id in mat_cell_node[:,j] number_cells_per_node[node_id] += 1 end end allcells = zeros(Int64, sum(number_cells_per_node)) - start = ones(Int64, num_nodes(grid)+1) + start = ones(Int64, nn+1) start[2:end] += cumsum(number_cells_per_node) number_cells_per_node .= 0 - for j=1:num_cells(grid) - for node_id in grid[CellNodes][:,j] + for j=1:nc + for node_id in mat_cell_node[:,j] allcells[start[node_id] + number_cells_per_node[node_id]] = j number_cells_per_node[node_id] += 1 end end - for j=1:num_nodes(grid) + for j=1:nn cells = @view allcells[start[j]:start[j+1]-1] for (i,id1) in enumerate(cells) for id2 in cells[i+1:end] @@ -278,12 +295,12 @@ function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=fals partition = Metis.partition(ACSC, nt) cellregs = copy(partition) - sn = Vector{Int64}(undef, num_cells(grid)) - gi = Vector{Int64}(undef, num_cells(grid)) + sn = Vector{Int64}(undef, nc) + gi = Vector{Int64}(undef, nc) ctr_sepanodes = 0 for tid=1:nt - for j=1:num_cells(grid) + for j=1:nc if cellregs[j] == tid rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) if how_many_different_below(cellregs[rows], nt+1) > 1 @@ -306,7 +323,7 @@ function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=fals RART = copy(ACSC) actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -316,13 +333,18 @@ function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=fals return allcells, start, cellregs, actual_depth end -function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print=false) - As = [ExtendableSparseMatrix{Int64, Int64}(num_cells(grid), num_cells(grid)) for tid=1:nt] - number_cells_per_node = zeros(Int64, num_nodes(grid)) + +""" +`function grid_to_graph_ps_multi_par_nogrid!(nc, nn, mat_cell_node, nt, depth)` + +Same result as `grid_to_graph_ps_multi_nogrid!`, but computed on multiple threads. 
+""" +function grid_to_graph_cellwise_par_nogrid!(cn, nc, nn, nt, depth; minsize_sepa=10, do_print=false) + As = [ExtendableSparseMatrix{Int64, Int64}(nc, nc) for tid=1:nt] + number_cells_per_node = zeros(Int64, nn) - cn = grid[CellNodes] - for j=1:num_cells(grid) + for j=1:nc tmp = view(cn, :, j) for node_id in tmp number_cells_per_node[node_id] += 1 @@ -331,11 +353,11 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= allcells = zeros(Int64, sum(number_cells_per_node)) - start = ones(Int64, num_nodes(grid)+1) + start = ones(Int64, nn+1) start[2:end] += cumsum(number_cells_per_node) number_cells_per_node .= 0 - for j=1:num_cells(grid) + for j=1:nc tmp = view(cn, :, j) for node_id in tmp allcells[start[node_id] + number_cells_per_node[node_id]] = j @@ -343,7 +365,7 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= end end - node_range = get_starts(num_nodes(grid), nt) + node_range = get_starts(nn, nt) Threads.@threads for tid=1:nt for j in node_range[tid]:node_range[tid+1]-1 cells = @view allcells[start[j]:start[j+1]-1] @@ -363,11 +385,11 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= cellregs = Metis.partition(ACSC, nt) - sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt] + sn = [Vector{Int64}(undef, Int(ceil(nc/nt))) for tid=1:nt] ctr_sepanodess = zeros(Int64, nt) @threads for tid=1:nt - for j=1:num_cells(grid) + for j=1:nc if cellregs[j] == tid rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) if how_many_different_below(cellregs[rows], nt+1) > 1 @@ -393,7 +415,7 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= RART = ACSC actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -405,6 +427,80 @@ function grid_to_graph_cellwise_par!(grid, nt, depth; minsize_sepa=10, do_print= return allcells, start, cellregs, actual_depth end +""" +function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false) + A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid)) + number_cells_per_node = zeros(Int64, num_nodes(grid)) + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + number_cells_per_node[node_id] += 1 + end + end + allcells = zeros(Int64, sum(number_cells_per_node)) + start = ones(Int64, num_nodes(grid)+1) + start[2:end] += cumsum(number_cells_per_node) + number_cells_per_node .= 0 + for j=1:num_cells(grid) + for node_id in grid[CellNodes][:,j] + allcells[start[node_id] + number_cells_per_node[node_id]] = j + number_cells_per_node[node_id] += 1 + end + end + + for j=1:num_nodes(grid) + cells = @view allcells[start[j]:start[j+1]-1] + for (i,id1) in enumerate(cells) + for id2 in cells[i+1:end] + A[id1,id2] = 1 + A[id2,id1] = 1 + end + end + end + + ACSC = SparseArrays.SparseMatrixCSC(A) + + partition = Metis.partition(ACSC, nt) + cellregs = copy(partition) + + sn = Vector{Int64}(undef, num_cells(grid)) + gi = Vector{Int64}(undef, num_cells(grid)) + ctr_sepanodes = 0 + + for tid=1:nt + for j=1:num_cells(grid) + if cellregs[j] == tid + rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j]) + if how_many_different_below(cellregs[rows], nt+1) > 1 + cellregs[j] = nt+1 #+ctr_sepanodes + ctr_sepanodes += 1 + 
sn[ctr_sepanodes] = j + gi[ctr_sepanodes] = j + end + end + end + end + + sn = sn[1:ctr_sepanodes] + gi = gi[1:ctr_sepanodes] + + if do_print + @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!" + end + + RART = copy(ACSC) + actual_depth = 1 + for level=1:depth-1 + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) + actual_depth += 1 + if ctr_sepanodes < minsize_sepa + break + end + end + + return allcells, start, cellregs, actual_depth +end + + function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false) ce = grid[CellEdges] A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid)) @@ -465,13 +561,13 @@ function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=fals gi = gi[1:ctr_sepanodes] if do_print - @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!" + @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!" end RART = copy(ACSC) actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(edgeregs, num_edges(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(edgeregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -555,13 +651,13 @@ function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print= gi = copy(sn) if do_print - @info "At level $(1), we found $ctr_sepanodes edges that have to be treated in the next iteration!" + @info "At level (1), we found ctr_sepanodes edges that have to be treated in the next iteration!" end RART = ACSC actual_depth = 1 for level=1:depth-1 - RART, ctr_sepanodes, sn, gi = separate!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi, do_print) + RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print) actual_depth += 1 if ctr_sepanodes < minsize_sepa break @@ -572,17 +668,13 @@ function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print= #grid return alledges, start, cellregs, actual_depth end +""" +function edgewise_partition_from_cellwise_partition(nc, ne, ce, cellregs) + #ce = grid[CellEdges] + edgeregs = maximum(cellregs)*ones(Int64, ne) -function edgewise_partition_from_cellwise_partition(grid, cellregs) - ce = grid[CellEdges] - if num_edges(grid) == 0 - grid[EdgeNodes] - end - - edgeregs = maximum(cellregs)*ones(Int64, num_edges(grid)) - - for icell=1:num_cells(grid) + for icell=1:nc tmp = cellregs[icell] for iedge in ce[:,icell] if tmp < edgeregs[iedge] @@ -656,27 +748,7 @@ function bettercellsforpart(xx, upper) cfp end -""" -`function getgrid(nm)` -Returns a simplexgrid with a given number of nodes in each dimension. -`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid). 
-""" -function getgrid(nm; x0=0.0, x1=1.0) - if length(nm) == 2 - n,m = nm - xx = collect(LinRange(x0, x1, n)) - yy = collect(LinRange(x0, x1, m)) - grid = simplexgrid(xx, yy) - else - n,m,l = nm - xx = collect(LinRange(x0, x1, n)) - yy = collect(LinRange(x0, x1, m)) - zz = collect(LinRange(x0, x1, l)) - grid = simplexgrid(xx, yy, zz) - end - grid -end function get_starts(n, nt) ret = ones(Int64, nt+1) @@ -833,7 +905,7 @@ function check_partition(nm, nt, depth) end =# -function validate_partition(grid, cellregs, start, allcells, nt, depth, assemblytype) +function validate_partition(nn, mat, cellregs, start, allcells, nt, depth, assemblytype) violation_ctr = 0 if assemblytype == :cellwise @@ -842,7 +914,7 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth, assembly key = EdgeNodes end - for j=1:num_nodes(grid) + for j=1:nn cells = @view allcells[start[j]:start[j+1]-1] sortedcellregs = unique(sort(cellregs[cells])) levels = Int.(ceil.(sortedcellregs/nt)) @@ -860,7 +932,7 @@ function validate_partition(grid, cellregs, start, allcells, nt, depth, assembly loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt))) cells_at_level4 = allcells[loc.+(start[j]-1)] @info cells_at_level4, cellregs[cells_at_level4] - @info grid[key][:,cells_at_level4[1]], grid[key][:,cells_at_level4[2]] + @info mat[:,cells_at_level4[1]], mat[:,cells_at_level4[2]] end end end diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index 00b397d..4004b0d 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -399,103 +399,13 @@ function print_col(col, coll) @info v end -function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} - if lnk.nnz == 0 - return csc - elseif length(csc.rowval) == 0 - return SparseMatrixCSC(lnk) - else - return lnk + csc - end -end - -function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} - gi = collect(1:csc.n) - - - supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] - sortedcolumnids = sortperm(supersparsecolumns) - sortedcolumns = supersparsecolumns[sortedcolumnids] - #sortedcolumns = vcat([1], sortedcolumns) - sortedcolumns = vcat(sortedcolumns, [csc.n+1]) - - col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] - - #@info sortedcolumnids - - nnz_sum = length(csc.rowval) + lnk.nnz - colptr = Vector{Ti}(undef, csc.n+1) - rowval = Vector{Ti}(undef, nnz_sum) - nzval = Vector{Tv}(undef, nnz_sum) - colptr[1] = one(Ti) - - #first part: columns between 1 and first column of lnk - - colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) - rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) - nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) - - numshifts = 0 - - for J=1:length(sortedcolumns)-1 - #@info ">>>>>>> $J <<<<<<<<<<<<<<<" - # insert new added column here / dummy - i = sortedcolumns[J] - coll = get_column!(col, lnk, i) - #print_col(col, coll) - - nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) - - numshifts += nns - #j = colptr[i] #sortedcolumns[J]] - #rowval[j] = J - #nzval[j] = J - # insertion end - - #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts - - #a = i+1 - #b = sortedcolumns[J+1] - #@info a, b - - - #colptr[i+1:sortedcolumns[J+1]] = 
(csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) - - colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) - - - rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) - nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) - - - #= - - @info csc.colptr[a:b] - - colptr[a:b] = csc.colptr[a:b].+numshifts - - #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts - @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] - @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] - rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) - nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) - #rowval[colptrsortedcolumns[J+1]] - =# - end - - #@info colptr - - resize!(rowval, length(csc.rowval)+numshifts) - resize!(nzval, length(csc.rowval)+numshifts) - - - SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) - - - -end +""" +$(SIGNATURES) +Add the matrices `lnks` of type SuperSparseMatrixLNK onto the SparseMatrixCSC `csc`. +`gi[i]` maps the indices in `lnks[i]` to the indices of `csc`. +""" function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, Ti <: Integer} nt = length(lnks) @@ -605,7 +515,12 @@ function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArray end +""" +$(SIGNATURES) +Add the SuperSparseMatrixLNK `lnk` onto the SparseMatrixCSC `csc`. +`gi` maps the indices in `lnk` to the indices of `csc`. 
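+This is the single-matrix analogue of the vector variant above, which applies one index map per thread.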
+""" function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer} #@info lnk.collnk[1:lnk.colctr] @@ -677,7 +592,103 @@ function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseM end +""" + +function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + if lnk.nnz == 0 + return csc + elseif length(csc.rowval) == 0 + return SparseMatrixCSC(lnk) + else + return lnk + csc + end +end + +function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} + gi = collect(1:csc.n) + + + supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]] + sortedcolumnids = sortperm(supersparsecolumns) + sortedcolumns = supersparsecolumns[sortedcolumnids] + #sortedcolumns = vcat([1], sortedcolumns) + sortedcolumns = vcat(sortedcolumns, [csc.n+1]) + + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m] + + #@info sortedcolumnids + + nnz_sum = length(csc.rowval) + lnk.nnz + colptr = Vector{Ti}(undef, csc.n+1) + rowval = Vector{Ti}(undef, nnz_sum) + nzval = Vector{Tv}(undef, nnz_sum) + colptr[1] = one(Ti) + + #first part: columns between 1 and first column of lnk + + colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1]) + rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1) + nzval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1) + + numshifts = 0 + + for J=1:length(sortedcolumns)-1 + #@info ">>>>>>> J <<<<<<<<<<<<<<<" + # insert new added column here / dummy + i = sortedcolumns[J] + coll = get_column!(col, lnk, i) + #print_col(col, coll) + + nns = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1) + + numshifts += nns + #j = colptr[i] #sortedcolumns[J]] + #rowval[j] = J + #nzval[j] = J + # insertion end + + #colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts + + #a = i+1 + #b = sortedcolumns[J+1] + #@info a, b + + + #colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns) + + colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns) + + + rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1) + + + #= + + @info csc.colptr[a:b] + + colptr[a:b] = csc.colptr[a:b].+numshifts + + #colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts + @info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]] + @info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b] + rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]]) + #rowval[colptrsortedcolumns[J+1]] + =# + end + + #@info colptr + + resize!(rowval, length(csc.rowval)+numshifts) + resize!(nzval, length(csc.rowval)+numshifts) + + + SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval) + + +end function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer} gi = collect(1:csc.n) @@ -738,7 +749,6 @@ function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMa end - function twodisjointsets(n, k) A = 
rand(1:n, k)
 	B = zeros(Int64, k)
@@ -767,7 +777,7 @@ function distinct(x, n)
 	end
 	y
 end
-
+"""
 
 function mean(x)
 	sum(x)/length(x)
diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl
index 37cc015..abcd04a 100644
--- a/src/matrix/extendable.jl
+++ b/src/matrix/extendable.jl
@@ -313,6 +313,15 @@ function flush!(ext::ExtendableSparseMatrix)
 	end
 	return ext
 end
+"""
+$(SIGNATURES)
+
+Reset ExtendableSparseMatrix into a state similar to that after creation.
+"""
+function reset!(A::ExtendableSparseMatrix)
+	A.cscmatrix=spzeros(size(A)...)
+	A.lnkmatrix=nothing
+end
 
 """
 $(SIGNATURES)
diff --git a/test/rect.jl b/test/rect.jl
new file mode 100644
index 0000000..c502fc6
--- /dev/null
+++ b/test/rect.jl
@@ -0,0 +1,182 @@
+"""
+`test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)`
+
+Measure and output build and update times for a rectangular grid with `n * n` nodes.
+Calculations are done on `nt` threads (`nt` >= 1).
+Returns the assembled matrix.
+"""
+function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)
+	m = n
+	lindexes = LinearIndices((1:n,1:m))
+	mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti)
+	if nt > 1
+		A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false)
+	else
+		A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m)
+	end
+
+	X = collect(1:n) #LinRange(0,1,n)
+	Y = collect(1:n) #LinRange(0,1,m)
+
+	#Build
+	times_build = zeros(k)
+	for i=1:k
+		ExtendableSparse.reset!(A)
+		times_build[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false)
+	end
+
+
+
+	#update
+	times_update = zeros(k)
+	for i=1:k
+		times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true)
+	end
+
+	@info "TIMES: MIN, AVG, MAX"
+	info_minmax(times_build, "build ")
+	info_minmax(times_update, "update")
+
+	A
+end
+
+"""
+`generate_rectangle_grid(lindexes, Ti)`
+
+Generate a rectangular grid (i.e. a CellNodes matrix) based on `LinearIndices`.
+"""
+function generate_rectangle_grid(lindexes, Ti)
+	n,m = size(lindexes)
+	nn = n*m # num nodes
+	nc = (n-1)*(m-1)
+	#lindexes=LinearIndices((1:n,1:m))
+
+	mat_cell_node = zeros(Ti, 4, nc)
+
+	# node order per cell: upper left, upper right, lower right, lower left
+	cell_id = 1
+	for ir in 1:n-1
+		for jr in 1:m-1
+			mat_cell_node[1,cell_id] = lindexes[ir,jr]
+			mat_cell_node[2,cell_id] = lindexes[ir,jr+1]
+			mat_cell_node[3,cell_id] = lindexes[ir+1,jr+1]
+			mat_cell_node[4,cell_id] = lindexes[ir+1,jr]
+			cell_id += 1
+		end
+	end
+
+
+	mat_cell_node, nc, nn
+
+end
+
+function info_minmax(x, name; digits=3)
+	n = length(x)
+	@info name*" $(round(minimum(x),digits=digits)), $(round(sum(x)/n,digits=digits)), $(round(maximum(x),digits=digits))"
+end
+
+"""
+Assembly functions for ExtendableSparseMatrixParallel
+"""
+function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer}
+	if set_CSC_zero
+		A.cscmatrix.nzval .= 0
+	end
+
+	for level=1:A.depth
+		Threads.@threads for tid=1:A.nt
+			for cell in A.cellsforpart[(level-1)*A.nt+tid]
+				assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, tid)
+			end
+		end
+	end
+
+	for cell in A.cellsforpart[A.depth*A.nt+1]
+		assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1)
+	end
+
+	nnzCSC, nnzLNK = ExtendableSparse.nnz_noflush(A)
+	if nnzCSC > 0 && nnzLNK > 0
+		flush!(A; do_dense=false)
+		#sparse flush
+	elseif nnzCSC == 0 && nnzLNK > 0
+		flush!(A; do_dense=true)
+		#dense flush
+	end
+end
+
+function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer}
+	ExtendableSparse.addtoentry!(A, k, k, tid, +v)
+	ExtendableSparse.addtoentry!(A, k, l, tid, -v)
+	ExtendableSparse.addtoentry!(A, l, k, tid, -v)
+	ExtendableSparse.addtoentry!(A, l, l, tid, +v)
+end
+
+function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,cell,tid) where {Tv, Ti <: Integer}
+	ij00=mcn[1,cell]
+	ij10=mcn[2,cell]
+	ij11=mcn[3,cell]
+	ij01=mcn[4,cell]
+
+	ix = (cell-1)%n+1
+	iy = Int64(ceil(cell/n))
+
+	hx=X[ix+1]-X[ix]
+	hy=Y[iy+1]-Y[iy]
+
+	assembleedge!(A,0.5*hx/hy,ij00,ij01,tid)
+	assembleedge!(A,0.5*hx/hy,ij10,ij11,tid)
+	assembleedge!(A,0.5*hy/hx,ij00,ij10,tid)
+	assembleedge!(A,0.5*hy/hx,ij01,ij11,tid)
+	v=0.25*hx*hy
+	ExtendableSparse.addtoentry!(A, ij00, ij00, tid, v*d)
+	ExtendableSparse.addtoentry!(A, ij01, ij01, tid, v*d)
+	ExtendableSparse.addtoentry!(A, ij10, ij10, tid, v*d)
+	ExtendableSparse.addtoentry!(A, ij11, ij11, tid, v*d)
+end
+
+
+
+"""
+Assembly functions for ExtendableSparseMatrix
+"""
+function assemble_ESMP(A::ExtendableSparseMatrix{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer}
+	if set_CSC_zero
+		A.cscmatrix.nzval .= 0
+	end
+	nc = size(mat_cell_node,2)
+	for cell=1:nc
+		assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell)
+	end
+	ExtendableSparse.flush!(A)
+end
+
+function assembleedge!(A::ExtendableSparseMatrix{Tv, Ti},v,k,l) where {Tv, Ti <: Integer}
+	A[k,k]+=v
+	A[k,l]-=v
+	A[l,k]-=v
+	A[l,l]+=v
+end
+
+function assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) where {Tv, Ti <: Integer}
+	ij00=mcn[1,cell]
+	ij10=mcn[2,cell]
+	ij11=mcn[3,cell]
+	ij01=mcn[4,cell]
+
+	ix = (cell-1)%n+1
+	iy = Int64(ceil(cell/n))
+
+	hx=X[ix+1]-X[ix]
+	hy=Y[iy+1]-Y[iy]
+
+	assembleedge!(A,0.5*hx/hy,ij00,ij01)
+	assembleedge!(A,0.5*hx/hy,ij10,ij11)
+	assembleedge!(A,0.5*hy/hx,ij00,ij10)
+	assembleedge!(A,0.5*hy/hx,ij01,ij11)
+	v=0.25*hx*hy
+
A[ij00,ij00]+=v*d + A[ij01,ij01]+=v*d + A[ij10,ij10]+=v*d + A[ij11,ij11]+=v*d +end \ No newline at end of file From 8d56f995ab51ad41043878d1bf716196be3cc773 Mon Sep 17 00:00:00 2001 From: Johannes Taraz Date: Tue, 14 May 2024 19:12:47 +0200 Subject: [PATCH 16/44] add comment on collnk --- src/matrix/ExtendableSparseMatrixParallel/supersparse.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index 4004b0d..ece23bd 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -50,7 +50,9 @@ mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{T """ nzval::Vector{Tv} - + """ + (Unsorted) list of all columns with non-zero entries + """ collnk::Vector{Ti} # counts the number of columns in use From 1fa752d739a18a629a67f2fd70d30c1c8a0a3fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 13 May 2024 21:44:24 +0200 Subject: [PATCH 17/44] some better explanation of parallel test tools --- test/parallel_testtools.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/parallel_testtools.jl b/test/parallel_testtools.jl index 2a92521..126edf0 100644 --- a/test/parallel_testtools.jl +++ b/test/parallel_testtools.jl @@ -1,4 +1,4 @@ -using ChunkSplitters +import ChunkSplitters # Methods to test parallel assembly # Will eventually become part of the package. @@ -9,12 +9,12 @@ Return colored partitioing of grid made up by `X` and `Y` for work with `max(nt as a vector `p` of a vector pairs of index ranges such that `p[i]` containes partions of color i which can be assembled independently. -The current algorithm +The current algorithm creates `nt^2` partitions with `nt` colors. """ function part2d(X,Y, nt) nt=max(4,nt) - XP=collect(chunks(1:length(X)-1,n=nt)) - YP=collect(chunks(1:length(Y)-1,n=nt)) + XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt)) + YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt)) partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt] ipart=1 col=1 @@ -28,7 +28,12 @@ function part2d(X,Y, nt) partitions end +""" + showgrid(Makie, ColorSchemes, X,Y,nt) +Show grid partitioned according to [`part2d`](@ref). Needs a makie variant and ColorSchemes +to be passed as modules. 
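+For example (hypothetical session): `using GLMakie, ColorSchemes; showgrid(GLMakie, ColorSchemes, 0:0.1:1, 0:0.1:1, 4)`.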
+""" function showgrid(Makie, ColorSchemes, X,Y,nt) f = Makie.Figure() ax = Makie.Axis(f[1, 1]; aspect = 1) @@ -109,7 +114,7 @@ function assemblepartition!(A,lindexes,X,Y,xp,yp,d) end """ - partassemble!(A,N,np=1;xrange=(0,1),yrange=(0,1), d=0.1) + partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) Partitioned, cellwise, multithreaded assembly of finite difference matrix for ` -Δu + d*u=f` with homogeneous Neumann bc on grid set up by coordinate vectors From 493bec2fd1d987402f538f5bd7dc5083968dbe56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 16 May 2024 22:52:13 +0200 Subject: [PATCH 18/44] reorganized: put developing stuff into Experimental --- Project.toml | 1 + src/ExtendableSparse.jl | 23 ++----- src/experimental/Experimental.jl | 41 ++++++++++++ .../experimental}/parallel_testtools.jl | 16 ++--- src/factorizations/factorizations.jl | 36 ----------- src/matrix/extendable.jl | 41 +++++++++--- ...t_parallel.jl => experimental_parallel.jl} | 5 +- test/{rect.jl => experimental_rect.jl} | 62 ++++++++++++++++--- 8 files changed, 140 insertions(+), 85 deletions(-) create mode 100644 src/experimental/Experimental.jl rename {test => src/experimental}/parallel_testtools.jl (92%) rename test/{test_parallel.jl => experimental_parallel.jl} (97%) rename test/{rect.jl => experimental_rect.jl} (74%) diff --git a/Project.toml b/Project.toml index 36d1e03..f3d1ef7 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "1.4.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" +ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index f927622..5bdc442 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -4,8 +4,6 @@ using LinearAlgebra using Sparspak using ILUZero -using Metis -using Base.Threads if !isdefined(Base, :get_extension) using Requires @@ -31,41 +29,28 @@ export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawup export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet - -#@warn "ESMP!" -include("matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl") - - - -include("factorizations/ilu_Al-Kurdi_Mittal.jl") -#using .ILUAM -include("factorizations/pilu_Al-Kurdi_Mittal.jl") -#using .PILUAM include("factorizations/factorizations.jl") +include("experimental/Experimental.jl") + include("factorizations/simple_iteration.jl") export simple, simple! include("matrix/sprand.jl") export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark +export rawupdateindex!, updateindex! 
-export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK -export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, rawupdateindex!, updateindex!, compare_matrices_light - export JacobiPreconditioner, ILU0Preconditioner, ILUZeroPreconditioner, - ILUAMPreconditioner, - PILUAMPreconditioner, PointBlockILUZeroPreconditioner, ParallelJacobiPreconditioner, ParallelILU0Preconditioner, - BlockPreconditioner,allow_views, - reorderlinsys + BlockPreconditioner,allow_views export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl new file mode 100644 index 0000000..7108c25 --- /dev/null +++ b/src/experimental/Experimental.jl @@ -0,0 +1,41 @@ +module Experimental +using ExtendableSparse, SparseArrays +import ExtendableSparse: flush!, reset!, rawupdateindex! +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using DocStringExtensions +using Metis +using Base.Threads +using LinearAlgebra + +include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) + +include(joinpath(@__DIR__, "..", "factorizations","ilu_Al-Kurdi_Mittal.jl")) +#using .ILUAM +include(joinpath(@__DIR__, "..", "factorizations","pilu_Al-Kurdi_Mittal.jl")) +#using .PILUAM + +include(joinpath(@__DIR__, "..", "factorizations","iluam.jl")) +include(joinpath(@__DIR__, "..", "factorizations","piluam.jl")) + +@eval begin + @makefrommatrix ILUAMPreconditioner + @makefrommatrix PILUAMPreconditioner +end + +function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) + p.A = A + update!(p) + p +end + +export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK +export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!, compare_matrices_light +export ILUAMPreconditioner, PILUAMPreconditioner +export reorderlinsys, nnz_noflush + + +include("parallel_testtools.jl") +export part2d, showgrid, partassemble! + +end + diff --git a/test/parallel_testtools.jl b/src/experimental/parallel_testtools.jl similarity index 92% rename from test/parallel_testtools.jl rename to src/experimental/parallel_testtools.jl index 126edf0..a007ff2 100644 --- a/test/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -66,10 +66,10 @@ Assemble edge for finite volume laplacian. Used by [`partassemble!`](@ref). 
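+Adds `v` to the diagonal entries `(k,k)` and `(l,l)` and subtracts it from the off-diagonal entries `(k,l)` and `(l,k)`.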
""" function assembleedge!(A,v,k,l) - A[k,k]+=v - A[k,l]-=v - A[l,k]-=v - A[l,l]+=v + rawupdateindex!(A,+,v,k,k) + rawupdateindex!(A,+,-v,k,l) + rawupdateindex!(A,+,-v,l,k) + rawupdateindex!(A,+,v,l,l) end """ @@ -92,10 +92,10 @@ function assemblecell!(A,lindexes,X,Y,i,j,d) assembleedge!(A,0.5*hy/hx,ij00,ij10) assembleedge!(A,0.5*hy/hx,ij01,ij11) v=0.25*hx*hy - A[ij00,ij00]+=v*d - A[ij01,ij01]+=v*d - A[ij10,ij10]+=v*d - A[ij11,ij11]+=v*d + rawupdateindex!(A,+,v*d,ij00,ij00) + rawupdateindex!(A,+,v*d,ij01,ij01) + rawupdateindex!(A,+,v*d,ij10,ij10) + rawupdateindex!(A,+,v*d,ij11,ij11) end """ diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 2d56fce..c9809d3 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -75,8 +75,6 @@ end include("ilu0.jl") include("iluzero.jl") -include("iluam.jl") -include("piluam.jl") include("parallel_jacobi.jl") include("parallel_ilu0.jl") include("sparspak.jl") @@ -86,8 +84,6 @@ include("jacobi.jl") @eval begin @makefrommatrix ILU0Preconditioner @makefrommatrix ILUZeroPreconditioner - @makefrommatrix ILUAMPreconditioner - @makefrommatrix PILUAMPreconditioner @makefrommatrix PointBlockILUZeroPreconditioner @makefrommatrix JacobiPreconditioner @makefrommatrix ParallelJacobiPreconditioner @@ -110,40 +106,8 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) update!(p) p end - -function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel) - p.A = A - update!(p) - p -end - -#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) -# p.A = A -# update!(p) -# p -#end - -#factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) - -#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,ExtendableSparseMatrix(A.cscmatrix)) - factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A)) -#function factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel) -# factorize!(p, A) -#end - -#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrixParallel) -# factorize!(p, A.cscmatrix) -#end - - -#function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix) -# factorize!(p, A.cscmatrix) -#end - - -#factorize!(p::PILUAMPrecon, A::ExtendableSparseMatrixParallel)=factorize!(p,A) """ ``` diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index abcd04a..df67dc7 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -26,6 +26,29 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrix phash::UInt64 end +mutable struct Locking + locking::Bool +end + +const locking=Locking(true) + +function with_locking!(l::Bool) + global locking + locking.locking=l +end + +function with_locking() + global locking + locking.locking +end + +mylock(x)=with_locking() ? Base.lock(x) : nothing +myunlock(x)=with_locking() ? 
Base.unlock(x) : nothing + + +#mylock(x)=nothing +#myunlock(x)=nothing + """ ``` ExtendableSparseMatrix(Tv,Ti,m,n) @@ -57,7 +80,7 @@ ExtendableSparseMatrix(m, n) = ExtendableSparseMatrix{Float64, Int}(m, n) """ $(SIGNATURES) - Create ExtendableSparseMatrix from SparseMatrixCSC +Create ExtendableSparseMatrix from SparseMatrixCSC """ function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} @@ -171,14 +194,14 @@ function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - lock(ext.lock) + mylock(ext.lock) try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end updateindex!(ext.lnkmatrix, op, v, i, j) finally - unlock(ext.lock) + myunlock(ext.lock) end end ext @@ -198,14 +221,14 @@ function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - lock(ext.lock) + mylock(ext.lock) try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end rawupdateindex!(ext.lnkmatrix, op, v, i, j) finally - unlock(ext.lock) + myunlock(ext.lock) end end ext @@ -225,14 +248,14 @@ function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - lock(ext.lock) + mylock(ext.lock) try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end ext.lnkmatrix[i, j] = v finally - unlock(ext.lock) + myunlock(ext.lock) end end end @@ -253,11 +276,11 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, return zero(Tv) else v=zero(Tv) - lock(ext.lock) + mylock(ext.lock) try v=ext.lnkmatrix[i, j] finally - unlock(ext.lock) + myunlock(ext.lock) end end end diff --git a/test/test_parallel.jl b/test/experimental_parallel.jl similarity index 97% rename from test/test_parallel.jl rename to test/experimental_parallel.jl index 1fe3f1d..fa42d1d 100644 --- a/test/test_parallel.jl +++ b/test/experimental_parallel.jl @@ -1,10 +1,9 @@ using ExtendableSparse,SparseArrays +using ExtendableSparse.Experimental using DocStringExtensions using BenchmarkTools using Test -include("parallel_testtools.jl") - """ test_correctness_update(N) @@ -99,7 +98,7 @@ end Reset ExtenableSparseMatrix into state similar to that after creation. """ -function reset!(A) +function ExtendableSparse.reset!(A::ExtendableSparseMatrix) A.cscmatrix=spzeros(size(A)...) 
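+    # the LNK part is dropped; it is recreated lazily on the next insertion of a new entry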
A.lnkmatrix=nothing end diff --git a/test/rect.jl b/test/experimental_rect.jl similarity index 74% rename from test/rect.jl rename to test/experimental_rect.jl index c502fc6..367e1ac 100644 --- a/test/rect.jl +++ b/test/experimental_rect.jl @@ -1,3 +1,10 @@ +using ExtendableSparse,SparseArrays +using ExtendableSparse.Experimental +using DocStringExtensions +using BenchmarkTools +using Test + + """ `test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)` @@ -40,6 +47,41 @@ function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) A end + +function speedup_build_ESMP(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + + ExtendableSparse.with_locking!(false) + A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + ExtendableSparse.with_locking!(true) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + result=[] + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(ExtendableSparse.reset!($A)) seconds=1 + push!(result,(nt,round(t0/t,digits=2))) + end + + # #update + # times_update = zeros(k) + # for i=1:k + # times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true) + # end + + # @info "TIMES: MIN, AVG, MAX" + # info_minmax(times_build, "build ") + # info_minmax(times_update, "update") + result + +end + """ `generate_rectangle_grid(lindexes, Ti)` @@ -95,7 +137,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1) end - nnzCSC, nnzLNK = ExtendableSparse.nnz_noflush(A) + nnzCSC, nnzLNK = nnz_noflush(A) if nnzCSC > 0 && nnzLNK > 0 flush!(A; do_dense=false) #sparse flush @@ -106,10 +148,10 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell end function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer} - ExtendableSparse.addtoentry!(A, k, k, tid, +v) - ExtendableSparse.addtoentry!(A, k, l, tid, -v) - ExtendableSparse.addtoentry!(A, l, k, tid, -v) - ExtendableSparse.addtoentry!(A, l, l, tid, +v) + addtoentry!(A, k, k, tid, +v) + addtoentry!(A, k, l, tid, -v) + addtoentry!(A, l, k, tid, -v) + addtoentry!(A, l, l, tid, +v) end function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,cell,tid) where {Tv, Ti <: Integer} @@ -129,10 +171,10 @@ function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,c assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) v=0.25*hx*hy - ExtendableSparse.addtoentry!(A, ij00, ij00, tid, v*d) - ExtendableSparse.addtoentry!(A, ij01, ij01, tid, v*d) - ExtendableSparse.addtoentry!(A, ij10, ij10, tid, v*d) - ExtendableSparse.addtoentry!(A, ij11, ij11, tid, v*d) + addtoentry!(A, ij00, ij00, tid, v*d) + addtoentry!(A, ij01, ij01, tid, v*d) + addtoentry!(A, ij10, ij10, tid, v*d) + addtoentry!(A, ij11, ij11, tid, v*d) end @@ -179,4 +221,4 @@ function assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) whe A[ij01,ij01]+=v*d A[ij10,ij10]+=v*d A[ij11,ij11]+=v*d -end \ No newline at end of file +end From 954819c349df7d24cb73fce1f50a27f2ec313c06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sat, 18 May 2024 23:15:11 
+0200 Subject: [PATCH 19/44] ExtendableSparseMatrixParallelDict & tests Steps to AbstractExtendableSparse --- Project.toml | 1 + src/experimental/Experimental.jl | 18 +- src/experimental/abstractextendable.jl | 34 ++++ .../extendablesparsematrixdict.jl | 191 ++++++++++++++++++ src/experimental/parallel_testtools.jl | 81 +++++++- src/experimental/sparsematrixdict.jl | 64 ++++++ .../supersparse.jl | 5 +- test/experimental_dict.jl | 130 ++++++++++++ 8 files changed, 516 insertions(+), 8 deletions(-) create mode 100644 src/experimental/abstractextendable.jl create mode 100644 src/experimental/extendablesparsematrixdict.jl create mode 100644 src/experimental/sparsematrixdict.jl create mode 100644 test/experimental_dict.jl diff --git a/Project.toml b/Project.toml index f3d1ef7..1d10c82 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 7108c25..f7aa518 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -1,11 +1,15 @@ module Experimental using ExtendableSparse, SparseArrays -import ExtendableSparse: flush!, reset!, rawupdateindex! +using LinearAlgebra +using SparseArrays: AbstractSparseMatrixCSC +import SparseArrays: nonzeros, getcolptr,nzrange +import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash using DocStringExtensions using Metis using Base.Threads -using LinearAlgebra +using OhMyThreads: @tasks + include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) @@ -34,8 +38,16 @@ export ILUAMPreconditioner, PILUAMPreconditioner export reorderlinsys, nnz_noflush +include("abstractextendable.jl") + +include("sparsematrixdict.jl") +export SparseMatrixDict + +include("extendablesparsematrixdict.jl") +export ExtendableSparseMatrixParallelDict, partcolors! + include("parallel_testtools.jl") -export part2d, showgrid, partassemble! +export part2d, showgrid, partassemble!, assemblepartition! end diff --git a/src/experimental/abstractextendable.jl b/src/experimental/abstractextendable.jl new file mode 100644 index 0000000..d44b4d1 --- /dev/null +++ b/src/experimental/abstractextendable.jl @@ -0,0 +1,34 @@ +abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end + +SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) + +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) + +Base.size(ext::AbstractExtendableSparseMatrix)=size(sparse(ext)) + +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) + A=sparse(ext) + xnnz = nnz(A) + m, n = size(A) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnz, + " stored ", + xnnz == 1 ? 
"entry" : "entries") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnz == 0) + print(io, ":\n") + Base.print_array(IOContext(io), A) + end +end + + diff --git a/src/experimental/extendablesparsematrixdict.jl b/src/experimental/extendablesparsematrixdict.jl new file mode 100644 index 0000000..5ef63da --- /dev/null +++ b/src/experimental/extendablesparsematrixdict.jl @@ -0,0 +1,191 @@ +mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Linked list structure holding data of extension + """ + dictmatrices::Vector{SparseMatrixDict{Tv,Ti}} + + nodeparts::Vector{Ti} + partnodes::Vector{Vector{Ti}} + colparts::Vector{Vector{Ti}} +end + + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti} + ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n), + [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p], + zeros(Ti,n), + Vector{Ti}[], + Vector{Ti}[] + ) +end + +function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti} + ncol=maximum(partcolors) + colparts=[Ti[] for i=1:ncol] + for i=1:length(partcolors) + push!(colparts[partcolors[i]],i) + end + ext.colparts=colparts + ext +end + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti} + ext=ExtendableSparseMatrixParallelDict(m,n,length(pc)) + partcolors!(ext,pc) +end + + +ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p) + + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p] + ext.nodeparts.=zero(Ti) + ext +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti} + reset!(ext,length(ext.dictmatrices)) +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti} + reset!(ext,length(pc)) + partcolors!(ext,pc) +end + + +function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} + lnew=sumlength(ext.dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=ext.cscmatrix + l=lnew+nnz(ext.cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + ip=1 + for m in ext.dictmatrices + for (p,v) in m.values + ext.nodeparts[first(p)]=ip + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + np=length(ext.dictmatrices) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np] + ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) + + n,m=size(ext) + pn=zeros(Int,np) + for i=1:n + if ext.nodeparts[i]>0 + pn[ext.nodeparts[i]]+=1 + end + end + partnodes=[zeros(Int,pn[i]) for i=1:np] + pn.=1 + for i=1:n + if ext.nodeparts[i]>0 + ip=ext.nodeparts[i] + partnodes[ip][pn[ip]]=i + pn[ip]+=1 + end + end + ext.partnodes=partnodes + end + ext +end + +function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) + flush!(ext) + ext.cscmatrix +end + + + +function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! 
for new entries into ExtendableSparseMatrixParallelDict") + end +end + + +function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sumlength(ext.dictmatrices) == 0 + return zero(Tv) + else + error("flush! ExtendableSparseMatrixParallelDict before using getindex") + end +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) + end +end + +function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} + A=ext.cscmatrix + colparts=ext.colparts + partnodes=ext.partnodes + rows = rowvals(A) + vals = nonzeros(A) + + r.=zero(Tv) + m,n=size(A) + for icol=1:length(colparts) + part=colparts[icol] + @tasks for ip=1:length(part) + @inbounds begin + for j in partnodes[part[ip]] + for i in nzrange(A,j) + row = rows[i] + val = vals[i] + r[row]+=val*x[j] + end + end + end + end + end + r +end + diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl index a007ff2..4f99283 100644 --- a/src/experimental/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -28,6 +28,22 @@ function part2d(X,Y, nt) partitions end +function colpart2d(X,Y,nt) + Nx=length(X) + Ny=length(Y) + p=part2d(X,Y,nt) + pc=zeros(Int,sum(length,p)) + jp=1 + for icol=1:length(p) + for ip=1:length(p[icol]) + pc[jp]=icol + jp+=1 + end + end + p,pc +end + + """ showgrid(Makie, ColorSchemes, X,Y,nt) @@ -72,6 +88,13 @@ function assembleedge!(A,v,k,l) rawupdateindex!(A,+,v,l,l) end +function assembleedge!(A,v,k,l,tid) + rawupdateindex!(A,+,v,k,k,tid) + rawupdateindex!(A,+,-v,k,l,tid) + rawupdateindex!(A,+,-v,l,k,tid) + rawupdateindex!(A,+,v,l,l,tid) +end + """ $(SIGNATURES) @@ -98,6 +121,25 @@ function assemblecell!(A,lindexes,X,Y,i,j,d) rawupdateindex!(A,+,v*d,ij11,ij11) end +function assemblecell!(A,lindexes,X,Y,i,j,d,tid) + hx=X[i+1]-X[i] + hy=Y[j+1]-Y[j] + ij00=lindexes[i,j] + ij10=lindexes[i+1,j] + ij11=lindexes[i+1,j+1] + ij01=lindexes[i,j+1] + + assembleedge!(A,0.5*hx/hy,ij00,ij01,tid) + assembleedge!(A,0.5*hx/hy,ij10,ij11,tid) + assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) + assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) + v=0.25*hx*hy + rawupdateindex!(A,+,v*d,ij00,ij00,tid) + rawupdateindex!(A,+,v*d,ij01,ij01,tid) + rawupdateindex!(A,+,v*d,ij10,ij10,tid) + rawupdateindex!(A,+,v*d,ij11,ij11,tid) +end + """ $(SIGNATURES) @@ -113,6 +155,14 @@ function assemblepartition!(A,lindexes,X,Y,xp,yp,d) end end +function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid) + for j in yp + for i in xp + assemblecell!(A,lindexes,X,Y,i,j,d,tid) + end + end +end + """ partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) @@ -133,10 +183,39 @@ function partassemble!(A,X,Y,nt=1;d=0.1) else p=part2d(X,Y,nt) for icol=1:length(p) - Threads.@threads for (xp, yp) in p[icol] + @tasks for (xp, yp) in p[icol] assemblepartition!(A,lindexes,X,Y,xp,yp,d) end end end flush!(A) end + + +function partassemble!(A::ExtendableSparseMatrixParallelDict,X,Y,nt=1;d=0.1, reset=true) + Nx=length(X) + Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") + size(A,2)==Nx*Ny || error("incompatible size of A") + + lindexes=LinearIndices((1:Nx,1:Ny)) + if nt==1 + reset!(A,1) + 
assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1) + else + p,pc=colpart2d(X,Y,nt) + if reset + reset!(A,pc) + end + jp0=0 + for icol=1:length(p) + npc=length(p[icol]) + @tasks for ip=1:npc + (xp, yp)=p[icol][ip] + assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip) + end + jp0+=npc + end + end + flush!(A) +end diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl new file mode 100644 index 0000000..112f949 --- /dev/null +++ b/src/experimental/sparsematrixdict.jl @@ -0,0 +1,64 @@ +mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrix{Tv,Ti} + m::Ti + n::Ti + values::Dict{Pair{Ti,Ti}, Tv} + SparseMatrixDict{Tv,Ti}(m,n) where {Tv,Ti} = new(m,n,Dict{Pair{Ti,Ti}, Tv}()) +end + +function reset!(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + m.values=Dict{Pair{Ti,Ti}, Tv}() +end + +function Base.setindex!(m::SparseMatrixDict,v,i,j) + m.values[Pair(i,j)]=v +end + +function rawupdateindex!(m::SparseMatrixDict{Tv,Ti},op,v,i,j) where {Tv,Ti} + p=Pair(i,j) + haskey(m.values,p) ? vnew=op(m.values[p],v) : vnew=op(zero(Tv),v) + m.values[p]=vnew +end + +function Base.getindex(m::SparseMatrixDict{Tv},i,j) where Tv + haskey(m.values,Pair(i,j)) ? m.values[Pair(i,j)] : zero(Tv) +end + +Base.size(m::SparseMatrixDict)=(m.m,m.n) + +flush!(m::SparseMatrixDict)=nothing + +function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(m.values) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + SparseArrays.sparse!(I,J,V,size(m)...,+) +end + +sumlength(mv::Vector{SparseMatrixDict{Tv,Ti}}) where{Tv,Ti}=sum(m->length(m.values),mv) + +function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} + l=sumlength(mv) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for m in mv + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + end + SparseArrays.sparse!(I,J,V,size(mv[1])...,+) +end + + diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl index ece23bd..691e158 100644 --- a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl +++ b/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl @@ -1,7 +1,4 @@ -using SparseArrays -using ExtendableSparse - mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} """ Number of rows @@ -70,7 +67,7 @@ function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer} end -function findindex(lnk::SuperSparseMatrixLNK, i, j) +function ExtendableSparse.findindex(lnk::SuperSparseMatrixLNK, i, j) if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) throw(BoundsError(lnk, (i, j))) end diff --git a/test/experimental_dict.jl b/test/experimental_dict.jl new file mode 100644 index 0000000..f88715a --- /dev/null +++ b/test/experimental_dict.jl @@ -0,0 +1,130 @@ +using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental +using DocStringExtensions +using BenchmarkTools +using Test + + +function test_correctness_update(N) + X=1:N + Y=1:N + A=ExtendableSparseMatrixParallelDict{Float64,Int}(N^2,N^2,1) + allnp=[4,5,6,7,8] + + # Assembele without partitioning + # this gives the "base truth" to compare with + partassemble!(A,X,Y) + + # Save the nonzeros + nz=copy(nonzeros(A)) + for np in allnp + # Reset the nonzeros, keeping the structure intact + nonzeros(A).=0 + # Parallel assembly whith np threads + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz 
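+        # comparing nonzeros is valid here since both assemblies produce the same sparsity pattern after flush!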
+ end +end + +""" + test_correctness_build(N) + +Test correctness of parallel assembly on NxN grid during +build phase, assuming that no structure has been assembled. +""" +function test_correctness_build(N) + X=1:N + Y=1:N + allnp=[4,5,6,7,8] + # Get the "ground truth" + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + for np in allnp + # Make a new matrix and assemble parallel. + # this should result in the same nonzeros + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y, np) + @test nonzeros(A)≈nz + end +end + +function test_correctness_mul(N; nps=5) + X=1:N + Y=1:N + allnp=[4,5,6,7,8] + A0=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A0,X,Y) + + for np in allnp + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y,np) + b=rand(N^2) + @test A*b ≈ A0*b + end +end + +function speedup_update(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + # Get the base timing + # During setup, set matrix entries to zero while keeping the structure + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) + result=[] + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + for np in allnp + # Get the parallel timing + # During setup, set matrix entries to zero while keeping the structure + partassemble!(A,X,Y,np) + t=@belapsed partassemble!($A,$X,$Y,$np,reset=false) seconds=1 setup=(nonzeros($A).=0) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end + +function speedup_build(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y) + nz=copy(nonzeros(A)) + reset!(A) + partassemble!(A,X,Y) + @assert nonzeros(A)≈(nz) + + # Get the base timing + # During setup, reset matrix to empty state. + t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + + result=[] + for np in allnp + # Get the parallel timing + # During setup, reset matrix to empty state. 
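+        # (setup= runs before each sample, so every timed run performs
+        # the full structure build starting from the empty state)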
+ t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) + @assert nonzeros(A)≈nz + push!(result,(np,round(t0/t,digits=2))) + end + result +end + +function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) + X=1:N + Y=1:N + + A0=ExtendableSparseMatrix(N^2,N^2) + partassemble!(A0,X,Y) + b=rand(N^2) + t0=@belapsed $A0*$b seconds=1 + + result=[] + for np in allnp + A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A,X,Y,np) + t=@belapsed $A*$b seconds=1 + push!(result,(np,round(t0/t,digits=2))) + end + result +end From 96f015d2f4e782d023ae69ba6e359eac6103a1e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Wed, 22 May 2024 22:51:27 +0200 Subject: [PATCH 20/44] Parallelism with Dict based matrices --- src/experimental/Experimental.jl | 4 + src/experimental/abstractextendable.jl | 5 +- .../extendablesparsematrixdict.jl | 150 +++----------- .../extendablesparsematrixparalleldict.jl | 194 ++++++++++++++++++ src/experimental/sparsematrixdict.jl | 21 +- test/ExperimentalDict.jl | 36 ++++ ...al_parallel.jl => ExperimentalParallel.jl} | 0 ...al_dict.jl => ExperimentalParallelDict.jl} | 19 +- 8 files changed, 285 insertions(+), 144 deletions(-) create mode 100644 src/experimental/extendablesparsematrixparalleldict.jl create mode 100644 test/ExperimentalDict.jl rename test/{experimental_parallel.jl => ExperimentalParallel.jl} (100%) rename test/{experimental_dict.jl => ExperimentalParallelDict.jl} (90%) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index f7aa518..9fdf1e4 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -44,8 +44,12 @@ include("sparsematrixdict.jl") export SparseMatrixDict include("extendablesparsematrixdict.jl") +export ExtendableSparseMatrixDict + +include("extendablesparsematrixparalleldict.jl") export ExtendableSparseMatrixParallelDict, partcolors! + include("parallel_testtools.jl") export part2d, showgrid, partassemble!, assemblepartition! 
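
A minimal usage sketch of the dict-based parallel type introduced in this
patch (illustrative only, not part of the patch itself; the row split and
the use of plain Threads.@threads are assumptions, since the package drives
assembly through partassemble! and its @tasks loops):

    using ExtendableSparse, ExtendableSparse.Experimental

    n = 4
    # one SparseMatrixDict per task collects entries not yet in the CSC part
    A = ExtendableSparseMatrixParallelDict(n, n, 2)
    Threads.@threads for tid in 1:2
        rows = tid == 1 ? (1:2) : (3:4)   # disjoint rows: no write conflicts
        for i in rows
            # new entries land in the dict matrix with number tid
            rawupdateindex!(A, +, 1.0, i, i, tid)
        end
    end
    flush!(A)    # merge all dicts into the CSC part
    @assert nnz(A) == n

Besides merging, flush! records the partition owning each node; the
color-partitioned mul! added below relies on this information.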
diff --git a/src/experimental/abstractextendable.jl b/src/experimental/abstractextendable.jl index d44b4d1..2cd72c8 100644 --- a/src/experimental/abstractextendable.jl +++ b/src/experimental/abstractextendable.jl @@ -4,7 +4,7 @@ SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) -Base.size(ext::AbstractExtendableSparseMatrix)=size(sparse(ext)) +Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) A=sparse(ext) @@ -31,4 +31,5 @@ function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatr end end - + +LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix{Tv,Ti}, x) where {Tv,Ti} = mul!(r,sparse(ext),x) diff --git a/src/experimental/extendablesparsematrixdict.jl b/src/experimental/extendablesparsematrixdict.jl index 5ef63da..6641e5a 100644 --- a/src/experimental/extendablesparsematrixdict.jl +++ b/src/experimental/extendablesparsematrixdict.jl @@ -1,68 +1,34 @@ -mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ cscmatrix::SparseMatrixCSC{Tv, Ti} """ - Linked list structure holding data of extension + Vector of dictionaries for new entries """ - dictmatrices::Vector{SparseMatrixDict{Tv,Ti}} - - nodeparts::Vector{Ti} - partnodes::Vector{Vector{Ti}} - colparts::Vector{Vector{Ti}} -end - - -function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti} - ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n), - [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p], - zeros(Ti,n), - Vector{Ti}[], - Vector{Ti}[] - ) + dictmatrix::SparseMatrixDict{Tv,Ti} end -function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti} - ncol=maximum(partcolors) - colparts=[Ti[] for i=1:ncol] - for i=1:length(partcolors) - push!(colparts[partcolors[i]],i) - end - ext.colparts=colparts - ext -end -function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti} - ext=ExtendableSparseMatrixParallelDict(m,n,length(pc)) - partcolors!(ext,pc) +function ExtendableSparseMatrixDict{Tv, Ti}(n::Integer,m::Integer) where{Tv, Ti<:Integer} + ExtendableSparseMatrixDict(spzeros(Tv, Ti, m, n), + SparseMatrixDict{Tv,Ti}(m,n) + ) end +ExtendableSparseMatrixDict(n::Integer,m::Integer)=ExtendableSparseMatrixDict{Float64,Int}(n,m) -ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p) - - -function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti} +function reset!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where {Tv,Ti} m,n=size(ext.cscmatrix) ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p] - ext.nodeparts.=zero(Ti) + ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) ext end -function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti} - reset!(ext,length(ext.dictmatrices)) -end - -function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti} - reset!(ext,length(pc)) - partcolors!(ext,pc) -end - -function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} - lnew=sumlength(ext.dictmatrices) +function flush!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where{Tv,Ti} + lnew=length(ext.dictmatrix.values) if 
lnew>0 (;colptr,nzval,rowval,m,n)=ext.cscmatrix l=lnew+nnz(ext.cscmatrix) @@ -70,18 +36,6 @@ function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} J=Vector{Ti}(undef,l) V=Vector{Tv}(undef,l) i=1 - ip=1 - for m in ext.dictmatrices - for (p,v) in m.values - ext.nodeparts[first(p)]=ip - I[i]=first(p) - J[i]=last(p) - V[i]=v - i=i+1 - end - ip=ip+1 - end - for icsc=1:length(colptr)-1 for j=colptr[icsc]:colptr[icsc+1]-1 I[i]=icsc @@ -91,39 +45,25 @@ function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} end end - np=length(ext.dictmatrices) - ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np] - ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) - - n,m=size(ext) - pn=zeros(Int,np) - for i=1:n - if ext.nodeparts[i]>0 - pn[ext.nodeparts[i]]+=1 - end + for (p,v) in ext.dictmatrix.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 end - partnodes=[zeros(Int,pn[i]) for i=1:np] - pn.=1 - for i=1:n - if ext.nodeparts[i]>0 - ip=ext.nodeparts[i] - partnodes[ip][pn[ip]]=i - pn[ip]+=1 - end - end - ext.partnodes=partnodes + + ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) + ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) end ext end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) + +function SparseArrays.sparse(ext::ExtendableSparseMatrixDict) flush!(ext) ext.cscmatrix end - - -function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, +function Base.setindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti} @@ -131,61 +71,31 @@ function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - error("use rawupdateindex! for new entries into ExtendableSparseMatrixParallelDict") + setindex!(ext.dictmatrix,v,i,j) end end -function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, +function Base.getindex(ext::ExtendableSparseMatrixDict{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 - return ext.cscmatrix.nzval[k] - elseif sumlength(ext.dictmatrices) == 0 - return zero(Tv) + ext.cscmatrix.nzval[k] else - error("flush! 
ExtendableSparseMatrixParallelDict before using getindex") + getindex(ext.dictmatrix,i,j) end end -function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, +function rawupdateindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, op, v, i, - j, - tid) where {Tv, Ti <: Integer} + j) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) + rawupdateindex!(ext.dictmatrix,op,v,i,j) end end - -function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} - A=ext.cscmatrix - colparts=ext.colparts - partnodes=ext.partnodes - rows = rowvals(A) - vals = nonzeros(A) - - r.=zero(Tv) - m,n=size(A) - for icol=1:length(colparts) - part=colparts[icol] - @tasks for ip=1:length(part) - @inbounds begin - for j in partnodes[part[ip]] - for i in nzrange(A,j) - row = rows[i] - val = vals[i] - r[row]+=val*x[j] - end - end - end - end - end - r -end - diff --git a/src/experimental/extendablesparsematrixparalleldict.jl b/src/experimental/extendablesparsematrixparalleldict.jl new file mode 100644 index 0000000..241964d --- /dev/null +++ b/src/experimental/extendablesparsematrixparalleldict.jl @@ -0,0 +1,194 @@ +mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + dictmatrices::Vector{SparseMatrixDict{Tv,Ti}} + + nodeparts::Vector{Ti} + partnodes::Vector{Vector{Ti}} + colparts::Vector{Vector{Ti}} +end + + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti} + ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n), + [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p], + zeros(Ti,n), + Vector{Ti}[], + Vector{Ti}[] + ) +end + +function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti} + ncol=maximum(partcolors) + colparts=[Ti[] for i=1:ncol] + for i=1:length(partcolors) + push!(colparts[partcolors[i]],i) + end + ext.colparts=colparts + ext +end + +function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti} + ext=ExtendableSparseMatrixParallelDict(m,n,length(pc)) + partcolors!(ext,pc) +end + + +ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p) + + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p] + ext.nodeparts.=zero(Ti) + ext +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti} + reset!(ext,length(ext.dictmatrices)) +end + +function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti} + reset!(ext,length(pc)) + partcolors!(ext,pc) +end + + +function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti} + lnew=sumlength(ext.dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=ext.cscmatrix + l=lnew+nnz(ext.cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + ip=1 + for m in ext.dictmatrices + for (p,v) in m.values + ext.nodeparts[last(p)]=ip + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + + + 
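# Invert the nodeparts map assembled above: count the nodes owned by
+        # each partition, then collect their indices into partnodes.
+        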
np=length(ext.dictmatrices) + ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np] + ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) + + npts::Vector{Ti}=ext.nodeparts + pn=zeros(Ti,np) + for i=1:n + npi=npts[i] + if npi>0 + pn[npi]+=1 + end + end + partnodes=[zeros(Int,pn[i]) for i=1:np] + pn.=1 + for i=1:n + npi=ext.nodeparts[i] + if npi>0 + partnodes[npi][pn[npi]]=i + pn[npi]+=1 + end + end + ext.partnodes=partnodes + end + ext +end + +function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) + flush!(ext) + ext.cscmatrix +end + + + +function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) where {Tv, Ti} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! for new entries into ExtendableSparseMatrixParallelDict") + end +end + + +function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + i::Integer, + j::Integer) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sumlength(ext.dictmatrices) == 0 + return zero(Tv) + else + error("flush! ExtendableSparseMatrixParallelDict before using getindex") + end +end + +function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, + op, + v, + i, + j, + tid) where {Tv, Ti <: Integer} + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) + end +end + +function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} + A=ext.cscmatrix + colparts=ext.colparts + partnodes=ext.partnodes + rows = rowvals(A) + vals = nonzeros(A) + + r.=zero(Tv) + m,n=size(A) + for icol=1:length(colparts) + part=colparts[icol] + @tasks for ip=1:length(part) + @inbounds begin + for j in partnodes[part[ip]] + for i in nzrange(A,j) + row = rows[i] + val = vals[i] + r[row]+=val*x[j] + end + end + end + end + end + r +end + diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 112f949..2288c89 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -15,33 +15,17 @@ end function rawupdateindex!(m::SparseMatrixDict{Tv,Ti},op,v,i,j) where {Tv,Ti} p=Pair(i,j) - haskey(m.values,p) ? vnew=op(m.values[p],v) : vnew=op(zero(Tv),v) - m.values[p]=vnew + m.values[p]=op(get(m.values, p, zero(Tv)),v) end function Base.getindex(m::SparseMatrixDict{Tv},i,j) where Tv - haskey(m.values,Pair(i,j)) ? 
m.values[Pair(i,j)] : zero(Tv) + get(m.values,Pair(i,j),zero(Tv)) end Base.size(m::SparseMatrixDict)=(m.m,m.n) flush!(m::SparseMatrixDict)=nothing -function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} - l=length(m.values) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - for (p,v) in m.values - I[i]=first(p) - J[i]=last(p) - V[i]=v - i=i+1 - end - SparseArrays.sparse!(I,J,V,size(m)...,+) -end - sumlength(mv::Vector{SparseMatrixDict{Tv,Ti}}) where{Tv,Ti}=sum(m->length(m.values),mv) function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} @@ -62,3 +46,4 @@ function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} end +SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} = sparse([m]) diff --git a/test/ExperimentalDict.jl b/test/ExperimentalDict.jl new file mode 100644 index 0000000..a8b0375 --- /dev/null +++ b/test/ExperimentalDict.jl @@ -0,0 +1,36 @@ +module ExperimentalDict + +using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental +using DocStringExtensions +using BenchmarkTools +using Test + + +function ExtendableSparse.reset!(A::ExtendableSparseMatrix) + A.cscmatrix=spzeros(size(A)...) + A.lnkmatrix=nothing +end + + +function test_correctness_build(N) + X=1:N + Y=1:N + A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) + A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + partassemble!(A0,X,Y) + partassemble!(A,X,Y) + @test sparse(A0)≈sparse(A) +end + +function speed_build(N) + X=1:N + Y=1:N + A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) + A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + + tlnk= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) + tdict= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + tdict/tlnk +end + +end diff --git a/test/experimental_parallel.jl b/test/ExperimentalParallel.jl similarity index 100% rename from test/experimental_parallel.jl rename to test/ExperimentalParallel.jl diff --git a/test/experimental_dict.jl b/test/ExperimentalParallelDict.jl similarity index 90% rename from test/experimental_dict.jl rename to test/ExperimentalParallelDict.jl index f88715a..96d2d95 100644 --- a/test/experimental_dict.jl +++ b/test/ExperimentalParallelDict.jl @@ -1,3 +1,5 @@ +module ExperimentalParallelDict + using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using DocStringExtensions using BenchmarkTools @@ -48,10 +50,9 @@ function test_correctness_build(N) end end -function test_correctness_mul(N; nps=5) +function test_correctness_mul(N; allnp=[4,5,6,7,8]) X=1:N Y=1:N - allnp=[4,5,6,7,8] A0=ExtendableSparseMatrix(N^2,N^2) partassemble!(A0,X,Y) @@ -88,16 +89,23 @@ end function speedup_build(N; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N + A0=ExtendableSparseMatrixParallelDict(N^2,N^2,1) A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + partassemble!(A0,X,Y) + nz=copy(nonzeros(A0)) + reset!(A0) + partassemble!(A0,X,Y) + @assert nonzeros(A0)≈(nz) + partassemble!(A,X,Y) nz=copy(nonzeros(A)) reset!(A) partassemble!(A,X,Y) @assert nonzeros(A)≈(nz) - + # Get the base timing # During setup, reset matrix to empty state. 
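+    # (A0 carries the single-threaded baseline, A the parallel runs.)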
- t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + t0=@belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) result=[] for np in allnp @@ -128,3 +136,6 @@ function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) end result end + +end + From 79432f336ae1c304197e6adebf3b63e1d91b170e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 23 May 2024 00:02:55 +0200 Subject: [PATCH 21/44] AbstractExtendableSparseMatrix for "old" code. --- src/ExtendableSparse.jl | 1 + src/experimental/Experimental.jl | 4 +- src/experimental/abstractextendable.jl | 35 --- src/matrix/abstractextendable.jl | 299 ++++++++++++++++++++++ src/matrix/extendable.jl | 337 ++----------------------- 5 files changed, 316 insertions(+), 360 deletions(-) delete mode 100644 src/experimental/abstractextendable.jl create mode 100644 src/matrix/abstractextendable.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 5bdc442..8ab64ba 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -23,6 +23,7 @@ import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") include("matrix/sparsematrixlnk.jl") +include("matrix/abstractextendable.jl") include("matrix/extendable.jl") export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 9fdf1e4..e9e64e9 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -4,7 +4,7 @@ using LinearAlgebra using SparseArrays: AbstractSparseMatrixCSC import SparseArrays: nonzeros, getcolptr,nzrange import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex -using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash, AbstractExtendableSparseMatrix using DocStringExtensions using Metis using Base.Threads @@ -38,8 +38,6 @@ export ILUAMPreconditioner, PILUAMPreconditioner export reorderlinsys, nnz_noflush -include("abstractextendable.jl") - include("sparsematrixdict.jl") export SparseMatrixDict diff --git a/src/experimental/abstractextendable.jl b/src/experimental/abstractextendable.jl deleted file mode 100644 index 2cd72c8..0000000 --- a/src/experimental/abstractextendable.jl +++ /dev/null @@ -1,35 +0,0 @@ -abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end - -SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) - -SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) - -Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) - -function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) - A=sparse(ext) - xnnz = nnz(A) - m, n = size(A) - print(io, - m, - "×", - n, - " ", - typeof(ext), - " with ", - xnnz, - " stored ", - xnnz == 1 ? 
"entry" : "entries") - - if !haskey(io, :compact) - io = IOContext(io, :compact => true) - end - - if !(m == 0 || n == 0 || xnnz == 0) - print(io, ":\n") - Base.print_array(IOContext(io), A) - end -end - - -LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix{Tv,Ti}, x) where {Tv,Ti} = mul!(r,sparse(ext),x) diff --git a/src/matrix/abstractextendable.jl b/src/matrix/abstractextendable.jl new file mode 100644 index 0000000..589376c --- /dev/null +++ b/src/matrix/abstractextendable.jl @@ -0,0 +1,299 @@ +""" + +Must implement: +sparse +Constructor from SparseMatrixCSC +rawupdateindex! +""" + +abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. +""" +SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return nonzeros in ext.cscmatrix. +""" +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) + +Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) + + + +""" +$(SIGNATURES) + +Return element type. +""" +Base.eltype(::AbstractExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv + + + +""" +$(SIGNATURES) + + Create SparseMatrixCSC from ExtendableSparseMatrix +""" +SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrix)=sparse(A) + + + + +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) + A=sparse(ext) + xnnz = nnz(A) + m, n = size(A) + print(io, + m, + "×", + n, + " ", + typeof(ext), + " with ", + xnnz, + " stored ", + xnnz == 1 ? "entry" : "entries") + + if !haskey(io, :compact) + io = IOContext(io, :compact => true) + end + + if !(m == 0 || n == 0 || xnnz == 0) + print(io, ":\n") + Base.print_array(IOContext(io), A) + end +end + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return rowvals in ext.cscmatrix. +""" +SparseArrays.rowvals(ext::AbstractExtendableSparseMatrix)=rowvals(sparse(ext)) + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return colptr of in ext.cscmatrix. +""" +SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrix)=getcolptr(sparse(ext)) + + +""" +$(SIGNATURES) + +[`flush!`](@ref) and return findnz(ext.cscmatrix). +""" +SparseArrays.findnz(ext::AbstractExtendableSparseMatrix)=findnz(sparse(ext)) + + +@static if VERSION >= v"1.7" + SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrix)= SparseArrays._checkbuffers(sparse(ext)) +end + +""" + A\b + +[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components +are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. +In that case, Julias standard `\` is called, which is realized via UMFPACK. +""" +function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{Tv, Ti}, + b::AbstractVector) where {Tv, Ti} + SparspakLU(sparse(ext)) \ b +end + + +""" +$(SIGNATURES) + +[`\\`](@ref) for Symmetric{ExtendableSparse} +""" +function LinearAlgebra.:\(symm_ext::Symmetric{Tm, T}, + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + Symmetric(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... +end + +""" +$(SIGNATURES) + +[`\\`](@ref) for Hermitian{ExtendableSparse} +""" +function LinearAlgebra.:\(symm_ext::Hermitian{Tm, T}, + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + Hermitian(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... 
+end + +if USE_GPL_LIBS + for (Tv) in (:Float64, :ComplexF64) + @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{$Tv, Ti}, + B::AbstractVector) where {Ti} + sparse(ext) \ B + end end + + @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, + AbstractExtendableSparseMatrix{ + $Tv, + Ti + }}, + B::AbstractVector) where {Ti} + symm_csc = Symmetric(sparse(symm_ext.data), Symbol(symm_ext.uplo)) + symm_csc \ B + end end + + @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, + AbstractExtendableSparseMatrix{ + $Tv, + Ti + }}, + B::AbstractVector) where {Ti} + symm_csc = Hermitian(sparse(symm_ext.data), Symbol(symm_ext.uplo)) + symm_csc \ B + end end + end +end # USE_GPL_LIBS + +""" +$(SIGNATURES) + +[`flush!`](@ref) and ldiv with ext.cscmatrix +""" +function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrix, x) + LinearAlgebra.ldiv!(r, sparse(ext), x) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and multiply with ext.cscmatrix +""" +function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix, x) + LinearAlgebra.mul!(r, sparse(ext), x) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate norm from cscmatrix +""" +function LinearAlgebra.norm(A::AbstractExtendableSparseMatrix, p::Real = 2) + return LinearAlgebra.norm(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate opnorm from cscmatrix +""" +function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrix, p::Real = 2) + return LinearAlgebra.opnorm(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and calculate cond from cscmatrix +""" +function LinearAlgebra.cond(A::AbstractExtendableSparseMatrix, p::Real = 2) + return LinearAlgebra.cond(sparse(A), p) +end + +""" +$(SIGNATURES) + +[`flush!`](@ref) and check for symmetry of cscmatrix +""" +function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrix) + return LinearAlgebra.issymmetric(sparse(A)) +end + + + + + + +function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrix + T(sparse(A) + sparse(B)) +end + +function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrix + T(sparse(A) - sparse(B)) +end + +function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrix + T(sparse(A) * sparse(B)) +end + +""" +$(SIGNATURES) +""" +function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrix + return T(d * sparse(ext)) +end + +""" +$(SIGNATURES) +""" +function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrix + return T(sparse(ext) * d) +end + + +""" +$(SIGNATURES) + +Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. +""" +function Base.:+(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) + return sparse(ext) + csc +end + + +""" +$(SIGNATURES) + +Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. +""" +function Base.:-(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) + return sparse(ext) - csc +end + +""" +$(SIGNATURES) + +Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. 
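+The result is returned as a plain SparseMatrixCSC.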
+""" +function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrix) + return csc - sparse(ext) +end + +""" +$(SIGNATURES) +""" +function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrix) + dropzeros!(sparse(ext)) +end + + + +function mark_dirichlet(A::AbstractExtendableSparseMatrix;penalty=1.0e20) + mark_dirichlet(sparse(A);penalty) +end + +function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrix + T(eliminate_dirichlet(sparse(A),dirichlet)) +end + +function eliminate_dirichlet!(A::AbstractExtendableSparseMatrix,dirichlet) + eliminate_dirichlet!(sparse(A),dirichlet) + A +end + diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index df67dc7..a655c12 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -7,7 +7,7 @@ either in cscmatrix, or in lnkmatrix, never in both. $(TYPEDFIELDS) """ -mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrixCSC{Tv, Ti} +mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ @@ -82,9 +82,12 @@ $(SIGNATURES) Create ExtendableSparseMatrix from SparseMatrixCSC """ - function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) +end + +function ExtendableSparseMatrix{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) end """ @@ -111,12 +114,15 @@ ExtendableSparseMatrix(A::AbstractMatrix) = ExtendableSparseMatrix(sparse(A)) Create ExtendableSparseMatrix from triplet (COO) data. """ ExtendableSparseMatrix(I, J, V::AbstractVector) = ExtendableSparseMatrix(sparse(I, J, V)) + function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n) ExtendableSparseMatrix(sparse(I, J, V, m, n)) end + function ExtendableSparseMatrix(I, J, V::AbstractVector, combine::Function) ExtendableSparseMatrix(sparse(I, J, V, combine)) end + function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n, combine::Function) ExtendableSparseMatrix(sparse(I, J, V, m, n, combine)) end @@ -136,17 +142,6 @@ end -""" -$(SIGNATURES) - - Create SparseMatrixCSC from ExtendableSparseMatrix -""" -function SparseArrays.SparseMatrixCSC(A::ExtendableSparseMatrix) - flush!(A) - A.cscmatrix -end - - """ $(SIGNATURES) @@ -285,42 +280,7 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, end end -""" -$(SIGNATURES) -Size of ExtendableSparseMatrix. -""" -Base.size(ext::ExtendableSparseMatrix) = (ext.cscmatrix.m, ext.cscmatrix.n) - -""" -$(SIGNATURES) - -Show matrix -""" -function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrix) - flush!(ext) - xnnz = nnz(ext) - m, n = size(ext) - print(io, - m, - "×", - n, - " ", - typeof(ext), - " with ", - xnnz, - " stored ", - xnnz == 1 ? "entry" : "entries") - - if !haskey(io, :compact) - io = IOContext(io, :compact => true) - end - - if !(m == 0 || n == 0 || xnnz == 0) - print(io, ":\n") - Base.print_array(IOContext(io), ext.cscmatrix) - end -end """ $(SIGNATURES) @@ -336,276 +296,25 @@ function flush!(ext::ExtendableSparseMatrix) end return ext end -""" -$(SIGNATURES) - -Reset ExtenableSparseMatrix into state similar to that after creation. -""" -function reset!(A::ExtendableSparseMatrix) - A.cscmatrix=spzeros(size(A)...) 
- A.lnkmatrix=nothing -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. -""" -function SparseArrays.nnz(ext::ExtendableSparseMatrix) - flush!(ext) - return nnz(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return nonzeros in ext.cscmatrix. -""" -function SparseArrays.nonzeros(ext::ExtendableSparseMatrix) - flush!(ext) - return nonzeros(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -Return element type. -""" -Base.eltype(::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return rowvals in ext.cscmatrix. -""" -function SparseArrays.rowvals(ext::ExtendableSparseMatrix) - flush!(ext) - rowvals(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return colptr of in ext.cscmatrix. -""" -function SparseArrays.getcolptr(ext::ExtendableSparseMatrix) - flush!(ext) - return getcolptr(ext.cscmatrix) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and return findnz(ext.cscmatrix). -""" -function SparseArrays.findnz(ext::ExtendableSparseMatrix) - flush!(ext) - return findnz(ext.cscmatrix) -end - -@static if VERSION >= v"1.7" - function SparseArrays._checkbuffers(ext::ExtendableSparseMatrix) - flush!(ext) - SparseArrays._checkbuffers(ext.cscmatrix) - end -end - -""" - A\b - -[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components -are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. -In that case, Julias standard `\` is called, which is realized via UMFPACK. -""" -function LinearAlgebra.:\(ext::ExtendableSparseMatrix{Tv, Ti}, - b::AbstractVector) where {Tv, Ti} - flush!(ext) - SparspakLU(ext) \ b -end - -""" -$(SIGNATURES) - -[`\\`](@ref) for Symmetric{ExtendableSparse} -""" -function LinearAlgebra.:\(symm_ext::Symmetric{Tm, ExtendableSparseMatrix{Tm, Ti}}, - b::AbstractVector) where {Tm, Ti} - symm_ext.data \ b # no ldlt yet ... -end - -""" -$(SIGNATURES) - -[`\\`](@ref) for Hermitian{ExtendableSparse} -""" -function LinearAlgebra.:\(symm_ext::Hermitian{Tm, ExtendableSparseMatrix{Tm, Ti}}, - b::AbstractVector) where {Tm, Ti} - symm_ext.data \ B # no ldlt yet ... 
-end - -if USE_GPL_LIBS - for (Tv) in (:Float64, :ComplexF64) - @eval begin function LinearAlgebra.:\(ext::ExtendableSparseMatrix{$Tv, Ti}, - B::AbstractVector) where {Ti} - flush!(ext) - ext.cscmatrix \ B - end end - - @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, - ExtendableSparseMatrix{ - $Tv, - Ti - }}, - B::AbstractVector) where {Ti} - flush!(symm_ext.data) - symm_csc = Symmetric(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo)) - symm_csc \ B - end end - - @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, - ExtendableSparseMatrix{ - $Tv, - Ti - }}, - B::AbstractVector) where {Ti} - flush!(symm_ext.data) - symm_csc = Hermitian(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo)) - symm_csc \ B - end end - end -end # USE_GPL_LIBS -""" -$(SIGNATURES) -[`flush!`](@ref) and ldiv with ext.cscmatrix -""" -function LinearAlgebra.ldiv!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x) +function SparseArrays.sparse(ext::ExtendableSparseMatrix) flush!(ext) - return LinearAlgebra.ldiv!(r, ext.cscmatrix, x) + ext.cscmatrix end -""" -$(SIGNATURES) - -[`flush!`](@ref) and multiply with ext.cscmatrix -""" -function LinearAlgebra.mul!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x) - flush!(ext) - return LinearAlgebra.mul!(r, ext.cscmatrix, x) -end """ $(SIGNATURES) -[`flush!`](@ref) and calculate norm from cscmatrix -""" -function LinearAlgebra.norm(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.norm(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate opnorm from cscmatrix -""" -function LinearAlgebra.opnorm(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.opnorm(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and calculate cond from cscmatrix -""" -function LinearAlgebra.cond(A::ExtendableSparseMatrix, p::Real = 2) - flush!(A) - return LinearAlgebra.cond(A.cscmatrix, p) -end - -""" -$(SIGNATURES) - -[`flush!`](@ref) and check for symmetry of cscmatrix -""" -function LinearAlgebra.issymmetric(A::ExtendableSparseMatrix) - flush!(A) - return LinearAlgebra.issymmetric(A.cscmatrix) -end - -""" -$(SIGNATURES) - -Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. -""" -function Base.:+(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC) - flush!(ext) - return ext.cscmatrix + csc -end - -function Base.:+(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix + B.cscmatrix) -end - -function Base.:-(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix - B.cscmatrix) -end - -function Base.:*(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix) - flush!(A) - flush!(B) - return ExtendableSparseMatrix(A.cscmatrix * B.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.:*(d::Diagonal, ext::ExtendableSparseMatrix) - flush!(ext) - return ExtendableSparseMatrix(d * ext.cscmatrix) -end - -""" -$(SIGNATURES) -""" -function Base.:*(ext::ExtendableSparseMatrix, d::Diagonal) - flush!(ext) - return ExtendableSparseMatrix(ext.cscmatrix * d) -end - -""" -$(SIGNATURES) - -Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. +Reset ExtenableSparseMatrix into state similar to that after creation. """ -function Base.:-(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC) - flush!(ext) - return ext.cscmatrix - csc +function reset!(A::ExtendableSparseMatrix) + A.cscmatrix=spzeros(size(A)...) 
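+    # drop both the assembled CSC part and any pending LNK entries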
+ A.lnkmatrix=nothing end -""" -$(SIGNATURES) - -Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. -""" -function Base.:-(csc::SparseMatrixCSC, ext::ExtendableSparseMatrix) - flush!(ext) - return csc - ext.cscmatrix -end -""" -$(SIGNATURES) -""" -function SparseArrays.dropzeros!(ext::ExtendableSparseMatrix) - flush!(ext) - dropzeros!(ext.cscmatrix) -end """ $(SIGNATURES) @@ -652,19 +361,3 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} end -function mark_dirichlet(A::ExtendableSparseMatrix;penalty=1.0e20) - flush!(A) - mark_dirichlet(A.cscmatrix;penalty) -end - -function eliminate_dirichlet(A::ExtendableSparseMatrix,dirichlet) - flush!(A) - ExtendableSparseMatrix(eliminate_dirichlet(A.cscmatrix,dirichlet)) -end - -function eliminate_dirichlet!(A::ExtendableSparseMatrix,dirichlet) - flush!(A) - eliminate_dirichlet!(A.cscmatrix,dirichlet) - A -end - From 94250d1ecd70ce027936eabebbdd21223f8a891f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 23 May 2024 11:11:52 +0200 Subject: [PATCH 22/44] add Experimental tests to CI --- test/ExperimentalDict.jl | 1 - test/ExperimentalParallelDict.jl | 1 - test/runtests.jl | 12 ++++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/ExperimentalDict.jl b/test/ExperimentalDict.jl index a8b0375..7bb8211 100644 --- a/test/ExperimentalDict.jl +++ b/test/ExperimentalDict.jl @@ -1,7 +1,6 @@ module ExperimentalDict using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools using Test diff --git a/test/ExperimentalParallelDict.jl b/test/ExperimentalParallelDict.jl index 96d2d95..6d817a0 100644 --- a/test/ExperimentalParallelDict.jl +++ b/test/ExperimentalParallelDict.jl @@ -1,7 +1,6 @@ module ExperimentalParallelDict using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools using Test diff --git a/test/runtests.jl b/test/runtests.jl index 4d7ae07..1c63f1c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,18 @@ using BenchmarkTools using MultiFloats using ForwardDiff +@testset "ExperimentalDict" begin + include("ExperimentalDict.jl") + ExperimentalDict.test_correctness_build(100) +end + +@testset "ExperimentalParallelDict" begin + include("ExperimentalParallelDict.jl") + ExperimentalParallelDict.test_correctness_update(200) + ExperimentalParallelDict.test_correctness_build(200) + ExperimentalParallelDict.test_correctness_mul(200) +end + @testset "Constructors" begin include("test_constructors.jl") end @testset "Copy-Methods" begin include("test_copymethods.jl") end From e6887fc5576b13b24800051b976225baee033b3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Thu, 23 May 2024 11:41:16 +0200 Subject: [PATCH 23/44] add ExperimentalParallelLocking to tests --- src/matrix/extendable.jl | 6 +++- ...llel.jl => ExperimentalParallelLocking.jl} | 33 +++++++------------ test/runtests.jl | 15 +++++++++ 3 files changed, 32 insertions(+), 22 deletions(-) rename test/{ExperimentalParallel.jl => ExperimentalParallelLocking.jl} (85%) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index a655c12..572227e 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -30,7 +30,11 @@ mutable struct Locking locking::Bool end -const locking=Locking(true) +# +# Locking functionality just for developing parallelization. +# To be removed before merging into main branch. 
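+# The locking tests enable it via with_locking!(true) around parallel
+# assembly and switch it off again afterwards.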
+# +const locking=Locking(false) function with_locking!(l::Bool) global locking diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallelLocking.jl similarity index 85% rename from test/ExperimentalParallel.jl rename to test/ExperimentalParallelLocking.jl index fa42d1d..4ad7ff2 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallelLocking.jl @@ -1,6 +1,8 @@ +module ExperimentalParallelLocking + using ExtendableSparse,SparseArrays +using ExtendableSparse: with_locking! using ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools using Test @@ -11,6 +13,7 @@ Test correctness of parallel assembly on NxN grid during update phase, assuming that the structure has been assembled. """ function test_correctness_update(N) + with_locking!(true) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -29,6 +32,7 @@ function test_correctness_update(N) partassemble!(A,X,Y, np) @test nonzeros(A)≈nz end + with_locking!(false) end """ @@ -38,6 +42,7 @@ Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ function test_correctness_build(N) + with_locking!(true) X=1:N Y=1:N allnp=[4,5,6,7,8] @@ -52,20 +57,10 @@ function test_correctness_build(N) partassemble!(A,X,Y, np) @test nonzeros(A)≈nz end + with_locking!(false) end -@testset "update correctness" begin - test_correctness_update(50) - test_correctness_update(100) - test_correctness_update(rand(30:200)) -end - -@testset "build correctness" begin - test_correctness_build(50) - test_correctness_build(100) - test_correctness_build(rand(30:200)) -end """ speedup_update(N) @@ -74,6 +69,7 @@ Benchmark parallel speedup of update phase of parallel assembly on NxN grid. Check for correctness as well. """ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) + with_locking!(true) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -90,18 +86,10 @@ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end + with_locking!(false) result end -""" - reset!(A) - -Reset ExtenableSparseMatrix into state similar to that after creation. -""" -function ExtendableSparse.reset!(A::ExtendableSparseMatrix) - A.cscmatrix=spzeros(size(A)...) - A.lnkmatrix=nothing -end """ speedup_build(N) @@ -112,6 +100,7 @@ Check for correctness as well. Works in the moment with locking. 
""" function speedup_build(N; allnp=[4,5,6,7,8,9,10]) + with_locking!(true) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -133,5 +122,7 @@ function speedup_build(N; allnp=[4,5,6,7,8,9,10]) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end + with_locking!(false) result end +end diff --git a/test/runtests.jl b/test/runtests.jl index 1c63f1c..e7dc9d7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,21 @@ using BenchmarkTools using MultiFloats using ForwardDiff + +@testset "ExperimentalParallelLocking" begin + include("ExperimentalParallelLocking.jl") + @testset "update correctness" begin + ExperimentalParallelLocking.test_correctness_update(50) + ExperimentalParallelLocking.test_correctness_update(100) + ExperimentalParallelLocking.test_correctness_update(rand(30:200)) + end + + @testset "build correctness" begin + ExperimentalParallelLocking.test_correctness_build(50) + ExperimentalParallelLocking.test_correctness_build(100) + ExperimentalParallelLocking.test_correctness_build(rand(30:200)) + end +end @testset "ExperimentalDict" begin include("ExperimentalDict.jl") ExperimentalDict.test_correctness_build(100) From 58fbb32d8ae5341bf7fdd5b9f4939adae0f854a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 26 May 2024 19:57:46 +0200 Subject: [PATCH 24/44] Generic extendable sparse matrix structs for parallel, scalar --- src/experimental/Experimental.jl | 25 +- .../extendablesparsematrixdict.jl | 101 ---- .../extendablesparsematrixparallel.jl | 159 ++++++ .../extendablesparsematrixparalleldict.jl | 194 -------- .../extendablesparsematrixscalar.jl | 75 +++ src/experimental/parallel_testtools.jl | 4 +- src/experimental/sparsematrixdict.jl | 82 +++- src/experimental/sparsematrixlnkdict.jl | 452 ++++++++++++++++++ ...mental_rect.jl => ExperimentalParallel.jl} | 0 9 files changed, 782 insertions(+), 310 deletions(-) delete mode 100644 src/experimental/extendablesparsematrixdict.jl create mode 100644 src/experimental/extendablesparsematrixparallel.jl delete mode 100644 src/experimental/extendablesparsematrixparalleldict.jl create mode 100644 src/experimental/extendablesparsematrixscalar.jl create mode 100644 src/experimental/sparsematrixlnkdict.jl rename test/{experimental_rect.jl => ExperimentalParallel.jl} (100%) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index e9e64e9..37a34d1 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -41,13 +41,34 @@ export reorderlinsys, nnz_noflush include("sparsematrixdict.jl") export SparseMatrixDict -include("extendablesparsematrixdict.jl") +include("sparsematrixlnkdict.jl") +export SparseMatrixLNKDict + +include("extendablesparsematrixscalar.jl") +export ExtendableSparseMatrixScalar + +const ExtendableSparseMatrixDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixDict{Tv,Ti},Tv,Ti} export ExtendableSparseMatrixDict -include("extendablesparsematrixparalleldict.jl") + +const ExtendableSparseMatrixLNKDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} +export ExtendableSparseMatrixLNKDict + +const ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNK{Tv,Ti},Tv,Ti} +export ExtendableSparseMatrixLNK + + +include("extendablesparsematrixparallel.jl") +const ExtendableSparseMatrixParallelDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixDict{Tv,Ti},Tv,Ti} +ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) export 
ExtendableSparseMatrixParallelDict, partcolors! +const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} +ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) +export ExtendableSparseMatrixParallelLNKDict, partcolors! + + include("parallel_testtools.jl") export part2d, showgrid, partassemble!, assemblepartition! diff --git a/src/experimental/extendablesparsematrixdict.jl b/src/experimental/extendablesparsematrixdict.jl deleted file mode 100644 index 6641e5a..0000000 --- a/src/experimental/extendablesparsematrixdict.jl +++ /dev/null @@ -1,101 +0,0 @@ -mutable struct ExtendableSparseMatrixDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} - """ - Final matrix data - """ - cscmatrix::SparseMatrixCSC{Tv, Ti} - - """ - Vector of dictionaries for new entries - """ - dictmatrix::SparseMatrixDict{Tv,Ti} -end - - -function ExtendableSparseMatrixDict{Tv, Ti}(n::Integer,m::Integer) where{Tv, Ti<:Integer} - ExtendableSparseMatrixDict(spzeros(Tv, Ti, m, n), - SparseMatrixDict{Tv,Ti}(m,n) - ) -end - -ExtendableSparseMatrixDict(n::Integer,m::Integer)=ExtendableSparseMatrixDict{Float64,Int}(n,m) - -function reset!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where {Tv,Ti} - m,n=size(ext.cscmatrix) - ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) - ext -end - - -function flush!(ext::ExtendableSparseMatrixDict{Tv,Ti}) where{Tv,Ti} - lnew=length(ext.dictmatrix.values) - if lnew>0 - (;colptr,nzval,rowval,m,n)=ext.cscmatrix - l=lnew+nnz(ext.cscmatrix) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - - for (p,v) in ext.dictmatrix.values - I[i]=first(p) - J[i]=last(p) - V[i]=v - i=i+1 - end - - ext.dictmatrix=SparseMatrixDict{Tv,Ti}(m,n) - ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+) - end - ext -end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixDict) - flush!(ext) - ext.cscmatrix -end - -function Base.setindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) where {Tv, Ti} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - setindex!(ext.dictmatrix,v,i,j) - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixDict{Tv, Ti}, - i::Integer, - j::Integer) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] - else - getindex(ext.dictmatrix,i,j) - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixDict{Tv, Ti}, - op, - v, - i, - j) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.dictmatrix,op,v,i,j) - end -end diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl new file mode 100644 index 0000000..49566b1 --- /dev/null +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -0,0 +1,159 @@ +mutable struct ExtendableSparseMatrixXParallel{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + xmatrices::Vector{Tm} + + nodeparts::Vector{Ti} + partnodes::Vector{Vector{Ti}} + colparts::Vector{Vector{Ti}} +end + + 
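+# Tm is the backing type that collects new entries (SparseMatrixDict or
+# SparseMatrixLNKDict via the aliases in Experimental.jl); one instance
+# per partition keeps concurrent insertion free of write conflicts.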
+function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm, Tv, Ti}
+    ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n),
+                                    [Tm(m,n) for i=1:p],
+                                    zeros(Ti,n),
+                                    Vector{Ti}[],
+                                    Vector{Ti}[]
+                                    )
+end
+
+function partcolors!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, partcolors) where {Tm, Tv, Ti}
+    ncol=maximum(partcolors)
+    colparts=[Ti[] for i=1:ncol]
+    for i=1:length(partcolors)
+        push!(colparts[partcolors[i]],i)
+    end
+    ext.colparts=colparts
+    ext
+end
+
+function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m, pc::Vector) where{Tm, Tv, Ti}
+    ext=ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,length(pc))
+    partcolors!(ext,pc)
+end
+
+
+function reset!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti}
+    m,n=size(ext.cscmatrix)
+    ext.cscmatrix=spzeros(Tv, Ti, m, n)
+    ext.xmatrices=[Tm(m,n) for i=1:p]
+    ext.nodeparts.=zero(Ti)
+    ext
+end
+
+function reset!(ext::ExtendableSparseMatrixXParallel)
+    reset!(ext,length(ext.xmatrices))
+end
+
+function reset!(ext::ExtendableSparseMatrixXParallel,pc::Vector)
+    reset!(ext,length(pc))
+    partcolors!(ext,pc)
+end
+
+function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti}
+    ext.cscmatrix=sum!(ext.nodeparts, ext.xmatrices, ext.cscmatrix)
+    np=length(ext.xmatrices)
+    (m,n)=size(ext.cscmatrix)
+    ext.xmatrices=[Tm(m,n) for i=1:np]
+
+    npts::Vector{Ti}=ext.nodeparts
+    pn=zeros(Ti,np)
+    for i=1:n
+        npi=npts[i]
+        if npi>0
+            pn[npi]+=1
+        end
+    end
+    partnodes=[zeros(Int,pn[i]) for i=1:np]
+    pn.=1
+    for i=1:n
+        npi=ext.nodeparts[i]
+        if npi>0
+            partnodes[npi][pn[npi]]=i
+            pn[npi]+=1
+        end
+    end
+    ext.partnodes=partnodes
+    ext
+end
+
+
+function SparseArrays.sparse(ext::ExtendableSparseMatrixXParallel)
+    flush!(ext)
+    ext.cscmatrix
+end
+
+
+
+function Base.setindex!(ext::ExtendableSparseMatrixXParallel,
+                        v::Union{Number,AbstractVecOrMat},
+                        i::Integer,
+                        j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = v
+    else
+        error("use rawupdateindex! for new entries into ExtendableSparseMatrixXParallel")
+    end
+end
+
+
+function Base.getindex(ext::ExtendableSparseMatrixXParallel,
+                       i::Integer,
+                       j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        return ext.cscmatrix.nzval[k]
+    elseif sum(nnz,ext.xmatrices) == 0
+        return zero(eltype(ext.cscmatrix))
+    else
+        error("flush! ExtendableSparseMatrixXParallel before using getindex")
+    end
+end
+
+function rawupdateindex!(ext::ExtendableSparseMatrixXParallel,
+                         op,
+                         v,
+                         i,
+                         j,
+                         tid)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.xmatrices[tid],op,v,i,j)
+    end
+end
+
+function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x)
+    A=ext.cscmatrix
+    colparts=ext.colparts
+    partnodes=ext.partnodes
+    rows = SparseArrays.rowvals(A)
+    vals = nonzeros(A)
+
+    r.=zero(eltype(r))
+    m,n=size(A)
+    for icol=1:length(colparts)
+        part=colparts[icol]
+        @tasks for ip=1:length(part)
+            @inbounds begin
+                for j in partnodes[part[ip]]
+                    for i in nzrange(A,j)
+                        row = rows[i]
+                        val = vals[i]
+                        r[row]+=val*x[j]
+                    end
+                end
+            end
+        end
+    end
+    r
+end
diff --git a/src/experimental/extendablesparsematrixparalleldict.jl b/src/experimental/extendablesparsematrixparalleldict.jl
deleted file mode 100644
index 241964d..0000000
--- a/src/experimental/extendablesparsematrixparalleldict.jl
+++ /dev/null
@@ -1,194 +0,0 @@
-mutable struct ExtendableSparseMatrixParallelDict{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti}
-    """
-    Final matrix data
-    """
-    cscmatrix::SparseMatrixCSC{Tv, Ti}
-
-    """
-    Vector of dictionaries for new entries
-    """
-    dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}
-
-    nodeparts::Vector{Ti}
-    partnodes::Vector{Vector{Ti}}
-    colparts::Vector{Vector{Ti}}
-end
-
-
-function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,p::Integer) where{Tv, Ti}
-    ExtendableSparseMatrixParallelDict(spzeros(Tv, Ti, m, n),
-                                       [SparseMatrixDict{Tv,Ti}(m,n) for i=1:p],
-                                       zeros(Ti,n),
-                                       Vector{Ti}[],
-                                       Vector{Ti}[]
-                                       )
-end
-
-function partcolors!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, partcolors) where {Tv, Ti}
-    ncol=maximum(partcolors)
-    colparts=[Ti[] for i=1:ncol]
-    for i=1:length(partcolors)
-        push!(colparts[partcolors[i]],i)
-    end
-    ext.colparts=colparts
-    ext
-end
-
-function ExtendableSparseMatrixParallelDict{Tv, Ti}(n,m,pc::Vector) where{Tv, Ti}
-    ext=ExtendableSparseMatrixParallelDict(m,n,length(pc))
-    partcolors!(ext,pc)
-end
-
-
-ExtendableSparseMatrixParallelDict(n,m,p)=ExtendableSparseMatrixParallelDict{Float64,Int}(n,m,p)
-
-
-function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},p::Integer) where {Tv,Ti}
-    m,n=size(ext.cscmatrix)
-    ext.cscmatrix=spzeros(Tv, Ti, m, n)
-    ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:p]
-    ext.nodeparts.=zero(Ti)
-    ext
-end
-
-function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where {Tv,Ti}
-    reset!(ext,length(ext.dictmatrices))
-end
-
-function reset!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti},pc::Vector) where {Tv,Ti}
-    reset!(ext,length(pc))
-    partcolors!(ext,pc)
-end
-
-
-function flush!(ext::ExtendableSparseMatrixParallelDict{Tv,Ti}) where{Tv,Ti}
-    lnew=sumlength(ext.dictmatrices)
-    if lnew>0
-        (;colptr,nzval,rowval,m,n)=ext.cscmatrix
-        l=lnew+nnz(ext.cscmatrix)
-        I=Vector{Ti}(undef,l)
-        J=Vector{Ti}(undef,l)
-        V=Vector{Tv}(undef,l)
-        i=1
-
-        for icsc=1:length(colptr)-1
-            for j=colptr[icsc]:colptr[icsc+1]-1
-                I[i]=icsc
-                J[i]=rowval[j]
-                V[i]=nzval[j]
-                i=i+1
-            end
-        end
-
-        ip=1
-        for m in ext.dictmatrices
-            for (p,v) in m.values
-                ext.nodeparts[last(p)]=ip
-                I[i]=first(p)
-                J[i]=last(p)
-                V[i]=v
-                i=i+1
-            end
-            ip=ip+1
-        end
-
-
-        np=length(ext.dictmatrices)
-        ext.dictmatrices=[SparseMatrixDict{Tv,Ti}(m,n) for i=1:np]
-        ext.cscmatrix=SparseArrays.sparse!(I,J,V,m,n,+)
-
-        npts::Vector{Ti}=ext.nodeparts
-        pn=zeros(Ti,np)
-        for i=1:n
-            npi=npts[i]
-            if 
npi>0 - pn[npi]+=1 - end - end - partnodes=[zeros(Int,pn[i]) for i=1:np] - pn.=1 - for i=1:n - npi=ext.nodeparts[i] - if npi>0 - partnodes[npi][pn[npi]]=i - pn[npi]+=1 - end - end - ext.partnodes=partnodes - end - ext -end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixParallelDict) - flush!(ext) - ext.cscmatrix -end - - - -function Base.setindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) where {Tv, Ti} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - error("use rawupdateindex! for new entries into ExtendableSparseMatrixParallelDict") - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, - i::Integer, - j::Integer) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - return ext.cscmatrix.nzval[k] - elseif sumlength(ext.dictmatrices) == 0 - return zero(Tv) - else - error("flush! ExtendableSparseMatrixParallelDict before using getindex") - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixParallelDict{Tv, Ti}, - op, - v, - i, - j, - tid) where {Tv, Ti <: Integer} - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.dictmatrices[tid],op,v,i,j) - end -end - -function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixParallelDict{Tv,Ti}, x) where {Tv,Ti} - A=ext.cscmatrix - colparts=ext.colparts - partnodes=ext.partnodes - rows = rowvals(A) - vals = nonzeros(A) - - r.=zero(Tv) - m,n=size(A) - for icol=1:length(colparts) - part=colparts[icol] - @tasks for ip=1:length(part) - @inbounds begin - for j in partnodes[part[ip]] - for i in nzrange(A,j) - row = rows[i] - val = vals[i] - r[row]+=val*x[j] - end - end - end - end - end - r -end - diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl new file mode 100644 index 0000000..d7fdc67 --- /dev/null +++ b/src/experimental/extendablesparsematrixscalar.jl @@ -0,0 +1,75 @@ +mutable struct ExtendableSparseMatrixScalar{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Matrix for new entries + """ + xmatrix::Tm +end + + +function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm, Tv, Ti<:Integer} + ExtendableSparseMatrixScalar(spzeros(Tv, Ti, m, n), + Tm(m,n) + ) +end + + +function reset!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrix=Tm(m,n) + ext +end + + +function flush!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where{Tm,Tv,Ti} + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) 
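+    # all entries now live in cscmatrix; continue with a fresh extension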
+ ext +end + +function SparseArrays.sparse(ext::ExtendableSparseMatrixScalar) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::ExtendableSparseMatrixScalar, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + setindex!(ext.xmatrix,v,i,j) + end +end + + +function Base.getindex(ext::ExtendableSparseMatrixScalar, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] + else + getindex(ext.xmatrix,i,j) + end +end + +function rawupdateindex!(ext::ExtendableSparseMatrixScalar, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.xmatrix,op,v,i,j) + end +end diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl index 4f99283..fd261cb 100644 --- a/src/experimental/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -179,7 +179,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1) lindexes=LinearIndices((1:Nx,1:Ny)) if nt==1 - assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) + assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) else p=part2d(X,Y,nt) for icol=1:length(p) @@ -192,7 +192,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1) end -function partassemble!(A::ExtendableSparseMatrixParallelDict,X,Y,nt=1;d=0.1, reset=true) +function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict},X,Y,nt=1;d=0.1, reset=true) Nx=length(X) Ny=length(Y) size(A,1)==Nx*Ny || error("incompatible size of A") diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 2288c89..666aa10 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -1,3 +1,8 @@ +""" + $(TYPEDEF) + +Sparse matrix where entries are organized as dictionary. 
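+
+A sketch of the intended use, assuming the `rawupdateindex!` method defined
+for this type elsewhere in the package:
+
+    A = SparseMatrixDict{Float64,Int}(4, 4)
+    rawupdateindex!(A, +, 1.0, 2, 3)   # A[2,3] += 1.0, inserting the entry
+    S = sparse(A)                      # convert to SparseMatrixCSC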
+""" mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrix{Tv,Ti} m::Ti n::Ti @@ -24,26 +29,81 @@ end Base.size(m::SparseMatrixDict)=(m.m,m.n) -flush!(m::SparseMatrixDict)=nothing - -sumlength(mv::Vector{SparseMatrixDict{Tv,Ti}}) where{Tv,Ti}=sum(m->length(m.values),mv) - -function SparseArrays.sparse(mv::Vector{SparseMatrixDict{Tv,Ti}}) where {Tv,Ti} - l=sumlength(mv) +function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(m.values) I=Vector{Ti}(undef,l) J=Vector{Ti}(undef,l) V=Vector{Tv}(undef,l) i=1 - for m in mv - for (p,v) in m.values + for (p,v) in m.values + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + SparseArrays.sparse!(I,J,V,size(mv[1])...,+) +end + +function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=length(dictmatrix.values) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + for (p,v) in dictmatrix.values I[i]=first(p) J[i]=last(p) V[i]=v i=i+1 end + return SparseArrays.sparse!(I,J,V,m,n,+) end - SparseArrays.sparse!(I,J,V,size(mv[1])...,+) + cscmatrix end - -SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} = sparse([m]) +function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + lnew=sum(m->length(m.values),dictmatrices) + if lnew>0 + (;colptr,nzval,rowval,m,n)=cscmatrix + l=lnew+nnz(cscmatrix) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + + ip=1 + for m in dictmatrices + for (p,v) in m.values + nodeparts[last(p)]=ip + I[i]=first(p) + J[i]=last(p) + V[i]=v + i=i+1 + end + ip=ip+1 + end + return SparseArrays.sparse!(I,J,V,m,n,+) + end + return cscmatrix +end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl new file mode 100644 index 0000000..07f25dc --- /dev/null +++ b/src/experimental/sparsematrixlnkdict.jl @@ -0,0 +1,452 @@ +""" + $(TYPEDEF) + +Modification of SparseMatrixLNK where the pointer to first index of +column j is stored in a dictionary. +""" +mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} + """ + Number of rows + """ + m::Ti + + """ + Number of columns + """ + n::Ti + + """ + Number of nonzeros + """ + nnz::Ti + + """ + Length of arrays + """ + nentries::Ti + + """ + Linked list of column entries. Initial length is n, + it grows with each new entry. + + colptr[index] contains the next + index in the list or zero, in the later case terminating the list which + starts at index 1<=j<=n for each column j. + """ + colptr::Vector{Ti} + + """ + Dictionary to store start indices of columns + """ + colstart::Dict{Ti,Ti} + + """ + Row numbers. For each index it contains the zero (initial state) + or the row numbers corresponding to the column entry list in colptr. + """ + rowval::Vector{Ti} + + """ + Nonzero entry values correspondin to each pair + (colptr[index],rowval[index]) + """ + nzval::Vector{Tv} +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. 
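+
+For example (a sketch with explicitly given value and index types):
+
+    lnk = SparseMatrixLNKDict{Float64,Int}(10, 10)
+    lnk[3, 4] = 1.5    # creates a new entry; nnz(lnk) is 1 afterwards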
+""" +function SparseMatrixLNKDict{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + SparseMatrixLNKDict{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Ti,10)) +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +function SparseMatrixLNKDict(valuetype::Type{Tv}, indextype::Type{Ti}, m, + n) where {Tv, Ti <: Integer} + SparseMatrixLNKDict{Tv, Ti}(m, n) +end + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +SparseMatrixLNKDict(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKDict(Tv, Int, m, n) + +""" +$(SIGNATURES) + +Constructor of empty matrix. +""" +SparseMatrixLNKDict(m, n) = SparseMatrixLNKDict(Float64, m, n) + +""" +$(SIGNATURES) + +Constructor from SparseMatrixCSC. + +""" +function SparseMatrixLNKDict(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: + Integer} + lnk = SparseMatrixLNKDict{Tv, Ti}(csc.m, csc.n) + for j = 1:(csc.n) + for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) + lnk[csc.rowval[k], j] = csc.nzval[k] + end + end + lnk +end + +function findindex(lnk::SparseMatrixLNKDict, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = get(lnk.colstart, j, 0) + if k==0 + return 0,0 + end + k0 = k + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +$(SIGNATURES) + +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SparseMatrixLNKDict{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SparseMatrixLNKDict, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + if k0==0 + lnk.colstart[j]=lnk.nentries + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + + if k0>0 + lnk.colptr[k0] = lnk.nentries + end + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +$(SIGNATURES) + +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SparseMatrixLNKDict, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. 
+""" +Base.size(lnk::SparseMatrixLNKDict) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixLNKDict) = lnk.nnz + +""" +$(SIGNATURES) + +Dummy flush! method for SparseMatrixLNKDict. Just +used in test methods +""" +function flush!(lnk::SparseMatrixLNKDict{Tv, Ti}) where {Tv, Ti} + return lnk +end + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. +""" +function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + for (j,k) in lnk.colstart + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + return SparseArrays.sparse!(I,J,V,m,n,+) +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +""" +function add_directly(lnk::SparseMatrixLNKDict{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for (j,k) in lnk.colstart + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = get(lnk.colstart, j, 0) + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? + + colptr[j] = inz + jlnk = one(Ti) # counts the entries in col + jcsc = csc.colptr[j] # counts entries in csc + + while true + if in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || + !in_lnk_col(jlnk, l_lnk_col)) + # Insert entries from csc into new structure + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + jcsc += 1 + inz += 1 + elseif in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) + # Add up entries from csc and lnk + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval + jcsc += 1 + inz += 1 + jlnk += 1 + elseif in_lnk_col(jlnk, l_lnk_col) + # Insert entries from lnk res. 
col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixLNKDict`](@ref) lnk, returning a SparseMatrixCSC
+"""
+Base.:+(lnk::SparseMatrixLNKDict, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+
+function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkdictmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
+
+        ip=1
+        for lnk in lnkdictmatrices
+            for (j,k) in lnk.colstart
+                nodeparts[j]=ip
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+            ip=ip+1
+        end
+        return SparseArrays.sparse!(I,J,V,m,n,+)
+    end
+    return cscmatrix
+end
+
+
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixLNKDict.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKDict)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixLNKDict)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixLNKDict)
+    SparseMatrixLNKDict(size(S, 1),
+                        size(S, 2),
+                        S.nnz,
+                        S.nentries,
+                        copy(S.colptr),
+                        copy(S.colstart),
+                        copy(S.rowval),
+                        copy(S.nzval))
+end
diff --git a/test/experimental_rect.jl b/test/ExperimentalParallel.jl
similarity index 100%
rename from test/experimental_rect.jl
rename to test/ExperimentalParallel.jl

From db51b63cedaf918171dd534bd8c0d1de972729a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?=
Date: Sun, 26 May 2024 20:11:24 +0200
Subject: [PATCH 25/44] tests using generic extendable code

---
 .../extendablesparsematrixparallel.jl         |  2 +-
 src/matrix/abstractextendable.jl              |  7 ++-
 test/ExperimentalParallel.jl                  | 58 +++++++++++++------
 ...erimentalDict.jl => ExperimentalScalar.jl} | 19 ++----
 ...rallelDict.jl => ExperimentalXParallel.jl} | 28 ++++-----
 test/runtests.jl                              | 37 +++++++-----
 6 files changed, 89 insertions(+), 62 deletions(-)
 rename test/{ExperimentalDict.jl => ExperimentalScalar.jl} (59%)
 rename test/{ExperimentalParallelDict.jl => ExperimentalXParallel.jl} (80%)

diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl
index 49566b1..2855855 100644
--- a/src/experimental/extendablesparsematrixparallel.jl
+++ b/src/experimental/extendablesparsematrixparallel.jl
@@ -139,7 +139,7 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x)
     rows = SparseArrays.rowvals(A)
     vals = nonzeros(A)
 
-    r.=zero(Tv)
+    r.=zero(eltype(ext))
     m,n=size(A)
     for icol=1:length(colparts)
         part=colparts[icol]
diff --git a/src/matrix/abstractextendable.jl b/src/matrix/abstractextendable.jl
index 589376c..dae94bb 100644
--- a/src/matrix/abstractextendable.jl
+++ b/src/matrix/abstractextendable.jl
@@ -1,9 +1,10 @@
 """
-Must implement:
-sparse
-Constructor from SparseMatrixCSC
+Subtypes must implement:
+- SparseArrays.sparse (maybe it should be sparse!?): flush + return SparseMatrixCSC
+- Constructor from SparseMatrixCSC
 rawupdateindex!
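+    (sets A[i,j]=op(A[i,j],v), inserting the entry if it is not yet present)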
+reset!: empty all internals, just keep size """ abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl index 367e1ac..4800281 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallel.jl @@ -1,7 +1,9 @@ +module ExperimentalParallel + using ExtendableSparse,SparseArrays using ExtendableSparse.Experimental -using DocStringExtensions using BenchmarkTools +using OhMyThreads: @tasks using Test @@ -48,40 +50,60 @@ function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) end -function speedup_build_ESMP(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) +function speedup_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) m = n lindexes = LinearIndices((1:n,1:m)) X = collect(1:n) #LinRange(0,1,n) Y = collect(1:n) #LinRange(0,1,m) - - ExtendableSparse.with_locking!(false) - A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - ExtendableSparse.with_locking!(true) - mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(reset!($A0)) + result=[] for nt in allnp A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(ExtendableSparse.reset!($A)) seconds=1 + @assert A.cscmatrix≈A0.cscmatrix push!(result,(nt,round(t0/t,digits=2))) end - # #update - # times_update = zeros(k) - # for i=1:k - # times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true) - # end + result + +end + + +function speedup_update(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y) + t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(nonzeros($A0.cscmatrix).=0) + + + + result=[] + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(nonzeros($A.cscmatrix).=0) seconds=1 + @assert A.cscmatrix≈A0.cscmatrix + push!(result,(nt,round(t0/t,digits=2))) + end - # @info "TIMES: MIN, AVG, MAX" - # info_minmax(times_build, "build ") - # info_minmax(times_update, "update") result end + """ `generate_rectangle_grid(lindexes, Ti)` @@ -126,7 +148,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell end for level=1:A.depth - Threads.@threads for tid=1:A.nt + @tasks for tid=1:A.nt for cell in A.cellsforpart[(level-1)*A.nt+tid] assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, tid) end @@ -147,6 +169,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell end end + function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer} addtoentry!(A, k, k, tid, +v) addtoentry!(A, k, l, tid, -v) @@ -222,3 +245,4 @@ function 
assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) whe A[ij10,ij10]+=v*d A[ij11,ij11]+=v*d end +end diff --git a/test/ExperimentalDict.jl b/test/ExperimentalScalar.jl similarity index 59% rename from test/ExperimentalDict.jl rename to test/ExperimentalScalar.jl index 7bb8211..58de771 100644 --- a/test/ExperimentalDict.jl +++ b/test/ExperimentalScalar.jl @@ -1,35 +1,28 @@ -module ExperimentalDict - +module ExperimentalScalar using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using Test -function ExtendableSparse.reset!(A::ExtendableSparseMatrix) - A.cscmatrix=spzeros(size(A)...) - A.lnkmatrix=nothing -end - - -function test_correctness_build(N) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + A=Tm{Float64,Int}(N^2,N^2) partassemble!(A0,X,Y) partassemble!(A,X,Y) @test sparse(A0)≈sparse(A) end -function speed_build(N) +function speed_build(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - A=ExtendableSparseMatrixDict{Float64,Int}(N^2,N^2) + A=Tm{Float64,Int}(N^2,N^2) tlnk= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) tdict= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - tdict/tlnk + tlnk/tdict end end diff --git a/test/ExperimentalParallelDict.jl b/test/ExperimentalXParallel.jl similarity index 80% rename from test/ExperimentalParallelDict.jl rename to test/ExperimentalXParallel.jl index 6d817a0..c6b0122 100644 --- a/test/ExperimentalParallelDict.jl +++ b/test/ExperimentalXParallel.jl @@ -1,14 +1,14 @@ -module ExperimentalParallelDict +module ExperimentalXParallel using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using Test -function test_correctness_update(N) +function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N - A=ExtendableSparseMatrixParallelDict{Float64,Int}(N^2,N^2,1) + A=Tm{Float64,Int}(N^2,N^2,1) allnp=[4,5,6,7,8] # Assembele without partitioning @@ -32,7 +32,7 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) X=1:N Y=1:N allnp=[4,5,6,7,8] @@ -43,27 +43,27 @@ function test_correctness_build(N) for np in allnp # Make a new matrix and assemble parallel. 
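         # (comparing the nonzeros vectors entrywise is valid here because the
         # same entry set always yields the same CSC pattern and ordering)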
# this should result in the same nonzeros - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) partassemble!(A,X,Y, np) @test nonzeros(A)≈nz end end -function test_correctness_mul(N; allnp=[4,5,6,7,8]) +function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8]) X=1:N Y=1:N A0=ExtendableSparseMatrix(N^2,N^2) partassemble!(A0,X,Y) for np in allnp - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) partassemble!(A,X,Y,np) b=rand(N^2) @test A*b ≈ A0*b end end -function speedup_update(N; allnp=[4,5,6,7,8,9,10]) +function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N A=ExtendableSparseMatrix(N^2,N^2) @@ -73,7 +73,7 @@ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) # During setup, set matrix entries to zero while keeping the structure t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) result=[] - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure @@ -85,11 +85,11 @@ function speedup_update(N; allnp=[4,5,6,7,8,9,10]) result end -function speedup_build(N; allnp=[4,5,6,7,8,9,10]) +function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N - A0=ExtendableSparseMatrixParallelDict(N^2,N^2,1) - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A0=ExtendableSparseMatrix(N^2,N^2) + A=Tm(N^2,N^2,1) partassemble!(A0,X,Y) nz=copy(nonzeros(A0)) reset!(A0) @@ -117,7 +117,7 @@ function speedup_build(N; allnp=[4,5,6,7,8,9,10]) result end -function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) +function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) X=1:N Y=1:N @@ -128,7 +128,7 @@ function speedup_mul(N; allnp=[4,5,6,7,8,9,10]) result=[] for np in allnp - A=ExtendableSparseMatrixParallelDict(N^2,N^2,1) + A=Tm(N^2,N^2,1) partassemble!(A,X,Y,np) t=@belapsed $A*$b seconds=1 push!(result,(np,round(t0/t,digits=2))) diff --git a/test/runtests.jl b/test/runtests.jl index e7dc9d7..4be9f0f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using Test using LinearAlgebra using SparseArrays using ExtendableSparse +using ExtendableSparse.Experimental using Printf using BenchmarkTools @@ -12,27 +13,35 @@ using ForwardDiff @testset "ExperimentalParallelLocking" begin include("ExperimentalParallelLocking.jl") @testset "update correctness" begin - ExperimentalParallelLocking.test_correctness_update(50) - ExperimentalParallelLocking.test_correctness_update(100) - ExperimentalParallelLocking.test_correctness_update(rand(30:200)) + for N in [100,rand(30:200),500] + ExperimentalParallelLocking.test_correctness_update(N) + end end @testset "build correctness" begin - ExperimentalParallelLocking.test_correctness_build(50) - ExperimentalParallelLocking.test_correctness_build(100) - ExperimentalParallelLocking.test_correctness_build(rand(30:200)) + for N in [100,rand(30:200),500] + ExperimentalParallelLocking.test_correctness_build(N) + end end end -@testset "ExperimentalDict" begin - include("ExperimentalDict.jl") - ExperimentalDict.test_correctness_build(100) +@testset "ExperimentalScalar" begin + include("ExperimentalScalar.jl") + for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] + for N in [100,rand(30:200),500] + ExperimentalScalar.test_correctness_build(N,Tm) + end + end end -@testset "ExperimentalParallelDict" begin - include("ExperimentalParallelDict.jl") - 
ExperimentalParallelDict.test_correctness_update(200) - ExperimentalParallelDict.test_correctness_build(200) - ExperimentalParallelDict.test_correctness_mul(200) +@testset "ExperimentalXParallel" begin + include("ExperimentalXParallel.jl") + for Tm in [ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] + for N in [100,rand(30:200),500] + ExperimentalXParallel.test_correctness_update(N,Tm) + ExperimentalXParallel.test_correctness_build(N,Tm) + ExperimentalXParallel.test_correctness_mul(N,Tm) + end + end end @testset "Constructors" begin include("test_constructors.jl") end From adf95be89a9aa35639e3f00519fa5762338a92e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 15:52:05 +0200 Subject: [PATCH 26/44] Introduce AbstractSparseMatrixExtension - Remove parallel locking tests - Test ExperimentalSparseParallel --- src/ExtendableSparse.jl | 1 + src/experimental/Experimental.jl | 14 +- .../extendablesparsematrixparallel.jl | 4 +- .../extendablesparsematrixscalar.jl | 10 +- src/experimental/parallel_testtools.jl | 4 +- src/experimental/sparsematrixdict.jl | 4 +- src/experimental/sparsematrixlnkdict.jl | 27 +- src/experimental/sparsematrixlnkx.jl | 441 ++++++++++++++++++ src/matrix/abstractextension.jl | 28 ++ src/matrix/extendable.jl | 72 +-- src/matrix/sparsematrixlnk.jl | 2 +- test/ExperimentalParallel.jl | 22 +- test/ExperimentalParallelLocking.jl | 128 ----- test/ExperimentalScalar.jl | 6 +- test/ExperimentalXParallel.jl | 3 +- test/Project.toml | 1 + test/runtests.jl | 25 +- 17 files changed, 553 insertions(+), 239 deletions(-) create mode 100644 src/experimental/sparsematrixlnkx.jl create mode 100644 src/matrix/abstractextension.jl delete mode 100644 test/ExperimentalParallelLocking.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 8ab64ba..39a8d19 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -22,6 +22,7 @@ using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") +include("matrix/abstractextension.jl") include("matrix/sparsematrixlnk.jl") include("matrix/abstractextendable.jl") include("matrix/extendable.jl") diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 37a34d1..162ed2f 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -4,7 +4,8 @@ using LinearAlgebra using SparseArrays: AbstractSparseMatrixCSC import SparseArrays: nonzeros, getcolptr,nzrange import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex -using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash, AbstractExtendableSparseMatrix +using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash +using ExtendableSparse: AbstractExtendableSparseMatrix, AbstractSparseMatrixExtension using DocStringExtensions using Metis using Base.Threads @@ -41,6 +42,9 @@ export reorderlinsys, nnz_noflush include("sparsematrixdict.jl") export SparseMatrixDict +include("sparsematrixlnkx.jl") +export SparseMatrixLNKX + include("sparsematrixlnkdict.jl") export SparseMatrixLNKDict @@ -54,6 +58,9 @@ export ExtendableSparseMatrixDict const ExtendableSparseMatrixLNKDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} export ExtendableSparseMatrixLNKDict +const ExtendableSparseMatrixLNKX{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKX{Tv,Ti},Tv,Ti} +export ExtendableSparseMatrixLNKX + const 
ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNK{Tv,Ti},Tv,Ti} export ExtendableSparseMatrixLNK @@ -63,10 +70,13 @@ const ExtendableSparseMatrixParallelDict{Tv,Ti}=ExtendableSparseMatrixXParallel{ ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelDict, partcolors! +const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKX{Tv,Ti},Tv,Ti} +ExtendableSparseMatrixParallelLNKX(m,n,p)= ExtendableSparseMatrixParallelLNKX{Float64,Int64}(m,n,p) +export ExtendableSparseMatrixParallelLNKX const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelLNKDict, partcolors! +export ExtendableSparseMatrixParallelLNKDict include("parallel_testtools.jl") diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 2855855..2f79985 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixXParallel{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ @@ -15,7 +15,7 @@ mutable struct ExtendableSparseMatrixXParallel{Tm, Tv, Ti <: Integer} <: Abstrac end -function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm, Tv, Ti} +function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n), [Tm(m,n) for i=1:p], zeros(Ti,n), diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl index d7fdc67..887d275 100644 --- a/src/experimental/extendablesparsematrixscalar.jl +++ b/src/experimental/extendablesparsematrixscalar.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixScalar{Tm, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} """ Final matrix data """ @@ -11,7 +11,7 @@ mutable struct ExtendableSparseMatrixScalar{Tm, Tv, Ti <: Integer} <: AbstractEx end -function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm, Tv, Ti<:Integer} +function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} ExtendableSparseMatrixScalar(spzeros(Tv, Ti, m, n), Tm(m,n) ) @@ -27,8 +27,10 @@ end function flush!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where{Tm,Tv,Ti} - ext.cscmatrix=ext.xmatrix+ext.cscmatrix - ext.xmatrix=Tm(size(ext.cscmatrix)...) + if nnz(ext.xmatrix)>0 + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) 
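+        # (reached only when the extension actually held entries; an empty
+        # extension leaves both the CSC part and the extension untouched)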
+    end
     ext
 end
diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl
index fd261cb..16a67c7 100644
--- a/src/experimental/parallel_testtools.jl
+++ b/src/experimental/parallel_testtools.jl
@@ -192,7 +192,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1)
 end
 
 
-function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict},X,Y,nt=1;d=0.1, reset=true)
+function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true)
     Nx=length(X)
     Ny=length(Y)
     size(A,1)==Nx*Ny || error("incompatible size of A")
@@ -201,7 +201,7 @@ function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSpa
     lindexes=LinearIndices((1:Nx,1:Ny))
     if nt==1
         reset!(A,1)
-       assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1)
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1)
     else
         p,pc=colpart2d(X,Y,nt)
         if reset
diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl
index 666aa10..0ffe8aa 100644
--- a/src/experimental/sparsematrixdict.jl
+++ b/src/experimental/sparsematrixdict.jl
@@ -3,7 +3,7 @@
 
 Sparse matrix where entries are organized as dictionary.
 """
-mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrix{Tv,Ti}
+mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrixExtension{Tv,Ti}
     m::Ti
     n::Ti
    values::Dict{Pair{Ti,Ti}, Tv}
@@ -29,6 +29,8 @@ end
 
 Base.size(m::SparseMatrixDict)=(m.m,m.n)
 
+SparseArrays.nnz(m::SparseMatrixDict)=length(m.values)
+
 function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti}
     l=length(m.values)
     I=Vector{Ti}(undef,l)
diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl
index 07f25dc..c096c69 100644
--- a/src/experimental/sparsematrixlnkdict.jl
+++ b/src/experimental/sparsematrixlnkdict.jl
@@ -4,7 +4,7 @@
 Modification of SparseMatrixLNK where the pointer to first index of
 column j is stored in a dictionary.
 """
-mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti}
+mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
     """
     Number of rows
     """
@@ -240,15 +240,6 @@ Return number of nonzero entries.
 """
 SparseArrays.nnz(lnk::SparseMatrixLNKDict) = lnk.nnz
 
-"""
-$(SIGNATURES)
-
-Dummy flush! method for SparseMatrixLNKDict. Just
-used in test methods
-"""
-function flush!(lnk::SparseMatrixLNKDict{Tv, Ti}) where {Tv, Ti}
-    return lnk
-end
 
 """
 $(SIGNATURES)
@@ -262,13 +253,15 @@ function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti},
     J=Vector{Ti}(undef,l)
     V=Vector{Tv}(undef,l)
     i=1
-    for icsc=1:length(colptr)-1
-        for j=colptr[icsc]:colptr[icsc+1]-1
-            I[i]=icsc
-            J[i]=rowval[j]
-            V[i]=nzval[j]
-            i=i+1
-        end
+    if nnz(csc)>0
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
     end
     for (j,k) in lnk.colstart
         while k>0
diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl
new file mode 100644
index 0000000..6646e12
--- /dev/null
+++ b/src/experimental/sparsematrixlnkx.jl
@@ -0,0 +1,441 @@
+"""
+    $(TYPEDEF)
+
+Modification of SparseMatrixLNK where the pointer to first index of
+column j is stored in a vector.
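+
+In contrast to [`SparseMatrixLNKDict`](@ref), the column start indices are
+kept in a vector of length n, avoiding hash lookups at the price of O(n)
+storage. A usage sketch:
+
+    lnk = SparseMatrixLNKX{Float64,Int}(100, 100)
+    rawupdateindex!(lnk, +, 1.0, 3, 5)   # lnk[3,5] += 1.0, inserting the entry
+    S = sparse(lnk)                      # merge into a SparseMatrixCSC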
+"""
+mutable struct SparseMatrixLNKX{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
+    """
+    Number of rows
+    """
+    m::Ti
+
+    """
+    Number of columns
+    """
+    n::Ti
+
+    """
+    Number of nonzeros
+    """
+    nnz::Ti
+
+    """
+    Length of arrays
+    """
+    nentries::Ti
+
+    """
+    Linked list of column entries. Initial length is 10,
+    it grows with each new entry.
+
+    colptr[index] contains the next
+    index in the list or zero, in the latter case terminating the list which
+    starts at index 1<=j<=n for each column j.
+    """
+    colptr::Vector{Ti}
+
+    """
+    Start indices of columns
+    """
+    colstart::Vector{Ti}
+
+    """
+    Row numbers. For each index it contains the zero (initial state)
+    or the row numbers corresponding to the column entry list in colptr.
+    """
+    rowval::Vector{Ti}
+
+    """
+    Nonzero entry values corresponding to each pair
+    (colptr[index],rowval[index])
+    """
+    nzval::Vector{Tv}
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixLNKX{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    SparseMatrixLNKX{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), zeros(Ti,n), zeros(Ti,10), zeros(Tv,10))
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixLNKX(valuetype::Type{Tv}, indextype::Type{Ti}, m,
+                          n) where {Tv, Ti <: Integer}
+    SparseMatrixLNKX{Tv, Ti}(m, n)
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixLNKX(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKX(Tv, Int, m, n)
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixLNKX(m, n) = SparseMatrixLNKX(Float64, m, n)
+
+
+function findindex(lnk::SparseMatrixLNKX, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k = lnk.colstart[j]
+    if k==0
+        return 0,0
+    end
+    k0 = k
+    while k > 0
+        if lnk.rowval[k] == i
+            return k, 0
+        end
+        k0 = k
+        k = lnk.colptr[k]
+    end
+    return 0, k0
+end
+
+"""
+$(SIGNATURES)
+
+Return value stored for entry or zero if not found
+"""
+function Base.getindex(lnk::SparseMatrixLNKX{Tv, Ti}, i, j) where {Tv, Ti}
+    k, k0 = findindex(lnk, i, j)
+    if k == 0
+        return zero(Tv)
+    else
+        return lnk.nzval[k]
+    end
+end
+
+function addentry!(lnk::SparseMatrixLNKX, i, j, k, k0)
+    # increase number of entries
+    lnk.nentries += 1
+    if length(lnk.nzval) < lnk.nentries
+        newsize = Int(ceil(5.0 * lnk.nentries / 4.0))
+        resize!(lnk.nzval, newsize)
+        resize!(lnk.rowval, newsize)
+        resize!(lnk.colptr, newsize)
+    end
+
+    if k0==0
+        lnk.colstart[j]=lnk.nentries
+    end
+
+    # Append entry if not found
+    lnk.rowval[lnk.nentries] = i
+
+    # Shift the end of the list
+    lnk.colptr[lnk.nentries] = 0
+
+    if k0>0
+        lnk.colptr[k0] = lnk.nentries
+    end
+
+    # Update number of nonzero entries
+    lnk.nnz += 1
+    return lnk.nentries
+end
+
+"""
+$(SIGNATURES)
+
+Update value of existing entry, otherwise extend matrix if v is nonzero.
+"""
+function Base.setindex!(lnk::SparseMatrixLNKX, v, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = v
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = v
+    end
+    return lnk
+end
+
+"""
+$(SIGNATURES)
+
+Update element of the matrix with operation `op`.
+It assumes that `op(0,0)==0`. If `v` is zero, no new
+entry is created.
+""" +function updateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. +""" +Base.size(lnk::SparseMatrixLNKX) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixLNKX) = lnk.nnz + +""" +$(SIGNATURES) + +Dummy flush! method for SparseMatrixLNKX. Just +used in test methods +""" +function flush!(lnk::SparseMatrixLNKX{Tv, Ti}) where {Tv, Ti} + return lnk +end + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. +""" +function add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + if nnz(csc)>0 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + end + for j=1:n + k=lnk.colstart[j] + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + return SparseArrays.sparse!(I,J,V,m,n,+) +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +""" +function add_directly(lnk::SparseMatrixLNKX{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for j=1:lnk.n + k=lnk.colstart[j] + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = lnk.colstart[j] + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? 
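+    # (what follows is a two-cursor merge: the sorted csc column and the
+    # sorted col buffer are traversed jointly, values with coinciding row
+    # indices are added up, and all other entries are copied in order)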
+
+        colptr[j] = inz
+        jlnk = one(Ti) # counts the entries in col
+        jcsc = csc.colptr[j]  # counts entries in csc
+
+        while true
+            if in_csc_col(jcsc, j) &&
+               (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval ||
+                !in_lnk_col(jlnk, l_lnk_col))
+                # Insert entries from csc into new structure
+                rowval[inz] = csc.rowval[jcsc]
+                nzval[inz] = csc.nzval[jcsc]
+                jcsc += 1
+                inz += 1
+            elseif in_csc_col(jcsc, j) &&
+                   (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval)
+                # Add up entries from csc and lnk
+                rowval[inz] = csc.rowval[jcsc]
+                nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval
+                jcsc += 1
+                inz += 1
+                jlnk += 1
+            elseif in_lnk_col(jlnk, l_lnk_col)
+                # Insert entries from lnk resp. col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixLNKX`](@ref) lnk, returning a SparseMatrixCSC
+"""
+Base.:+(lnk::SparseMatrixLNKX, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+
+function sum!(nodeparts, lnkmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
+
+        ip=1
+        for lnk in lnkmatrices
+            for j=1:n
+                k=lnk.colstart[j]
+                k>0 && (nodeparts[j]=ip)
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+            ip=ip+1
+        end
+        return SparseArrays.sparse!(I,J,V,m,n,+)
+    end
+    return cscmatrix
+end
+
+
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixLNKX.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKX)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixLNKX)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixLNKX)
+    SparseMatrixLNKX(size(S, 1),
+                     size(S, 2),
+                     S.nnz,
+                     S.nentries,
+                     copy(S.colptr),
+                     copy(S.colstart),
+                     copy(S.rowval),
+                     copy(S.nzval))
+end
diff --git a/src/matrix/abstractextension.jl b/src/matrix/abstractextension.jl
new file mode 100644
index 0000000..378e54a
--- /dev/null
+++ b/src/matrix/abstractextension.jl
@@ -0,0 +1,28 @@
+"""
+    $(TYPEDEF)
+
+Abstract type for sparse matrix extension.
+
+Subtypes T_ext must implement:
+
+
+Constructor T_ext(m,n)
+SparseArrays.nnz(ext::T_ext)
+Base.size(ext::T_ext)
+
+Base.+(ext::T_ext, csc)
+  - Add extension matrix and csc matrix, return csc matrix
+
+sum!(nodeparts::Vector{Ti}, extmatrices::Vector{T_ext}, cscmatrix)
+  - Add csc matrix and extension matrices (one per partition) and return csc matrix
+  - Fill nodeparts (already initialized at input) with the information which partition was used to assemble a node,
+    i.e. if entry [i,j] comes from extmatrices[p], set nodeparts[j]=p.
+
+    This information may be used by matrix-vector multiplication and preconditioners
+
+rawupdateindex!(ext::T_ext, op, v, i, j)
+  - Set ext[i,j]=op(ext[i,j],v), possibly insert entry into matrix.
+ + +""" +abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index 572227e..2d8a908 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -17,8 +17,6 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSp Linked list structure holding data of extension """ lnkmatrix::Union{SparseMatrixLNK{Tv, Ti}, Nothing} - - lock::Base.ReentrantLock """ Pattern hash @@ -26,32 +24,6 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSp phash::UInt64 end -mutable struct Locking - locking::Bool -end - -# -# Locking functionality just for developing parallelization. -# To be removed before merging into main branch. -# -const locking=Locking(false) - -function with_locking!(l::Bool) - global locking - locking.locking=l -end - -function with_locking() - global locking - locking.locking -end - -mylock(x)=with_locking() ? Base.lock(x) : nothing -myunlock(x)=with_locking() ? Base.unlock(x) : nothing - - -#mylock(x)=nothing -#myunlock(x)=nothing """ ``` @@ -65,7 +37,7 @@ Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for """ function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing,Base.ReentrantLock(), 0) + ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) end function ExtendableSparseMatrix(valuetype::Type{Tv}, @@ -87,11 +59,11 @@ $(SIGNATURES) Create ExtendableSparseMatrix from SparseMatrixCSC """ function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) end function ExtendableSparseMatrix{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, Base.ReentrantLock(), phash(csc)) + ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) end """ @@ -193,15 +165,10 @@ function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - mylock(ext.lock) - try - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) - end - updateindex!(ext.lnkmatrix, op, v, i, j) - finally - myunlock(ext.lock) + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end + updateindex!(ext.lnkmatrix, op, v, i, j) end ext end @@ -220,15 +187,10 @@ function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) else - mylock(ext.lock) - try if ext.lnkmatrix == nothing ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end rawupdateindex!(ext.lnkmatrix, op, v, i, j) - finally - myunlock(ext.lock) - end end ext end @@ -247,15 +209,10 @@ function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, if k > 0 ext.cscmatrix.nzval[k] = v else - mylock(ext.lock) - try - if ext.lnkmatrix == nothing - ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) - end - ext.lnkmatrix[i, j] = v - finally - myunlock(ext.lock) + if ext.lnkmatrix == nothing + ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n) end + ext.lnkmatrix[i, j] = v end end @@ -275,12 +232,7 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, return zero(Tv) else v=zero(Tv) - 
mylock(ext.lock) - try - v=ext.lnkmatrix[i, j] - finally - myunlock(ext.lock) - end + v=ext.lnkmatrix[i, j] end end @@ -325,9 +277,9 @@ $(SIGNATURES) """ function Base.copy(S::ExtendableSparseMatrix) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing, Base.ReentrantLock(),S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), nothing,S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), Base.ReentrantLock(), S.phash) + ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) end end diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl index b00c6fc..b69c863 100644 --- a/src/matrix/sparsematrixlnk.jl +++ b/src/matrix/sparsematrixlnk.jl @@ -18,7 +18,7 @@ can be conveniently updated via `push!`. No copying of existing data is necessa $(TYPEDFIELDS) """ -mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti} +mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} """ Number of rows """ diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl index 4800281..936f566 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallel.jl @@ -50,6 +50,25 @@ function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10) end +function test_correctness_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) + m = n + lindexes = LinearIndices((1:n,1:m)) + X = collect(1:n) #LinRange(0,1,n) + Y = collect(1:n) #LinRange(0,1,m) + + mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti) + + A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m) + assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + + for nt in allnp + A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false) + assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false) + @assert A.cscmatrix≈A0.cscmatrix + end +end + + function speedup_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10]) m = n lindexes = LinearIndices((1:n,1:m)) @@ -158,7 +177,6 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell for cell in A.cellsforpart[A.depth*A.nt+1] assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1) end - nnzCSC, nnzLNK = nnz_noflush(A) if nnzCSC > 0 && nnzLNK > 0 flush!(A; do_dense=false) @@ -166,7 +184,7 @@ function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell elseif nnzCSC == 0 && nnzLNK > 0 flush!(A; do_dense=true) #dense flush - end + end end diff --git a/test/ExperimentalParallelLocking.jl b/test/ExperimentalParallelLocking.jl deleted file mode 100644 index 4ad7ff2..0000000 --- a/test/ExperimentalParallelLocking.jl +++ /dev/null @@ -1,128 +0,0 @@ -module ExperimentalParallelLocking - -using ExtendableSparse,SparseArrays -using ExtendableSparse: with_locking! -using ExtendableSparse.Experimental -using BenchmarkTools -using Test - -""" - test_correctness_update(N) - -Test correctness of parallel assembly on NxN grid during -update phase, assuming that the structure has been assembled. 
-""" -function test_correctness_update(N) - with_locking!(true) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - allnp=[4,5,6,7,8] - - # Assembele without partitioning - # this gives the "base truth" to compare with - partassemble!(A,X,Y) - - # Save the nonzeros - nz=copy(nonzeros(A)) - for np in allnp - # Reset the nonzeros, keeping the structure intact - nonzeros(A).=0 - # Parallel assembly whith np threads - partassemble!(A,X,Y, np) - @test nonzeros(A)≈nz - end - with_locking!(false) -end - -""" - test_correctness_build(N) - -Test correctness of parallel assembly on NxN grid during -build phase, assuming that no structure has been assembled. -""" -function test_correctness_build(N) - with_locking!(true) - X=1:N - Y=1:N - allnp=[4,5,6,7,8] - # Get the "ground truth" - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - for np in allnp - # Make a new matrix and assemble parallel. - # this should result in the same nonzeros - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y, np) - @test nonzeros(A)≈nz - end - with_locking!(false) -end - - - -""" - speedup_update(N) - -Benchmark parallel speedup of update phase of parallel assembly on NxN grid. -Check for correctness as well. -""" -function speedup_update(N; allnp=[4,5,6,7,8,9,10]) - with_locking!(true) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - # Get the base timing - # During setup, set matrix entries to zero while keeping the structure - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) - result=[] - for np in allnp - # Get the parallel timing - # During setup, set matrix entries to zero while keeping the structure - t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(nonzeros($A).=0) - @assert nonzeros(A)≈nz - push!(result,(np,round(t0/t,digits=2))) - end - with_locking!(false) - result -end - - -""" - speedup_build(N) - -Benchmark parallel speedup of structure build phase of parallel assembly on NxN grid. -Check for correctness as well. - -Works in the moment with locking. -""" -function speedup_build(N; allnp=[4,5,6,7,8,9,10]) - with_locking!(true) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - reset!(A) - partassemble!(A,X,Y) - @assert nonzeros(A)≈(nz) - - # Get the base timing - # During setup, reset matrix to empty state. - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - - result=[] - for np in allnp - # Get the parallel timing - # During setup, reset matrix to empty state. 
- t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) - @assert nonzeros(A)≈nz - push!(result,(np,round(t0/t,digits=2))) - end - with_locking!(false) - result -end -end diff --git a/test/ExperimentalScalar.jl b/test/ExperimentalScalar.jl index 58de771..11040cf 100644 --- a/test/ExperimentalScalar.jl +++ b/test/ExperimentalScalar.jl @@ -20,9 +20,9 @@ function speed_build(N,Tm::Type{<:AbstractSparseMatrix}) A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) A=Tm{Float64,Int}(N^2,N^2) - tlnk= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) - tdict= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) - tlnk/tdict + tbase= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) + tx= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) + tbase/tx end end diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index c6b0122..e45767a 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -32,10 +32,9 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}, allnp=[4,5,6,7,8]) X=1:N Y=1:N - allnp=[4,5,6,7,8] # Get the "ground truth" A=ExtendableSparseMatrix(N^2,N^2) partassemble!(A,X,Y) diff --git a/test/Project.toml b/test/Project.toml index e195dd2..8aa666d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -9,6 +9,7 @@ IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/test/runtests.jl b/test/runtests.jl index 4be9f0f..244dbb5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,21 +9,6 @@ using BenchmarkTools using MultiFloats using ForwardDiff - -@testset "ExperimentalParallelLocking" begin - include("ExperimentalParallelLocking.jl") - @testset "update correctness" begin - for N in [100,rand(30:200),500] - ExperimentalParallelLocking.test_correctness_update(N) - end - end - - @testset "build correctness" begin - for N in [100,rand(30:200),500] - ExperimentalParallelLocking.test_correctness_build(N) - end - end -end @testset "ExperimentalScalar" begin include("ExperimentalScalar.jl") for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] @@ -44,6 +29,16 @@ end end end +@testset "ExperimentalParallel" begin + include("ExperimentalParallel.jl") + for d=[1,2,3] + for N in [100,rand(30:200),500] + ExperimentalParallel.test_correctness_build(N,d) + end + end +end + + @testset "Constructors" begin include("test_constructors.jl") end @testset "Copy-Methods" begin include("test_copymethods.jl") end From 4fb318e29420030f0d314a35082a55413a561c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 15:58:11 +0200 Subject: [PATCH 27/44] ci: new ci matrix, multithreading --- .github/workflows/ci.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ff5995..882d933 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-15,15 +15,26 @@ jobs: fail-fast: false matrix: version: - - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. + - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - 'nightly' os: - ubuntu-latest - - macos-latest - windows-latest + - macos-latest # arm + - macOS-13 # intel arch: - x64 + - aarch64 + exclude: + - os: ubuntu-latest + arch: aarch64 + - os: windows-latest + arch: aarch64 + - os: macOS-13 + arch: aarch64 + - os: macos-latest + arch: x64 steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 @@ -42,6 +53,8 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + env: + JULIA_NUM_THREADS: 4 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 docs: From 99f98517c2887f08046cbeafaa9a094a84204c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 16:26:09 +0200 Subject: [PATCH 28/44] fix sparse!, replace Threads.@threads by tasks --- src/ExtendableSparse.jl | 2 +- src/experimental/sparsematrixdict.jl | 12 ++++++++++-- src/experimental/sparsematrixlnkdict.jl | 6 +++++- src/experimental/sparsematrixlnkx.jl | 12 ++++++++++-- src/factorizations/blockpreconditioner.jl | 10 +++++----- 5 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 39a8d19..0d7bd5c 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -3,7 +3,7 @@ using SparseArrays,StaticArrays using LinearAlgebra using Sparspak using ILUZero - +using OhMyThreads: @tasks if !isdefined(Base, :get_extension) using Requires diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 0ffe8aa..9c8cea8 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -43,7 +43,11 @@ function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} V[i]=v i=i+1 end - SparseArrays.sparse!(I,J,V,size(mv[1])...,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} @@ -70,7 +74,11 @@ function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC V[i]=v i=i+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end cscmatrix end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index c096c69..f1f6ab5 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -272,7 +272,11 @@ function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, i=i+1 end end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index 6646e12..01fbbd0 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -266,7 +266,11 @@ function 
add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, i=i+1 end end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end @@ -407,7 +411,11 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscma end ip=ip+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse!(I,J,V,m,n,+) + end end return cscmatrix end diff --git a/src/factorizations/blockpreconditioner.jl b/src/factorizations/blockpreconditioner.jl index 7c97eca..bf0d5de 100644 --- a/src/factorizations/blockpreconditioner.jl +++ b/src/factorizations/blockpreconditioner.jl @@ -49,7 +49,7 @@ function update!(precon::BlockPreconditioner) np=length(precon.partitioning) precon.facts=Vector{Any}(undef,np) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np factorization=deepcopy(precon.factorization) AP=precon.A[precon.partitioning[ipart],precon.partitioning[ipart]] FP=factorization(AP) @@ -66,11 +66,11 @@ function LinearAlgebra.ldiv!(p::BlockPreconditioner,v) np=length(partitioning) if allow_views(p.factorization) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np ldiv!(facts[ipart],view(v,partitioning[ipart])) end else - Threads.@threads for ipart=1:np + @tasks for ipart=1:np vv=v[partitioning[ipart]] ldiv!(facts[ipart],vv) view(v,partitioning[ipart]).=vv @@ -85,11 +85,11 @@ function LinearAlgebra.ldiv!(u,p::BlockPreconditioner,v) np=length(partitioning) if allow_views(p.factorization) - Threads.@threads for ipart=1:np + @tasks for ipart=1:np ldiv!(view(u,partitioning[ipart]),facts[ipart],view(v,partitioning[ipart])) end else - Threads.@threads for ipart=1:np + @tasks for ipart=1:np uu=u[partitioning[ipart]] ldiv!(uu,facts[ipart],v[partitioning[ipart]]) view(u,partitioning[ipart]).=uu From a78d8ecbc5fa3ef9f0d1eb7c873693884c619c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 16:35:34 +0200 Subject: [PATCH 29/44] fix sparse! 
calls --- src/experimental/sparsematrixdict.jl | 10 +++++++--- src/experimental/sparsematrixlnkdict.jl | 8 ++++++-- src/experimental/sparsematrixlnkx.jl | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 9c8cea8..2e58238 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -46,7 +46,7 @@ function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end @@ -77,7 +77,7 @@ function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end cscmatrix @@ -113,7 +113,11 @@ function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatri end ip=ip+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end end return cscmatrix end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index f1f6ab5..a53df25 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -275,7 +275,7 @@ function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end @@ -415,7 +415,11 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cs end ip=ip+1 end - return SparseArrays.sparse!(I,J,V,m,n,+) + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end end return cscmatrix end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index 01fbbd0..f7a322a 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -269,7 +269,7 @@ function add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end @@ -414,7 +414,7 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscma @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,m,n,+) end end return cscmatrix From f183d93d4ce78b15d902d378ea187c4f17ef6fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 27 May 2024 18:26:59 +0200 Subject: [PATCH 30/44] fix Base.: dispatch for 1.9 --- Project.toml | 1 + src/experimental/extendablesparsematrixparallel.jl | 8 +++++++- test/ExperimentalXParallel.jl | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 1d10c82..e1bcecd 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b" [weakdeps] AMGCLWrap = 
"4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 2f79985..c8125bf 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -62,7 +62,6 @@ function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti} np=length(ext.xmatrices) (m,n)=size(ext.cscmatrix) ext.xmatrices=[Tm(m,n) for i=1:np] - npts::Vector{Ti}=ext.nodeparts pn=zeros(Ti,np) for i=1:n @@ -132,7 +131,14 @@ function rawupdateindex!(ext::ExtendableSparseMatrixXParallel, end end + +# Needed in 1.9 +function Base.:*(ext::ExtendableSparse.Experimental.ExtendableSparseMatrixXParallel{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA + mul!(similar(x),ext,x) +end + function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) + flush!(ext) A=ext.cscmatrix colparts=ext.colparts partnodes=ext.partnodes diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index e45767a..d6bb113 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -53,11 +53,12 @@ function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5 Y=1:N A0=ExtendableSparseMatrix(N^2,N^2) partassemble!(A0,X,Y) - for np in allnp A=Tm(N^2,N^2,1) partassemble!(A,X,Y,np) b=rand(N^2) + flush!(A) + A*b @test A*b ≈ A0*b end end From c3b78c9486143e9b0c185cd4b7138901f05f3f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 28 May 2024 11:57:07 +0200 Subject: [PATCH 31/44] working example with VoronoiFVM --- src/experimental/Experimental.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 162ed2f..4346122 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -10,6 +10,7 @@ using DocStringExtensions using Metis using Base.Threads using OhMyThreads: @tasks +import ExtendableSparse: factorize!, update! include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) From dea60a4cf374dad794144bf3e12057eb298a6086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 17 Jun 2024 14:52:38 +0200 Subject: [PATCH 32/44] Tests with partitioned grids. 
Need for grid induced node partitioning --- .../extendablesparsematrixparallel.jl | 11 +- src/experimental/parallel_testtools.jl | 3 + src/experimental/sparsematrixdict.jl | 10 +- test/ExperimentalScalar.jl | 30 +-- test/ExperimentalXParallel.jl | 188 ++++++++++++------ test/Project.toml | 1 + 6 files changed, 156 insertions(+), 87 deletions(-) diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index c8125bf..97b4423 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -150,13 +150,10 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) for icol=1:length(colparts) part=colparts[icol] @tasks for ip=1:length(part) - @inbounds begin - for j in partnodes[part[ip]] - for i in nzrange(A,j) - row = rows[i] - val = vals[i] - r[row]+=val*x[j] - end + pnodes=partnodes[part[ip]] + for j in pnodes + @inbounds for i in nzrange(A,j) + r[rows[i]]+=vals[i]*x[j] end end end diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl index 16a67c7..83f75cd 100644 --- a/src/experimental/parallel_testtools.jl +++ b/src/experimental/parallel_testtools.jl @@ -195,6 +195,7 @@ end function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true) Nx=length(X) Ny=length(Y) + size(A,1)==Nx*Ny || error("incompatible size of A") size(A,2)==Nx*Ny || error("incompatible size of A") @@ -219,3 +220,5 @@ function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSpa end flush!(A) end + + diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 2e58238..2bd6295 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -31,22 +31,22 @@ Base.size(m::SparseMatrixDict)=(m.m,m.n) SparseArrays.nnz(m::SparseMatrixDict)=length(m.values) -function SparseArrays.sparse(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} - l=length(m.values) +function SparseArrays.sparse(mat::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} + l=length(mat.values) I=Vector{Ti}(undef,l) J=Vector{Ti}(undef,l) V=Vector{Tv}(undef,l) i=1 - for (p,v) in m.values + for (p,v) in mat.values I[i]=first(p) J[i]=last(p) V[i]=v i=i+1 end @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) + return SparseArrays.sparse!(I,J,V,size(mat)...,+) else - return SparseArrays.sparse(I,J,V,m,n,+) + return SparseArrays.sparse(I,J,V,size(mat)...,+) end end diff --git a/test/ExperimentalScalar.jl b/test/ExperimentalScalar.jl index 11040cf..1f1ee84 100644 --- a/test/ExperimentalScalar.jl +++ b/test/ExperimentalScalar.jl @@ -3,25 +3,25 @@ using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using Test +include("test_parallel.jl") -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - A=Tm{Float64,Int}(N^2,N^2) - partassemble!(A0,X,Y) - partassemble!(A,X,Y) +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + testassemble!(A,grid) @test sparse(A0)≈sparse(A) end -function speed_build(N,Tm::Type{<:AbstractSparseMatrix}) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix{Float64,Int}(N^2,N^2) - 
A=Tm{Float64,Int}(N^2,N^2) - - tbase= @belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) - tx= @belapsed partassemble!($A,$X,$Y) seconds=1 setup=(reset!($A)) +function speed_build(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + tbase= @belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + tx= @belapsed testassemble!($A,$grid) seconds=1 setup=(reset!($A)) tbase/tx end diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index d6bb113..4daf8e9 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -2,18 +2,23 @@ module ExperimentalXParallel using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental using BenchmarkTools +using ExtendableGrids +#using MKLSparse +using SparseMatricesCSR using Test +using OhMyThreads +include("test_parallel.jl") -function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}) - X=1:N - Y=1:N - A=Tm{Float64,Int}(N^2,N^2,1) - allnp=[4,5,6,7,8] +function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A=Tm{Float64,Int}(nnodes,nnodes,1) + allnp=[10,15,20] # Assembele without partitioning # this gives the "base truth" to compare with - partassemble!(A,X,Y) + testassemble_parallel!(A,grid) # Save the nonzeros nz=copy(nonzeros(A)) @@ -21,7 +26,9 @@ function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}) # Reset the nonzeros, keeping the structure intact nonzeros(A).=0 # Parallel assembly whith np threads - partassemble!(A,X,Y, np) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @show num_partitions_per_color(pgrid) + testassemble_parallel!(A,pgrid) @test nonzeros(A)≈nz end end @@ -32,109 +39,170 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}, allnp=[4,5,6,7,8]) - X=1:N - Y=1:N +function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) # Get the "ground truth" - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + nz=copy(nonzeros(A0)) for np in allnp # Make a new matrix and assemble parallel. 
# this should result in the same nonzeros - A=Tm(N^2,N^2,1) - partassemble!(A,X,Y, np) - @test nonzeros(A)≈nz + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + A=Tm(nnodes,nnodes, num_partitions(pgrid)) + @show num_partitions_per_color(pgrid) + @test checkpartitioning(pgrid) + testassemble_parallel!(A,pgrid) + @test nonzeros(A) ≈ nz end end -function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8]) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A0,X,Y) +function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) for np in allnp - A=Tm(N^2,N^2,1) - partassemble!(A,X,Y,np) - b=rand(N^2) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + A=Tm(nnodes,nnodes, num_partitions(pgrid)) + testassemble_parallel!(A,pgrid) flush!(A) - A*b + partcolors!(A,partition_pcolors(pgrid)) + b=rand(nnodes) @test A*b ≈ A0*b end end -function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) - X=1:N - Y=1:N - A=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) +function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + nz=copy(nonzeros(A0)) # Get the base timing # During setup, set matrix entries to zero while keeping the structure - t0=@belapsed partassemble!($A,$X,$Y) seconds=1 setup=(nonzeros($A).=0) + t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(nonzeros($A0).=0) result=[] - A=Tm(N^2,N^2,1) + A=Tm(nnodes,nnodes,1) for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure - partassemble!(A,X,Y,np) - t=@belapsed partassemble!($A,$X,$Y,$np,reset=false) seconds=1 setup=(nonzeros($A).=0) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @show num_partitions_per_color(pgrid) + reset!(A,num_partitions(pgrid)) + testassemble_parallel!(A,pgrid) + t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(nonzeros($A).=0) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end result end -function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) - X=1:N - Y=1:N - A0=ExtendableSparseMatrix(N^2,N^2) - A=Tm(N^2,N^2,1) - partassemble!(A0,X,Y) +function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) nz=copy(nonzeros(A0)) reset!(A0) - partassemble!(A0,X,Y) + testassemble!(A0,grid) @assert nonzeros(A0)≈(nz) - partassemble!(A,X,Y) - nz=copy(nonzeros(A)) - reset!(A) - partassemble!(A,X,Y) - @assert nonzeros(A)≈(nz) - # Get the base timing # During setup, reset matrix to empty state. - t0=@belapsed partassemble!($A0,$X,$Y) seconds=1 setup=(reset!($A0)) + t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) result=[] + A=Tm(nnodes,nnodes,1) for np in allnp # Get the parallel timing # During setup, reset matrix to empty state. 
- t=@belapsed partassemble!($A,$X,$Y,$np) seconds=1 setup=(reset!($A)) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + reset!(A,num_partitions(pgrid)) + @show num_partitions_per_color(pgrid) + t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(reset!($A,num_partitions($pgrid))) @assert nonzeros(A)≈nz push!(result,(np,round(t0/t,digits=2))) end result end -function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[4,5,6,7,8,9,10]) - X=1:N - Y=1:N - - A0=ExtendableSparseMatrix(N^2,N^2) - partassemble!(A0,X,Y) - b=rand(N^2) +function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + b=rand(nnodes) t0=@belapsed $A0*$b seconds=1 - + A0b=A0*b result=[] + A=Tm(nnodes,nnodes,1) for np in allnp - A=Tm(N^2,N^2,1) - partassemble!(A,X,Y,np) + pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @show num_partitions_per_color(pgrid) + reset!(A,num_partitions(pgrid)) + testassemble_parallel!(A,pgrid) + flush!(A) + partcolors!(A,partition_pcolors(pgrid)) + t=@belapsed $A*$b seconds=1 + @assert A0b≈A*b push!(result,(np,round(t0/t,digits=2))) end result end + +function mymul(A::SparseMatrixCSR,v::AbstractVector) + y=copy(v) + A.n == size(v, 1) || throw(DimensionMismatch()) + A.m == size(y, 1) || throw(DimensionMismatch()) + @tasks for row = 1:size(y, 1) + y[row]=0.0 + @inbounds for nz in nzrange(A,row) + col = A.colval[nz] + y[row] += A.nzval[nz]*v[col] + end + end + return y +end + +function speedup_csrmul(N; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + # Get the "ground truth" + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + t00=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + + reset!(A0) + testassemble!(A0,grid) + b=rand(nnodes) + t0=@belapsed $A0*$b seconds=1 + A0b=A0*b + + + t0x=@belapsed A0x=sparse(transpose(sparse($A0))) + + A0x=sparse(transpose(sparse(A0))) + + tx=@belapsed A=SparseMatrixCSR{1}(transpose($A0x)) + + A=SparseMatrixCSR{1}(transpose(sparse(A0x))) + t1=@belapsed $A*$b seconds=1 + + t2=@belapsed mymul($A, $b) seconds=1 + + @info t00,t0,t0x, tx,t1, t2 + + @assert A0b≈A*b + t0/t1 +end + + end diff --git a/test/Project.toml b/test/Project.toml index 8aa666d..78cab80 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" From d16e33d175f9f756e15715f0561d962f0c1eee6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 17 Jun 2024 22:57:21 +0200 Subject: [PATCH 33/44] parallelization + tests based on grid node partitioning --- Project.toml | 4 +- src/experimental/Experimental.jl | 4 - .../extendablesparsematrixparallel.jl | 63 ++--- src/experimental/parallel_testtools.jl | 224 ------------------ src/experimental/sparsematrixdict.jl | 3 +- src/experimental/sparsematrixlnkdict.jl | 3 +- src/experimental/sparsematrixlnkx.jl | 2 +- test/ExperimentalParallel.jl | 220 +++++++++++++++++ test/ExperimentalScalar.jl | 28 --- test/ExperimentalXParallel.jl | 197 +++++++++++++-- test/Project.toml | 4 + test/runtests.jl | 14 +- 12 files changed, 
425 insertions(+), 341 deletions(-) delete mode 100644 src/experimental/parallel_testtools.jl delete mode 100644 test/ExperimentalScalar.jl diff --git a/Project.toml b/Project.toml index e1bcecd..0372ecb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,10 @@ name = "ExtendableSparse" uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" authors = ["Juergen Fuhrmann "] -version = "1.4.0" +version = "1.5.0" [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" -ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -18,7 +17,6 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" SuiteSparse = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b" [weakdeps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 4346122..3c45c6d 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -79,9 +79,5 @@ const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParall ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelLNKDict - -include("parallel_testtools.jl") -export part2d, showgrid, partassemble!, assemblepartition! - end diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 97b4423..5325e76 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -9,34 +9,28 @@ mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension """ xmatrices::Vector{Tm} - nodeparts::Vector{Ti} - partnodes::Vector{Vector{Ti}} - colparts::Vector{Vector{Ti}} + colparts::Vector{Ti} + partnodes::Vector{Ti} end function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} + ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n), [Tm(m,n) for i=1:p], - zeros(Ti,n), - Vector{Ti}[], - Vector{Ti}[] + Ti[1,2], + Ti[1,n+1], ) end -function partcolors!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, partcolors) where {Tm, Tv, Ti} - ncol=maximum(partcolors) - colparts=[Ti[] for i=1:ncol] - for i=1:length(partcolors) - push!(colparts[partcolors[i]],i) - end +function partitioning!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} + ext.partnodes=partnodes ext.colparts=colparts ext end function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m, pc::Vector) where{Tm, Tv, Ti} ext=ExtendableSparseMatrixXParallel(m,n,length(pc)) - partcolors!(ext,pc) end @@ -44,7 +38,8 @@ function reset!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti},p::Integer) where m,n=size(ext.cscmatrix) ext.cscmatrix=spzeros(Tv, Ti, m, n) ext.xmatrices=[Tm(m,n) for i=1:p] - ext.nodeparts.=zero(Ti) + ext.colparts=Ti[1,2] + ext.partnodes=Ti[1,n+1] ext end @@ -52,34 +47,12 @@ function reset!(ext::ExtendableSparseMatrixXParallel) reset!(ext,length(ext.xmatrices)) end -function reset!(ext::ExtendableSparseMatrixXParallel,pc::Vector) - reset!(ext,length(pc)) - partcolors!(ext,pc) -end function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti} - ext.cscmatrix=sum!(ext.nodeparts, 
ext.xmatrices, ext.cscmatrix) + ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) np=length(ext.xmatrices) (m,n)=size(ext.cscmatrix) ext.xmatrices=[Tm(m,n) for i=1:np] - npts::Vector{Ti}=ext.nodeparts - pn=zeros(Ti,np) - for i=1:n - npi=npts[i] - if npi>0 - pn[npi]+=1 - end - end - partnodes=[zeros(Int,pn[i]) for i=1:np] - pn.=1 - for i=1:n - npi=ext.nodeparts[i] - if npi>0 - partnodes[npi][pn[npi]]=i - pn[npi]+=1 - end - end - ext.partnodes=partnodes ext end @@ -131,7 +104,6 @@ function rawupdateindex!(ext::ExtendableSparseMatrixXParallel, end end - # Needed in 1.9 function Base.:*(ext::ExtendableSparse.Experimental.ExtendableSparseMatrixXParallel{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA mul!(similar(x),ext,x) @@ -141,19 +113,18 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) flush!(ext) A=ext.cscmatrix colparts=ext.colparts + @show colparts partnodes=ext.partnodes + @show partnodes rows = SparseArrays.rowvals(A) vals = nonzeros(A) - r.=zero(eltype(ext)) m,n=size(A) - for icol=1:length(colparts) - part=colparts[icol] - @tasks for ip=1:length(part) - pnodes=partnodes[part[ip]] - for j in pnodes - @inbounds for i in nzrange(A,j) - r[rows[i]]+=vals[i]*x[j] + for icol=1:length(colparts)-1 + @tasks for ip=colparts[icol]:colparts[icol+1]-1 + for inode in partnodes[ip]:partnodes[ip+1]-1 + @inbounds for i in nzrange(A,inode) + r[rows[i]]+=vals[i]*x[inode] end end end diff --git a/src/experimental/parallel_testtools.jl b/src/experimental/parallel_testtools.jl deleted file mode 100644 index 83f75cd..0000000 --- a/src/experimental/parallel_testtools.jl +++ /dev/null @@ -1,224 +0,0 @@ -import ChunkSplitters -# Methods to test parallel assembly -# Will eventually become part of the package. - -""" - $(SIGNATURES) - -Return colored partitioing of grid made up by `X` and `Y` for work with `max(nt,4)` threads -as a vector `p` of a vector pairs of index ranges such that `p[i]` containes partions -of color i which can be assembled independently. - -The current algorithm creates `nt^2` partitions with `nt` colors. -""" -function part2d(X,Y, nt) - nt=max(4,nt) - XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt)) - YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt)) - partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt] - ipart=1 - col=1 - for jp=1:nt - for ip=1:nt - push!(partitions[col], (XP[ip], YP[jp])) - col=(col -1 +1 )%nt+1 - end - col=(col -1 +2)%nt+1 - end - partitions -end - -function colpart2d(X,Y,nt) - Nx=length(X) - Ny=length(Y) - p=part2d(X,Y,nt) - pc=zeros(Int,sum(length,p)) - jp=1 - for icol=1:length(p) - for ip=1:length(p[icol]) - pc[jp]=icol - jp+=1 - end - end - p,pc -end - - -""" - showgrid(Makie, ColorSchemes, X,Y,nt) - -Show grid partitioned according to [`part2d`](@ref). Needs a makie variant and ColorSchemes -to be passed as modules. 
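# A rough sketch of the colparts/partnodes layout introduced in flush!/mul!
# above (assumed semantics): colparts[c]:colparts[c+1]-1 indexes the
# partitions of color c, and partnodes[p]:partnodes[p+1]-1 the matrix columns
# owned by partition p. The defaults Ti[1,2] and Ti[1,n+1] thus encode one
# color with a single partition owning all columns.
n = 8
colparts  = [1, 2]
partnodes = [1, n + 1]
for icol in 1:length(colparts)-1                 # colors: processed sequentially
    for ip in colparts[icol]:colparts[icol+1]-1  # partitions of one color: parallelizable
        println("partition ", ip, " owns columns ", partnodes[ip]:partnodes[ip+1]-1)
    end
end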
-""" -function showgrid(Makie, ColorSchemes, X,Y,nt) - f = Makie.Figure() - ax = Makie.Axis(f[1, 1]; aspect = 1) - p=part2d(X,Y,nt) - ncol=length(p) - @show sum(length,p), ncol - colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol) - poly=Vector{Makie.Point2f}(undef,4) - for icol = 1:ncol - for (xp, yp) in p[icol] - for j in yp - for i in xp - poly[1]=Makie.Point2f(X[i], Y[j]) - poly[2]=Makie.Point2f(X[i + 1], Y[j]) - poly[3]=Makie.Point2f(X[i + 1], Y[j + 1]) - poly[4]=Makie.Point2f(X[i], Y[j + 1]) - Makie.poly!(copy(poly),color = colors[icol]) - end - end - end - end - f -end - - -""" - $(SIGNATURES) - -Assemble edge for finite volume laplacian. -Used by [`partassemble!`](@ref). -""" -function assembleedge!(A,v,k,l) - rawupdateindex!(A,+,v,k,k) - rawupdateindex!(A,+,-v,k,l) - rawupdateindex!(A,+,-v,l,k) - rawupdateindex!(A,+,v,l,l) -end - -function assembleedge!(A,v,k,l,tid) - rawupdateindex!(A,+,v,k,k,tid) - rawupdateindex!(A,+,-v,k,l,tid) - rawupdateindex!(A,+,-v,l,k,tid) - rawupdateindex!(A,+,v,l,l,tid) -end - -""" - $(SIGNATURES) - -Assemble finite volume Laplacian + diagnonal term -on grid cell `i,j`. -Used by [`partassemble!`](@ref). -""" -function assemblecell!(A,lindexes,X,Y,i,j,d) - hx=X[i+1]-X[i] - hy=Y[j+1]-Y[j] - ij00=lindexes[i,j] - ij10=lindexes[i+1,j] - ij11=lindexes[i+1,j+1] - ij01=lindexes[i,j+1] - - assembleedge!(A,0.5*hx/hy,ij00,ij01) - assembleedge!(A,0.5*hx/hy,ij10,ij11) - assembleedge!(A,0.5*hy/hx,ij00,ij10) - assembleedge!(A,0.5*hy/hx,ij01,ij11) - v=0.25*hx*hy - rawupdateindex!(A,+,v*d,ij00,ij00) - rawupdateindex!(A,+,v*d,ij01,ij01) - rawupdateindex!(A,+,v*d,ij10,ij10) - rawupdateindex!(A,+,v*d,ij11,ij11) -end - -function assemblecell!(A,lindexes,X,Y,i,j,d,tid) - hx=X[i+1]-X[i] - hy=Y[j+1]-Y[j] - ij00=lindexes[i,j] - ij10=lindexes[i+1,j] - ij11=lindexes[i+1,j+1] - ij01=lindexes[i,j+1] - - assembleedge!(A,0.5*hx/hy,ij00,ij01,tid) - assembleedge!(A,0.5*hx/hy,ij10,ij11,tid) - assembleedge!(A,0.5*hy/hx,ij00,ij10,tid) - assembleedge!(A,0.5*hy/hx,ij01,ij11,tid) - v=0.25*hx*hy - rawupdateindex!(A,+,v*d,ij00,ij00,tid) - rawupdateindex!(A,+,v*d,ij01,ij01,tid) - rawupdateindex!(A,+,v*d,ij10,ij10,tid) - rawupdateindex!(A,+,v*d,ij11,ij11,tid) -end - -""" - $(SIGNATURES) - -Assemble finite volume Laplacian + diagnonal term -on grid cells in partition described by ranges xp,yp. -Used by [`partassemble!`](@ref). -""" -function assemblepartition!(A,lindexes,X,Y,xp,yp,d) - for j in yp - for i in xp - assemblecell!(A,lindexes,X,Y,i,j,d) - end - end -end - -function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid) - for j in yp - for i in xp - assemblecell!(A,lindexes,X,Y,i,j,d,tid) - end - end -end - -""" - partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1) - -Partitioned, cellwise, multithreaded assembly of finite difference matrix for -` -Δu + d*u=f` with homogeneous Neumann bc on grid set up by coordinate vectors -`X` and `Y` partitioned for work with `nt` threads -Does not work during structure setup. 
-""" -function partassemble!(A,X,Y,nt=1;d=0.1) - Nx=length(X) - Ny=length(Y) - size(A,1)==Nx*Ny || error("incompatible size of A") - size(A,2)==Nx*Ny || error("incompatible size of A") - - lindexes=LinearIndices((1:Nx,1:Ny)) - if nt==1 - assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d) - else - p=part2d(X,Y,nt) - for icol=1:length(p) - @tasks for (xp, yp) in p[icol] - assemblepartition!(A,lindexes,X,Y,xp,yp,d) - end - end - end - flush!(A) -end - - -function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true) - Nx=length(X) - Ny=length(Y) - - size(A,1)==Nx*Ny || error("incompatible size of A") - size(A,2)==Nx*Ny || error("incompatible size of A") - - lindexes=LinearIndices((1:Nx,1:Ny)) - if nt==1 - reset!(A,1) - assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1) - else - p,pc=colpart2d(X,Y,nt) - if reset - reset!(A,pc) - end - jp0=0 - for icol=1:length(p) - npc=length(p[icol]) - @tasks for ip=1:npc - (xp, yp)=p[icol][ip] - assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip) - end - jp0+=npc - end - end - flush!(A) -end - - diff --git a/src/experimental/sparsematrixdict.jl b/src/experimental/sparsematrixdict.jl index 2bd6295..c5ee469 100644 --- a/src/experimental/sparsematrixdict.jl +++ b/src/experimental/sparsematrixdict.jl @@ -83,7 +83,7 @@ function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC cscmatrix end -function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} +function Base.sum(dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} lnew=sum(m->length(m.values),dictmatrices) if lnew>0 (;colptr,nzval,rowval,m,n)=cscmatrix @@ -105,7 +105,6 @@ function sum!(nodeparts, dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatri ip=1 for m in dictmatrices for (p,v) in m.values - nodeparts[last(p)]=ip I[i]=first(p) J[i]=last(p) V[i]=v diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index a53df25..bcc67f4 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -382,7 +382,7 @@ Add SparseMatrixCSC matrix and [`SparseMatrixLNKDict`](@ref) lnk, returning a S """ Base.:+(lnk::SparseMatrixLNKDict, csc::SparseMatrixCSC) = add_directly(lnk, csc) -function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} +function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} lnew=sum(nnz,lnkdictmatrices) if lnew>0 (;colptr,nzval,rowval,m,n)=cscmatrix @@ -404,7 +404,6 @@ function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cs ip=1 for lnk in lnkdictmatrices for (j,k) in lnk.colstart - nodeparts[j]=ip while k>0 I[i]=lnk.rowval[k] J[i]=j diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index f7a322a..d445728 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -377,7 +377,7 @@ Add SparseMatrixCSC matrix and [`SparseMatrixLNKX`](@ref) lnk, returning a Spar """ Base.:+(lnk::SparseMatrixLNKX, csc::SparseMatrixCSC) = add_directly(lnk, csc) -function sum!(nodeparts, lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} +function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, 
cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
     lnew=sum(nnz,lnkdictmatrices)
     if lnew>0
         (;colptr,nzval,rowval,m,n)=cscmatrix
diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl
index 936f566..7fe1029 100644
--- a/test/ExperimentalParallel.jl
+++ b/test/ExperimentalParallel.jl
@@ -6,6 +6,226 @@ using BenchmarkTools
 using OhMyThreads: @tasks
 using Test
 
+import ChunkSplitters
+# Methods to test parallel assembly
+# Will eventually become part of the package.
+
+"""
+
+Return a colored partitioning of the grid made up by `X` and `Y` for work with `max(nt,4)` threads
+as a vector `p` of vectors of pairs of index ranges such that `p[i]` contains the partitions
+of color `i`, which can be assembled independently.
+
+The current algorithm creates `nt^2` partitions with `nt` colors.
+"""
+function part2d(X,Y, nt)
+    nt=max(4,nt)
+    XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt))
+    YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt))
+    partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt]
+    ipart=1
+    col=1
+    for jp=1:nt
+        for ip=1:nt
+            push!(partitions[col], (XP[ip], YP[jp]))
+            col=(col -1 +1 )%nt+1
+        end
+        col=(col -1 +2)%nt+1
+    end
+    partitions
+end
+
+function colpart2d(X,Y,nt)
+    Nx=length(X)
+    Ny=length(Y)
+    p=part2d(X,Y,nt)
+    pc=zeros(Int,sum(length,p))
+    jp=1
+    for icol=1:length(p)
+        for ip=1:length(p[icol])
+            pc[jp]=icol
+            jp+=1
+        end
+    end
+    p,pc
+end
+
+
+"""
+    showgrid(Makie, ColorSchemes, X,Y,nt)
+
+Show grid partitioned according to [`part2d`](@ref). Needs a Makie variant and ColorSchemes
+to be passed as modules.
+"""
+function showgrid(Makie, ColorSchemes, X,Y,nt)
+    f = Makie.Figure()
+    ax = Makie.Axis(f[1, 1]; aspect = 1)
+    p=part2d(X,Y,nt)
+    ncol=length(p)
+    @show sum(length,p), ncol
+    colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol)
+    poly=Vector{Makie.Point2f}(undef,4)
+    for icol = 1:ncol
+        for (xp, yp) in p[icol]
+            for j in yp
+                for i in xp
+                    poly[1]=Makie.Point2f(X[i], Y[j])
+                    poly[2]=Makie.Point2f(X[i + 1], Y[j])
+                    poly[3]=Makie.Point2f(X[i + 1], Y[j + 1])
+                    poly[4]=Makie.Point2f(X[i], Y[j + 1])
+                    Makie.poly!(copy(poly),color = colors[icol])
+                end
+            end
+        end
+    end
+    f
+end
+
+
+"""
+
+Assemble edge for finite volume Laplacian.
+Used by [`partassemble!`](@ref).
+"""
+function assembleedge!(A,v,k,l)
+    rawupdateindex!(A,+,v,k,k)
+    rawupdateindex!(A,+,-v,k,l)
+    rawupdateindex!(A,+,-v,l,k)
+    rawupdateindex!(A,+,v,l,l)
+end
+
+function assembleedge!(A,v,k,l,tid)
+    rawupdateindex!(A,+,v,k,k,tid)
+    rawupdateindex!(A,+,-v,k,l,tid)
+    rawupdateindex!(A,+,-v,l,k,tid)
+    rawupdateindex!(A,+,v,l,l,tid)
+end
+
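# A small usage sketch for part2d as defined above, assuming ChunkSplitters
# is available: nt^2 partitions in nt colors; partitions sharing a color
# cover disjoint cell ranges and can therefore be assembled concurrently.
X = range(0, 1, length=17)
Y = range(0, 1, length=17)
p = part2d(X, Y, 4)
@assert length(p) == 4        # nt colors ...
@assert sum(length, p) == 16  # ... holding nt^2 partitions in total
for (xp, yp) in p[1]          # cell index ranges of all color-1 partitions
    println("cells ", xp, " x ", yp)
end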
+"""
+Assemble finite volume Laplacian + diagonal term
+on grid cell `i,j`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblecell!(A,lindexes,X,Y,i,j,d)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+
+    assembleedge!(A,0.5*hx/hy,ij00,ij01)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11)
+    v=0.25*hx*hy
+    rawupdateindex!(A,+,v*d,ij00,ij00)
+    rawupdateindex!(A,+,v*d,ij01,ij01)
+    rawupdateindex!(A,+,v*d,ij10,ij10)
+    rawupdateindex!(A,+,v*d,ij11,ij11)
+end
+
+function assemblecell!(A,lindexes,X,Y,i,j,d,tid)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+
+    assembleedge!(A,0.5*hx/hy,ij00,ij01,tid)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11,tid)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10,tid)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11,tid)
+    v=0.25*hx*hy
+    rawupdateindex!(A,+,v*d,ij00,ij00,tid)
+    rawupdateindex!(A,+,v*d,ij01,ij01,tid)
+    rawupdateindex!(A,+,v*d,ij10,ij10,tid)
+    rawupdateindex!(A,+,v*d,ij11,ij11,tid)
+end
+
+"""
+
+Assemble finite volume Laplacian + diagonal term
+on grid cells in partition described by ranges xp,yp.
+Used by [`partassemble!`](@ref).
+"""
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+    for j in yp
+        for i in xp
+            assemblecell!(A,lindexes,X,Y,i,j,d)
+        end
+    end
+end
+
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid)
+    for j in yp
+        for i in xp
+            assemblecell!(A,lindexes,X,Y,i,j,d,tid)
+        end
+    end
+end
+
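# A hand-checkable instance of the cell stencil above, using the
# rawupdateindex!-based assemblecell! just defined: a single unit cell
# (hx = hy = 1) with d = 0 gives diagonal entries 1.0, entries -0.5 towards
# the two edge neighbors of each node, and zero row sums.
using ExtendableSparse, SparseArrays
A = ExtendableSparseMatrix(4, 4)
lind = LinearIndices((1:2, 1:2))
assemblecell!(A, lind, [0.0, 1.0], [0.0, 1.0], 1, 1, 0.0)
flush!(A)
Matrix(sparse(A))   # the 4x4 stencil of the single cell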
+"""
+    partassemble!(A,X,Y,nt=1; d=0.1)
+
+Partitioned, cellwise, multithreaded assembly of the finite difference matrix for
+`-Δu + d*u=f` with homogeneous Neumann bc on the grid set up by coordinate vectors
+`X` and `Y`, partitioned for work with `nt` threads.
+Does not work during the structure setup phase.
+"""
+function partassemble!(A,X,Y,nt=1;d=0.1)
+    Nx=length(X)
+    Ny=length(Y)
+    size(A,1)==Nx*Ny || error("incompatible size of A")
+    size(A,2)==Nx*Ny || error("incompatible size of A")
+
+    lindexes=LinearIndices((1:Nx,1:Ny))
+    if nt==1
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Ny-1,d)
+    else
+        p=part2d(X,Y,nt)
+        for icol=1:length(p)
+            @tasks for (xp, yp) in p[icol]
+                assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+            end
+        end
+    end
+    flush!(A)
+end
+
+
+function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true)
+    Nx=length(X)
+    Ny=length(Y)
+
+    size(A,1)==Nx*Ny || error("incompatible size of A")
+    size(A,2)==Nx*Ny || error("incompatible size of A")
+
+    lindexes=LinearIndices((1:Nx,1:Ny))
+    if nt==1
+        reset!(A,1)
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Ny-1,d,1)
+    else
+        p,pc=colpart2d(X,Y,nt)
+        if reset
+            reset!(A,pc)
+        end
+        jp0=0
+        for icol=1:length(p)
+            npc=length(p[icol])
+            @tasks for ip=1:npc
+                (xp, yp)=p[icol][ip]
+                assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip)
+            end
+            jp0+=npc
+        end
+    end
+    flush!(A)
+end
+
+
+
 """
 `test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)`
diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl
index d6bb113..4daf8e9 100644
--- a/test/ExperimentalXParallel.jl
+++ b/test/ExperimentalXParallel.jl
@@ -4,32 +4,179 @@ using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental
 using BenchmarkTools
 using ExtendableGrids
 #using MKLSparse
-using SparseMatricesCSR
+#using SparseMatricesCSR
 using Test
-using OhMyThreads
+using ExtendableSparse, ExtendableGrids, Metis
+using LinearAlgebra
+using BenchmarkTools
+using Test
+using OhMyThreads: @tasks
+using RecursiveFactorization
+
+function testgrid(N; dim=3)
+    X=range(0,1,length=N^(1.0/dim)|> ceil |> Int)
+    simplexgrid((X for i=1:dim)...)
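    # Tensor product simplex grid with roughly N nodes in total: each
    # coordinate direction gets ceil(N^(1/dim)) points, e.g. dim=3,
    # N=20000 gives 28 points per direction, hence 28^3 = 21952 nodes.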
+end + + +function coordmatrix!(C,coord, cellnodes,k) + spacedim=size(coord,1) + celldim=size(cellnodes,1) + @inbounds for jj=1:celldim + C[1,jj]=1 + @inbounds for ii=1:spacedim + C[ii+1,jj]=coord[ii,cellnodes[jj,k]] + end + end +end + +function gradient!(G,C,factdim,I,ipiv) + clu=RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + vol=abs(det(clu))/factdim + ldiv!(G,clu,I) + return vol +end + +function scalpro(G,dim,jl,il) + s=0.0 + @inbounds @simd for k=1:dim + s+=G[jl,k+1]*G[il,k+1] + end + return s +end + +function stiffness!(S,dim,G) + @inbounds for il=1:dim+1 + S[il,il]=scalpro(G,dim,il,il) + @inbounds for jl=il+1:dim+1 + S[il,jl]=scalpro(G,dim,jl,il) + S[jl,il]=S[il,jl] + end + end + return S +end + +function testassemble!(A_h,grid) + coord=grid[Coordinates] + cellnodes=grid[CellNodes] + ncells=num_cells(grid) + dim=size(coord,1) + lnodes=dim+1 + factdim::Float64=factorial(dim) + S=zeros(lnodes, lnodes) # local stiffness matrix + C=zeros(lnodes,lnodes) # local coordinate matrix + G=zeros(lnodes, lnodes) # shape function gradients + ipiv=zeros(Int,lnodes) + I=Matrix(Diagonal(ones(lnodes))) + ncells=size(cellnodes,2) + for icell=1:ncells + coordmatrix!(C,coord,cellnodes,icell) + vol=gradient!(G,C,factdim,I,ipiv) + stiffness!(S,dim,G) + for il=1:lnodes + i=cellnodes[il,icell] + rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i) + for jl=1:lnodes + j=cellnodes[jl,icell] + rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j) + end + end + end + flush!(A_h) +end + +function testassemble_parallel!(A_h,grid) + coord=grid[Coordinates] + cellnodes=grid[CellNodes] + ncells=num_cells(grid) + dim=size(coord,1) + lnodes=dim+1 + npart=num_partitions(grid) + factdim::Float64=factorial(dim) + SS=[zeros(lnodes, lnodes) for i=1:npart] # local stiffness matrix + CC=[zeros(lnodes, lnodes) for i=1:npart] # local coordinate matrix + GG=[zeros(lnodes, lnodes) for i=1:npart] # shape function gradients + IP=[zeros(Int,lnodes) for i=1:npart] # shape function gradients + I=Matrix(Diagonal(ones(lnodes))) + ncells=size(cellnodes,2) + for color in pcolors(grid) + @tasks for part in pcolor_partitions(grid,color) + C=CC[part] + S=SS[part] + G=GG[part] + ipiv=IP[part] + for icell in partition_cells(grid, part) + coordmatrix!(C,coord,cellnodes,icell) + vol=gradient!(G,C,factdim,I,ipiv) + stiffness!(S,dim,G) + for il=1:lnodes + i=cellnodes[il,icell] + rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i, part) + for jl=1:lnodes + j=cellnodes[jl,icell] + rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j, part) + end + end + end + end + end + flush!(A_h) +end + + + +function testassemble(grid) + nnodes=num_nodes(grid) + A_h=ExtendableSparseMatrix(nnodes,nnodes) + testassemble!(A_h,grid) + A_h.cscmatrix.nzval.=0 + testassemble!(A_h,grid) +end + + +function test_correctness_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + testassemble!(A0,grid) + testassemble!(A,grid) + @test sparse(A0)≈sparse(A) +end -function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) +function speed_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid=testgrid(N;dim) + nnodes=num_nodes(grid) + A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) + A=Tm{Float64,Int}(nnodes,nnodes) + tbase= @belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) + tx= @belapsed testassemble!($A,$grid) seconds=1 setup=(reset!($A)) + tbase/tx +end + + +function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; 
allnp=[10,15,20], dim=3) grid=testgrid(N;dim) nnodes=num_nodes(grid) A=Tm{Float64,Int}(nnodes,nnodes,1) - allnp=[10,15,20] + # Assembele without partitioning # this gives the "base truth" to compare with testassemble_parallel!(A,grid) # Save the nonzeros - nz=copy(nonzeros(A)) + nz=sort(copy(nonzeros(A))) for np in allnp # Reset the nonzeros, keeping the structure intact nonzeros(A).=0 # Parallel assembly whith np threads pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + reset!(A,np) @show num_partitions_per_color(pgrid) testassemble_parallel!(A,pgrid) - @test nonzeros(A)≈nz + @test sort(nonzeros(A))≈nz end end @@ -45,16 +192,16 @@ function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15 # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) - nz=copy(nonzeros(A0)) + nz=sort(copy(nonzeros(A0))) for np in allnp # Make a new matrix and assemble parallel. # this should result in the same nonzeros pgrid=partition(grid,PlainMetisPartitioning(npart=np)) A=Tm(nnodes,nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - @test checkpartitioning(pgrid) + @test check_partitioning(pgrid) testassemble_parallel!(A,pgrid) - @test nonzeros(A) ≈ nz + @test sort(nonzeros(A)) ≈ nz end end @@ -64,14 +211,17 @@ function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,2 # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) + b=rand(nnodes) + A0b=A0*b for np in allnp pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + @test check_partitioning(pgrid) A=Tm(nnodes,nnodes, num_partitions(pgrid)) + ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) testassemble_parallel!(A,pgrid) - flush!(A) - partcolors!(A,partition_pcolors(pgrid)) - b=rand(nnodes) - @test A*b ≈ A0*b + invp=invperm(pgrid[NodePermutation]) + @show norm(A0b[invp] - A*b[invp], Inf) + @test A0b[invp] ≈ A*b[invp] end end @@ -81,7 +231,7 @@ function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], di # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) - nz=copy(nonzeros(A0)) + nz=copy(nonzeros(A0)) |>sort # Get the base timing # During setup, set matrix entries to zero while keeping the structure t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(nonzeros($A0).=0) @@ -95,7 +245,7 @@ function speedup_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], di reset!(A,num_partitions(pgrid)) testassemble_parallel!(A,pgrid) t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(nonzeros($A).=0) - @assert nonzeros(A)≈nz + @assert sort(nonzeros(A))≈nz push!(result,(np,round(t0/t,digits=2))) end result @@ -107,11 +257,11 @@ function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim # Get the "ground truth" A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) testassemble!(A0,grid) - nz=copy(nonzeros(A0)) + nz=nonzeros(A0) reset!(A0) testassemble!(A0,grid) @assert nonzeros(A0)≈(nz) - + nz=sort(nz) # Get the base timing # During setup, reset matrix to empty state. 
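    # reset! in the setup expression below returns the matrix to its freshly
    # constructed state, so each timed sample includes structure creation,
    # not only the update of already existing entries.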
t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) @@ -125,7 +275,7 @@ function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim reset!(A,num_partitions(pgrid)) @show num_partitions_per_color(pgrid) t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(reset!($A,num_partitions($pgrid))) - @assert nonzeros(A)≈nz + @assert sort(nonzeros(A))≈nz push!(result,(np,round(t0/t,digits=2))) end result @@ -148,16 +298,16 @@ function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3 reset!(A,num_partitions(pgrid)) testassemble_parallel!(A,pgrid) flush!(A) - partcolors!(A,partition_pcolors(pgrid)) - + ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) t=@belapsed $A*$b seconds=1 - @assert A0b≈A*b + invp=invperm(pgrid[NodePermutation]) + @assert A0b[invp] ≈ A*b[invp] push!(result,(np,round(t0/t,digits=2))) end result end - +#= function mymul(A::SparseMatrixCSR,v::AbstractVector) y=copy(v) A.n == size(v, 1) || throw(DimensionMismatch()) @@ -203,6 +353,7 @@ function speedup_csrmul(N; dim=3) t0/t1 end +=# end diff --git a/test/Project.toml b/test/Project.toml index 78cab80..24a28c6 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" @@ -9,13 +10,16 @@ IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] +ExtendableGrids = "1.7" IterativeSolvers = "0.9" diff --git a/test/runtests.jl b/test/runtests.jl index 244dbb5..25d5d4c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,19 +9,17 @@ using BenchmarkTools using MultiFloats using ForwardDiff -@testset "ExperimentalScalar" begin - include("ExperimentalScalar.jl") + +@testset "ExperimentalXParallel" begin + include("ExperimentalXParallel.jl") for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] - for N in [100,rand(30:200),500] - ExperimentalScalar.test_correctness_build(N,Tm) + for N in [10000,20000] + ExperimentalXParallel.test_correctness_build_seq(N,Tm) end end -end -@testset "ExperimentalXParallel" begin - include("ExperimentalXParallel.jl") for Tm in [ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] - for N in [100,rand(30:200),500] + for N in [10000,20000] ExperimentalXParallel.test_correctness_update(N,Tm) ExperimentalXParallel.test_correctness_build(N,Tm) ExperimentalXParallel.test_correctness_mul(N,Tm) From d796b787886ce76f37477c8aeb0ecad9a8afce64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 18 Jun 2024 
09:57:00 +0200 Subject: [PATCH 34/44] move all experimental code to experimental subdir --- docs/make.jl | 1 + src/experimental/Experimental.jl | 10 +++++----- .../ExtendableSparseParallel.jl | 0 .../ilu_Al-Kurdi_Mittal.jl | 0 .../ExtendableSparseMatrixParallel}/iluam.jl | 0 .../pilu_Al-Kurdi_Mittal.jl | 0 .../ExtendableSparseMatrixParallel}/piluam.jl | 0 .../ExtendableSparseMatrixParallel/preparatory.jl | 0 .../ExtendableSparseMatrixParallel/struct_flush.jl | 0 .../ExtendableSparseMatrixParallel/supersparse.jl | 0 10 files changed, 6 insertions(+), 5 deletions(-) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/ilu_Al-Kurdi_Mittal.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/iluam.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/pilu_Al-Kurdi_Mittal.jl (100%) rename src/{factorizations => experimental/ExtendableSparseMatrixParallel}/piluam.jl (100%) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/preparatory.jl (100%) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/struct_flush.jl (100%) rename src/{matrix => experimental}/ExtendableSparseMatrixParallel/supersparse.jl (100%) diff --git a/docs/make.jl b/docs/make.jl index 3a70bf5..5dbe7c4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,6 +5,7 @@ function mkdocs() makedocs(; sitename = "ExtendableSparse.jl", modules = [ExtendableSparse], doctest = false, + warnonly = true, clean = false, authors = "J. Fuhrmann", repo = "https://github.com/j-fu/ExtendableSparse.jl", diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 3c45c6d..546810e 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -13,15 +13,15 @@ using OhMyThreads: @tasks import ExtendableSparse: factorize!, update! 
-include(joinpath(@__DIR__, "..", "matrix", "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) -include(joinpath(@__DIR__, "..", "factorizations","ilu_Al-Kurdi_Mittal.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ilu_Al-Kurdi_Mittal.jl")) #using .ILUAM -include(joinpath(@__DIR__, "..", "factorizations","pilu_Al-Kurdi_Mittal.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "pilu_Al-Kurdi_Mittal.jl")) #using .PILUAM -include(joinpath(@__DIR__, "..", "factorizations","iluam.jl")) -include(joinpath(@__DIR__, "..", "factorizations","piluam.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel" ,"iluam.jl")) +include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "piluam.jl")) @eval begin @makefrommatrix ILUAMPreconditioner diff --git a/src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl rename to src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl diff --git a/src/factorizations/ilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl similarity index 100% rename from src/factorizations/ilu_Al-Kurdi_Mittal.jl rename to src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl diff --git a/src/factorizations/iluam.jl b/src/experimental/ExtendableSparseMatrixParallel/iluam.jl similarity index 100% rename from src/factorizations/iluam.jl rename to src/experimental/ExtendableSparseMatrixParallel/iluam.jl diff --git a/src/factorizations/pilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl similarity index 100% rename from src/factorizations/pilu_Al-Kurdi_Mittal.jl rename to src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl diff --git a/src/factorizations/piluam.jl b/src/experimental/ExtendableSparseMatrixParallel/piluam.jl similarity index 100% rename from src/factorizations/piluam.jl rename to src/experimental/ExtendableSparseMatrixParallel/piluam.jl diff --git a/src/matrix/ExtendableSparseMatrixParallel/preparatory.jl b/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/preparatory.jl rename to src/experimental/ExtendableSparseMatrixParallel/preparatory.jl diff --git a/src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl b/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/struct_flush.jl rename to src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl diff --git a/src/matrix/ExtendableSparseMatrixParallel/supersparse.jl b/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl similarity index 100% rename from src/matrix/ExtendableSparseMatrixParallel/supersparse.jl rename to src/experimental/ExtendableSparseMatrixParallel/supersparse.jl From 6cd48cdc64c5b279b5808229436cbca063a72aa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Wed, 19 Jun 2024 12:52:28 +0200 Subject: [PATCH 35/44] tweak tests --- src/experimental/extendablesparsematrixparallel.jl | 6 ++---- src/experimental/sparsematrixlnkx.jl | 1 - test/ExperimentalXParallel.jl | 9 +++++---- test/runtests.jl | 12 
++++++------ 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index 5325e76..ba70a65 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -113,16 +113,14 @@ function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) flush!(ext) A=ext.cscmatrix colparts=ext.colparts - @show colparts partnodes=ext.partnodes - @show partnodes rows = SparseArrays.rowvals(A) vals = nonzeros(A) r.=zero(eltype(ext)) m,n=size(A) for icol=1:length(colparts)-1 - @tasks for ip=colparts[icol]:colparts[icol+1]-1 - for inode in partnodes[ip]:partnodes[ip+1]-1 + @tasks for ip in colparts[icol]:colparts[icol+1]-1 + @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1 @inbounds for i in nzrange(A,inode) r[rows[i]]+=vals[i]*x[inode] end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl index d445728..bd2cdcf 100644 --- a/src/experimental/sparsematrixlnkx.jl +++ b/src/experimental/sparsematrixlnkx.jl @@ -400,7 +400,6 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::S for lnk in lnkdictmatrices for j=1:n k=lnk.colstart[j] - nodeparts[j]=ip while k>0 I[i]=lnk.rowval[k] J[i]=j diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index b87b71a..e6322f4 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -199,7 +199,7 @@ function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15 pgrid=partition(grid,PlainMetisPartitioning(npart=np)) A=Tm(nnodes,nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - @test check_partitioning(pgrid) + @test check_partitioning(pgrid, cellpartonly=true) testassemble_parallel!(A,pgrid) @test sort(nonzeros(A)) ≈ nz end @@ -215,13 +215,14 @@ function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,2 A0b=A0*b for np in allnp pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - @test check_partitioning(pgrid) + @test check_partitioning(pgrid, cellpartonly=false) A=Tm(nnodes,nnodes, num_partitions(pgrid)) ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) testassemble_parallel!(A,pgrid) invp=invperm(pgrid[NodePermutation]) - @show norm(A0b[invp] - A*b[invp], Inf) - @test A0b[invp] ≈ A*b[invp] + diff=norm(A0b[invp] - A*b[invp], Inf) + @show diff + @test diff Date: Mon, 24 Jun 2024 00:18:10 +0200 Subject: [PATCH 36/44] untweak tests, use partitioning correction --- .JuliaFormatter.toml | 7 +- src/experimental/sparsematrixlnkdict.jl | 11 +- test/ExperimentalXParallel.jl | 397 ++++++++++++------------ 3 files changed, 219 insertions(+), 196 deletions(-) diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 02aa07d..5458760 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,4 +1,7 @@ -style = "sciml" +style = "yas" always_for_in = false -separate_kwargs_with_semicolon = true format_markdown = true +import_to_using = false +pipe_to_function_call = false +short_to_long_function_def = false +always_use_return = false \ No newline at end of file diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index bcc67f4..6ae7a6c 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ b/src/experimental/sparsematrixlnkdict.jl @@ -216,7 +216,7 @@ It assumes that `op(0,0)==0`. If `v` is zero a new entry is created nevertheless. 
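For orientation, the semantics being exercised here: `updateindex!` skips zero values, while `rawupdateindex!` always creates a structural entry, which is what makes it usable for building the sparsity pattern. A minimal sketch with the exported API (matrix size and indices are arbitrary):

```julia
using ExtendableSparse, SparseArrays

A = ExtendableSparseMatrix(4, 4)
rawupdateindex!(A, +, 0.0, 1, 2) # inserts a structural entry even though v == 0
updateindex!(A, +, 0.0, 3, 4)    # v == 0: no entry is created
flush!(A)
@assert nnz(A) == 1
```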
""" function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) +@time k, k0 = findindex(lnk, i, j) if k > 0 lnk.nzval[k] = op(lnk.nzval[k], v) else @@ -423,6 +423,15 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix return cscmatrix end +function reset!(m::SparseMatrixLNKDict{Tv,Ti}) where {Tv,Ti} + m.nnz=0 + m.nentries=0 + m.colptr=zeros(Ti,10) + m.colstart::Dict{Ti,Ti} + m.rowval=zeros(Ti,10) + m.nzval=zeros(Ti,10) + m +end """ diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index e6322f4..03bf2ad 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -1,6 +1,6 @@ module ExperimentalXParallel -using ExtendableSparse,SparseArrays, ExtendableSparse.Experimental +using ExtendableSparse, SparseArrays, ExtendableSparse.Experimental using BenchmarkTools using ExtendableGrids #using MKLSparse @@ -15,107 +15,105 @@ using OhMyThreads: @tasks using RecursiveFactorization function testgrid(N; dim=3) - X=range(0,1,length=N^(1.0/dim)|> ceil |> Int) - simplexgrid((X for i=1:dim)...) + X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int) + simplexgrid((X for i = 1:dim)...) end - -function coordmatrix!(C,coord, cellnodes,k) +function coordmatrix!(C, coord, cellnodes, k) spacedim=size(coord,1) celldim=size(cellnodes,1) - @inbounds for jj=1:celldim - C[1,jj]=1 - @inbounds for ii=1:spacedim - C[ii+1,jj]=coord[ii,cellnodes[jj,k]] + @inbounds for jj = 1:celldim + C[1, jj] = 1 + @inbounds for ii = 1:spacedim + C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] end end end -function gradient!(G,C,factdim,I,ipiv) - clu=RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) - vol=abs(det(clu))/factdim - ldiv!(G,clu,I) - return vol +function gradient!(G, C, factdim, I, ipiv) + clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + ldiv!(G, clu, I) + abs(det(clu)) / factdim end -function scalpro(G,dim,jl,il) - s=0.0 - @inbounds @simd for k=1:dim - s+=G[jl,k+1]*G[il,k+1] +function scalpro(G, dim, jl, il) + s = 0.0 + @inbounds @simd for k = 1:dim + s += G[jl, k + 1] * G[il, k + 1] end return s end -function stiffness!(S,dim,G) - @inbounds for il=1:dim+1 - S[il,il]=scalpro(G,dim,il,il) - @inbounds for jl=il+1:dim+1 - S[il,jl]=scalpro(G,dim,jl,il) - S[jl,il]=S[il,jl] +function stiffness!(S, dim, G) + @inbounds for il = 1:(dim + 1) + S[il, il] = scalpro(G, dim, il, il) + @inbounds for jl = (il + 1):(dim + 1) + S[il, jl] = scalpro(G, dim, jl, il) + S[jl, il] = S[il, jl] end end return S end -function testassemble!(A_h,grid) - coord=grid[Coordinates] - cellnodes=grid[CellNodes] - ncells=num_cells(grid) - dim=size(coord,1) - lnodes=dim+1 - factdim::Float64=factorial(dim) - S=zeros(lnodes, lnodes) # local stiffness matrix - C=zeros(lnodes,lnodes) # local coordinate matrix - G=zeros(lnodes, lnodes) # shape function gradients - ipiv=zeros(Int,lnodes) - I=Matrix(Diagonal(ones(lnodes))) - ncells=size(cellnodes,2) - for icell=1:ncells - coordmatrix!(C,coord,cellnodes,icell) - vol=gradient!(G,C,factdim,I,ipiv) - stiffness!(S,dim,G) - for il=1:lnodes - i=cellnodes[il,icell] - rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i) - for jl=1:lnodes - j=cellnodes[jl,icell] - rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j) +function testassemble!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + factdim::Float64 = factorial(dim) + S = zeros(lnodes, lnodes) # local stiffness matrix + C = zeros(lnodes, lnodes) 
# local coordinate matrix + G = zeros(lnodes, lnodes) # shape function gradients + ipiv = zeros(Int, lnodes) + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for icell = 1:ncells + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) end end end flush!(A_h) end -function testassemble_parallel!(A_h,grid) - coord=grid[Coordinates] - cellnodes=grid[CellNodes] - ncells=num_cells(grid) - dim=size(coord,1) - lnodes=dim+1 - npart=num_partitions(grid) - factdim::Float64=factorial(dim) - SS=[zeros(lnodes, lnodes) for i=1:npart] # local stiffness matrix - CC=[zeros(lnodes, lnodes) for i=1:npart] # local coordinate matrix - GG=[zeros(lnodes, lnodes) for i=1:npart] # shape function gradients - IP=[zeros(Int,lnodes) for i=1:npart] # shape function gradients - I=Matrix(Diagonal(ones(lnodes))) - ncells=size(cellnodes,2) +function testassemble_parallel!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + npart = num_partitions(grid) + factdim::Float64 = factorial(dim) + SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix + CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix + GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients + IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) for color in pcolors(grid) - @tasks for part in pcolor_partitions(grid,color) - C=CC[part] - S=SS[part] - G=GG[part] - ipiv=IP[part] + @tasks for part in pcolor_partitions(grid, color) + C = CC[part] + S = SS[part] + G = GG[part] + ipiv = IP[part] for icell in partition_cells(grid, part) - coordmatrix!(C,coord,cellnodes,icell) - vol=gradient!(G,C,factdim,I,ipiv) - stiffness!(S,dim,G) - for il=1:lnodes - i=cellnodes[il,icell] - rawupdateindex!(A_h,+,0.1*vol/(dim+1),i,i, part) - for jl=1:lnodes - j=cellnodes[jl,icell] - rawupdateindex!(A_h,+,vol*(S[il,jl]),i,j, part) + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) end end end @@ -124,59 +122,50 @@ function testassemble_parallel!(A_h,grid) flush!(A_h) end - - -function testassemble(grid) - nnodes=num_nodes(grid) - A_h=ExtendableSparseMatrix(nnodes,nnodes) - testassemble!(A_h,grid) - A_h.cscmatrix.nzval.=0 - testassemble!(A_h,grid) +function test_correctness_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + A = Tm{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + testassemble!(A, grid) + @test sparse(A0) ≈ sparse(A) end - -function test_correctness_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - A=Tm{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - testassemble!(A,grid) - @test sparse(A0)≈sparse(A) +function speedup_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) + grid = 
testgrid(N; dim) + nnodes = num_nodes(grid) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + A = Tm{Float64,Int}(nnodes, nnodes) + tbase = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0)) + tx = @belapsed testassemble!($A, $grid) seconds = 1 setup = (reset!($A)) + tbase / tx end -function speed_build_seq(N,Tm::Type{<:AbstractSparseMatrix}; dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - A=Tm{Float64,Int}(nnodes,nnodes) - tbase= @belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) - tx= @belapsed testassemble!($A,$grid) seconds=1 setup=(reset!($A)) - tbase/tx -end - - -function test_correctness_update(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) - A=Tm{Float64,Int}(nnodes,nnodes,1) - +function test_correctness_update(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) + A = Tm{Float64,Int}(nnodes, nnodes, 1) # Assembele without partitioning # this gives the "base truth" to compare with - testassemble_parallel!(A,grid) + testassemble_parallel!(A, grid) # Save the nonzeros - nz=sort(copy(nonzeros(A))) + nz = sort(copy(nonzeros(A))) for np in allnp # Reset the nonzeros, keeping the structure intact - nonzeros(A).=0 + nonzeros(A) .= 0 # Parallel assembly whith np threads - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - reset!(A,np) + pgrid = partition(grid, Tp(; npart=np)) + reset!(A, np) @show num_partitions_per_color(pgrid) - testassemble_parallel!(A,pgrid) - @test sort(nonzeros(A))≈nz + testassemble_parallel!(A, pgrid) + @test sort(nonzeros(A)) ≈ nz end end @@ -186,124 +175,148 @@ end Test correctness of parallel assembly on NxN grid during build phase, assuming that no structure has been assembled. """ -function test_correctness_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function test_correctness_build(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - nz=sort(copy(nonzeros(A0))) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = sort(copy(nonzeros(A0))) for np in allnp # Make a new matrix and assemble parallel. 
# this should result in the same nonzeros - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - A=Tm(nnodes,nnodes, num_partitions(pgrid)) + pgrid = partition(grid, Tp(; npart=np)) + A = Tm(nnodes, nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) @test check_partitioning(pgrid, cellpartonly=true) - testassemble_parallel!(A,pgrid) + testassemble_parallel!(A, pgrid) @test sort(nonzeros(A)) ≈ nz end end -function test_correctness_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function test_correctness_mul(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - b=rand(nnodes) - A0b=A0*b + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + b = rand(nnodes) + A0b = A0 * b for np in allnp - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + pgrid = partition(grid, Tp(; npart=np)) @test check_partitioning(pgrid, cellpartonly=false) - A=Tm(nnodes,nnodes, num_partitions(pgrid)) - ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) - testassemble_parallel!(A,pgrid) - invp=invperm(pgrid[NodePermutation]) - diff=norm(A0b[invp] - A*b[invp], Inf) + A = Tm(nnodes, nnodes, num_partitions(pgrid)) + ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], + pgrid[PartitionNodes]) + testassemble_parallel!(A, pgrid) + invp = invperm(pgrid[NodePermutation]) + diff = norm(A0b[invp] - A * b[invp], Inf) @show diff - @test diffsort + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = copy(nonzeros(A0)) |> sort # Get the base timing # During setup, set matrix entries to zero while keeping the structure - t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(nonzeros($A0).=0) - result=[] - A=Tm(nnodes,nnodes,1) + t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (nonzeros($A0) .= 0) + result = [] + A = Tm(nnodes, nnodes, 1) for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + pgrid = partition(grid, Tp(; npart=np)) @show num_partitions_per_color(pgrid) - reset!(A,num_partitions(pgrid)) - testassemble_parallel!(A,pgrid) - t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(nonzeros($A).=0) - @assert sort(nonzeros(A))≈nz - push!(result,(np,round(t0/t,digits=2))) + reset!(A, num_partitions(pgrid)) + testassemble_parallel!(A, pgrid) + t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (nonzeros($A) .= 0) + @assert sort(nonzeros(A)) ≈ nz + push!(result, (np, round(t0 / t; digits=2))) end result end -function speedup_build(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function speedup_build(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - nz=nonzeros(A0) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + nz = nonzeros(A0) reset!(A0) - testassemble!(A0,grid) - 
@assert nonzeros(A0)≈(nz) - nz=sort(nz) + testassemble!(A0, grid) + @assert nonzeros(A0) ≈ (nz) + nz = sort(nz) + # Get the base timing # During setup, reset matrix to empty state. - t0=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0)) - - result=[] - A=Tm(nnodes,nnodes,1) + t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0)) + + result = [] + A = Tm(nnodes, nnodes, 1) for np in allnp # Get the parallel timing # During setup, reset matrix to empty state. - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) - reset!(A,num_partitions(pgrid)) + pgrid = partition(grid, Tp(; npart=np)) + reset!(A, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - t=@belapsed testassemble_parallel!($A,$pgrid) seconds=1 setup=(reset!($A,num_partitions($pgrid))) - @assert sort(nonzeros(A))≈nz - push!(result,(np,round(t0/t,digits=2))) + t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (reset!($A, + num_partitions($pgrid))) + @assert sort(nonzeros(A)) ≈ nz + push!(result, (np, round(t0 / t; digits=2))) end result end -function speedup_mul(N,Tm::Type{<:AbstractSparseMatrix}; allnp=[10,15,20], dim=3) - grid=testgrid(N;dim) - nnodes=num_nodes(grid) +function speedup_mul(N, + Tm::Type{<:AbstractSparseMatrix}; + Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning, + allnp=[10, 15, 20], + dim=3) + grid = testgrid(N; dim) + nnodes = num_nodes(grid) # Get the "ground truth" - A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes) - testassemble!(A0,grid) - b=rand(nnodes) - t0=@belapsed $A0*$b seconds=1 - A0b=A0*b - result=[] - A=Tm(nnodes,nnodes,1) + A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes) + testassemble!(A0, grid) + b = rand(nnodes) + t0 = @belapsed $A0 * $b seconds = 1 + A0b = A0 * b + result = [] + A = Tm(nnodes, nnodes, 1) for np in allnp - pgrid=partition(grid,PlainMetisPartitioning(npart=np)) + pgrid = partition(grid, Tp(; npart=np)) @show num_partitions_per_color(pgrid) - reset!(A,num_partitions(pgrid)) - testassemble_parallel!(A,pgrid) + reset!(A, num_partitions(pgrid)) + testassemble_parallel!(A, pgrid) flush!(A) - ExtendableSparse.Experimental.partitioning!(A,pgrid[PColorPartitions], pgrid[PartitionNodes]) - t=@belapsed $A*$b seconds=1 - invp=invperm(pgrid[NodePermutation]) - @assert A0b[invp] ≈ A*b[invp] - push!(result,(np,round(t0/t,digits=2))) + ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], + pgrid[PartitionNodes]) + t = @belapsed $A * $b seconds = 1 + invp = invperm(pgrid[NodePermutation]) + @assert A0b[invp] ≈ A * b[invp] + push!(result, (np, round(t0 / t; digits=2))) end result end @@ -336,7 +349,6 @@ function speedup_csrmul(N; dim=3) t0=@belapsed $A0*$b seconds=1 A0b=A0*b - t0x=@belapsed A0x=sparse(transpose(sparse($A0))) A0x=sparse(transpose(sparse(A0))) @@ -349,7 +361,7 @@ function speedup_csrmul(N; dim=3) t2=@belapsed mymul($A, $b) seconds=1 @info t00,t0,t0x, tx,t1, t2 - + @assert A0b≈A*b t0/t1 end @@ -357,4 +369,3 @@ end =# end - From dc216a35ee6691a0990c9fb75ebcfacf192a43ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 24 Jun 2024 20:30:04 +0200 Subject: [PATCH 37/44] update compat for ExtendableGrids --- src/experimental/sparsematrixlnkdict.jl | 2 +- test/ExperimentalXParallel.jl | 4 ++-- test/Project.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl index 6ae7a6c..1d3d8e4 100644 --- a/src/experimental/sparsematrixlnkdict.jl +++ 
b/src/experimental/sparsematrixlnkdict.jl @@ -216,7 +216,7 @@ It assumes that `op(0,0)==0`. If `v` is zero a new entry is created nevertheless. """ function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} -@time k, k0 = findindex(lnk, i, j) + k, k0 = findindex(lnk, i, j) if k > 0 lnk.nzval[k] = op(lnk.nzval[k], v) else diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl index 03bf2ad..a3744eb 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/ExperimentalXParallel.jl @@ -192,7 +192,7 @@ function test_correctness_build(N, pgrid = partition(grid, Tp(; npart=np)) A = Tm(nnodes, nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) - @test check_partitioning(pgrid, cellpartonly=true) + @test check_partitioning(pgrid) testassemble_parallel!(A, pgrid) @test sort(nonzeros(A)) ≈ nz end @@ -212,7 +212,7 @@ function test_correctness_mul(N, A0b = A0 * b for np in allnp pgrid = partition(grid, Tp(; npart=np)) - @test check_partitioning(pgrid, cellpartonly=false) + @test check_partitioning(pgrid) A = Tm(nnodes, nnodes, num_partitions(pgrid)) ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], pgrid[PartitionNodes]) diff --git a/test/Project.toml b/test/Project.toml index 24a28c6..97f6793 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -21,5 +21,5 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -ExtendableGrids = "1.7" +ExtendableGrids = "1.8" IterativeSolvers = "0.9" From 18bc37f3ddc15a2bda501c491f7d0a7c4ec0dfc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 24 Jun 2024 21:52:56 +0200 Subject: [PATCH 38/44] moving things away from experimental --- src/ExtendableSparse.jl | 14 +- src/experimental/Experimental.jl | 14 +- .../extendablesparsematrixparallel.jl | 2 +- .../extendablesparsematrixscalar.jl | 2 +- ...l => abstractextendablesparsematrixcsc.jl} | 70 +-- src/matrix/abstractextension.jl | 28 -- src/matrix/abstractsparsematrixextension.jl | 28 ++ src/matrix/extendable.jl | 112 ++--- .../genericmtextendablesparsematrixcsc.jl | 122 +++++ src/matrix/sparsematrixdilnkc.jl | 462 ++++++++++++++++++ test/ExperimentalXParallel.jl | 116 +---- test/femtools.jl | 110 +++++ test/runtests.jl | 2 +- 13 files changed, 837 insertions(+), 245 deletions(-) rename src/matrix/{abstractextendable.jl => abstractextendablesparsematrixcsc.jl} (77%) delete mode 100644 src/matrix/abstractextension.jl create mode 100644 src/matrix/abstractsparsematrixextension.jl create mode 100644 src/matrix/genericmtextendablesparsematrixcsc.jl create mode 100644 src/matrix/sparsematrixdilnkc.jl create mode 100644 test/femtools.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 0d7bd5c..d7a0cd5 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -22,12 +22,20 @@ using DocStringExtensions import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") -include("matrix/abstractextension.jl") +include("matrix/abstractsparsematrixextension.jl") include("matrix/sparsematrixlnk.jl") -include("matrix/abstractextendable.jl") +include("matrix/sparsematrixdilnkc.jl") +include("matrix/abstractextendablesparsematrixcsc.jl") include("matrix/extendable.jl") +include("matrix/genericmtextendablesparsematrixcsc.jl") -export SparseMatrixLNK, ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! 
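The stray `@time` removed here would have dominated the cost of the inner `findindex` call; timing belongs outside the kernel, as the test suite does with BenchmarkTools. A minimal sketch of that pattern, where `assemble!` is a hypothetical stand-in for the `testassemble!`/`testassemble_parallel!` routines:

```julia
using BenchmarkTools, ExtendableSparse

# hypothetical stand-in for the assembly routines under test
function assemble!(A, n)
    for i in 1:n
        rawupdateindex!(A, +, 2.0, i, i)
    end
    flush!(A)
end

n = 10_000
A = ExtendableSparseMatrix(n, n)
# reset! in setup, so each sample measures a fresh structure build
t = @belapsed assemble!($A, $n) seconds=1 setup=(reset!($A))
```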
+const ExtendableSparseMatrix=ExtendableSparseMatrixCSC +const MTExtendableSparseMatrixCSC=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC} +MTExtendableSparseMatrixCSC(m,n,args...)=MTExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) + +export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC,GenericMTExtendableSparseMatrixCSC +export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! +export partitioning! export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index 546810e..dbf14cd 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -5,13 +5,12 @@ using SparseArrays: AbstractSparseMatrixCSC import SparseArrays: nonzeros, getcolptr,nzrange import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash -using ExtendableSparse: AbstractExtendableSparseMatrix, AbstractSparseMatrixExtension +using ExtendableSparse: AbstractExtendableSparseMatrixCSC, AbstractSparseMatrixExtension using DocStringExtensions using Metis using Base.Threads using OhMyThreads: @tasks -import ExtendableSparse: factorize!, update! - +import ExtendableSparse: factorize!, update!, partitioning! include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl")) @@ -66,16 +65,15 @@ const ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrix export ExtendableSparseMatrixLNK -include("extendablesparsematrixparallel.jl") -const ExtendableSparseMatrixParallelDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixDict{Tv,Ti},Tv,Ti} +const ExtendableSparseMatrixParallelDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDict{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelDict, partcolors! 
+export ExtendableSparseMatrixParallelDict -const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKX{Tv,Ti},Tv,Ti} +const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKX{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelLNKX(m,n,p)= ExtendableSparseMatrixParallelLNKX{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelLNKX -const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=ExtendableSparseMatrixXParallel{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} +const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) export ExtendableSparseMatrixParallelLNKDict diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl index ba70a65..cd66f92 100644 --- a/src/experimental/extendablesparsematrixparallel.jl +++ b/src/experimental/extendablesparsematrixparallel.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl index 887d275..36c1dfa 100644 --- a/src/experimental/extendablesparsematrixscalar.jl +++ b/src/experimental/extendablesparsematrixscalar.jl @@ -1,4 +1,4 @@ -mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ diff --git a/src/matrix/abstractextendable.jl b/src/matrix/abstractextendablesparsematrixcsc.jl similarity index 77% rename from src/matrix/abstractextendable.jl rename to src/matrix/abstractextendablesparsematrixcsc.jl index dae94bb..bcf3a6a 100644 --- a/src/matrix/abstractextendable.jl +++ b/src/matrix/abstractextendablesparsematrixcsc.jl @@ -7,23 +7,23 @@ rawupdateindex! reset!: empty all internals, just keep size """ -abstract type AbstractExtendableSparseMatrix{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end +abstract type AbstractExtendableSparseMatrixCSC{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end """ $(SIGNATURES) [`flush!`](@ref) and return number of nonzeros in ext.cscmatrix. """ -SparseArrays.nnz(ext::AbstractExtendableSparseMatrix)=nnz(sparse(ext)) +SparseArrays.nnz(ext::AbstractExtendableSparseMatrixCSC)=nnz(sparse(ext)) """ $(SIGNATURES) [`flush!`](@ref) and return nonzeros in ext.cscmatrix. """ -SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrix)=nonzeros(sparse(ext)) +SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrixCSC)=nonzeros(sparse(ext)) -Base.size(ext::AbstractExtendableSparseMatrix)=size(ext.cscmatrix) +Base.size(ext::AbstractExtendableSparseMatrixCSC)=size(ext.cscmatrix) @@ -32,7 +32,7 @@ $(SIGNATURES) Return element type. 
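Putting the renamed pieces together: judging from the constructor and the `tid`-carrying `rawupdateindex!` in this patch, multithreaded assembly with the new `MTExtendableSparseMatrixCSC` is meant to look roughly like the sketch below. The chunking into `ranges` is ad hoc for illustration; each task writes only into the extension matrix selected by the trailing partition argument, and overlapping contributions are summed at `flush!`.

```julia
using ExtendableSparse
using Base.Threads: @threads, nthreads

n = 10_000
np = nthreads()
A = MTExtendableSparseMatrixCSC(n, n, np)
# disjoint index chunks, one per partition (illustrative only)
ranges = [((p - 1) * n ÷ np + 1):(p * n ÷ np) for p in 1:np]
@threads for p in 1:np
    for i in ranges[p]
        # the trailing argument p routes the update into partition p's
        # private extension matrix, so tasks never share mutable state
        rawupdateindex!(A, +, 2.0, i, i, p)
        i > 1 && rawupdateindex!(A, +, -1.0, i, i - 1, p)
        i < n && rawupdateindex!(A, +, -1.0, i, i + 1, p)
    end
end
flush!(A) # merge all extension matrices into the CSC part
```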
""" -Base.eltype(::AbstractExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv +Base.eltype(::AbstractExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = Tv @@ -41,12 +41,12 @@ $(SIGNATURES) Create SparseMatrixCSC from ExtendableSparseMatrix """ -SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrix)=sparse(A) +SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrixCSC)=sparse(A) -function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrix) +function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrixCSC) A=sparse(ext) xnnz = nnz(A) m, n = size(A) @@ -77,7 +77,7 @@ $(SIGNATURES) [`flush!`](@ref) and return rowvals in ext.cscmatrix. """ -SparseArrays.rowvals(ext::AbstractExtendableSparseMatrix)=rowvals(sparse(ext)) +SparseArrays.rowvals(ext::AbstractExtendableSparseMatrixCSC)=rowvals(sparse(ext)) """ @@ -85,7 +85,7 @@ $(SIGNATURES) [`flush!`](@ref) and return colptr of in ext.cscmatrix. """ -SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrix)=getcolptr(sparse(ext)) +SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrixCSC)=getcolptr(sparse(ext)) """ @@ -93,11 +93,11 @@ $(SIGNATURES) [`flush!`](@ref) and return findnz(ext.cscmatrix). """ -SparseArrays.findnz(ext::AbstractExtendableSparseMatrix)=findnz(sparse(ext)) +SparseArrays.findnz(ext::AbstractExtendableSparseMatrixCSC)=findnz(sparse(ext)) @static if VERSION >= v"1.7" - SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrix)= SparseArrays._checkbuffers(sparse(ext)) + SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrixCSC)= SparseArrays._checkbuffers(sparse(ext)) end """ @@ -107,7 +107,7 @@ end are allowed in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64. In that case, Julias standard `\` is called, which is realized via UMFPACK. """ -function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{Tv, Ti}, +function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{Tv, Ti}, b::AbstractVector) where {Tv, Ti} SparspakLU(sparse(ext)) \ b end @@ -119,7 +119,7 @@ $(SIGNATURES) [`\\`](@ref) for Symmetric{ExtendableSparse} """ function LinearAlgebra.:\(symm_ext::Symmetric{Tm, T}, - b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}} Symmetric(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... end @@ -129,19 +129,19 @@ $(SIGNATURES) [`\\`](@ref) for Hermitian{ExtendableSparse} """ function LinearAlgebra.:\(symm_ext::Hermitian{Tm, T}, - b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrix{Tm,Ti}} + b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}} Hermitian(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ... 
end if USE_GPL_LIBS for (Tv) in (:Float64, :ComplexF64) - @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrix{$Tv, Ti}, + @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{$Tv, Ti}, B::AbstractVector) where {Ti} sparse(ext) \ B end end @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv, - AbstractExtendableSparseMatrix{ + AbstractExtendableSparseMatrixCSC{ $Tv, Ti }}, @@ -151,7 +151,7 @@ if USE_GPL_LIBS end end @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv, - AbstractExtendableSparseMatrix{ + AbstractExtendableSparseMatrixCSC{ $Tv, Ti }}, @@ -167,7 +167,7 @@ $(SIGNATURES) [`flush!`](@ref) and ldiv with ext.cscmatrix """ -function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrix, x) +function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrixCSC, x) LinearAlgebra.ldiv!(r, sparse(ext), x) end @@ -176,7 +176,7 @@ $(SIGNATURES) [`flush!`](@ref) and multiply with ext.cscmatrix """ -function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrix, x) +function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrixCSC, x) LinearAlgebra.mul!(r, sparse(ext), x) end @@ -185,7 +185,7 @@ $(SIGNATURES) [`flush!`](@ref) and calculate norm from cscmatrix """ -function LinearAlgebra.norm(A::AbstractExtendableSparseMatrix, p::Real = 2) +function LinearAlgebra.norm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) return LinearAlgebra.norm(sparse(A), p) end @@ -194,7 +194,7 @@ $(SIGNATURES) [`flush!`](@ref) and calculate opnorm from cscmatrix """ -function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrix, p::Real = 2) +function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) return LinearAlgebra.opnorm(sparse(A), p) end @@ -203,7 +203,7 @@ $(SIGNATURES) [`flush!`](@ref) and calculate cond from cscmatrix """ -function LinearAlgebra.cond(A::AbstractExtendableSparseMatrix, p::Real = 2) +function LinearAlgebra.cond(A::AbstractExtendableSparseMatrixCSC, p::Real = 2) return LinearAlgebra.cond(sparse(A), p) end @@ -212,7 +212,7 @@ $(SIGNATURES) [`flush!`](@ref) and check for symmetry of cscmatrix """ -function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrix) +function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrixCSC) return LinearAlgebra.issymmetric(sparse(A)) end @@ -221,29 +221,29 @@ end -function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrix +function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC T(sparse(A) + sparse(B)) end -function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrix +function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC T(sparse(A) - sparse(B)) end -function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrix +function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC T(sparse(A) * sparse(B)) end """ $(SIGNATURES) """ -function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrix +function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrixCSC return T(d * sparse(ext)) end """ $(SIGNATURES) """ -function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrix +function Base.:*(ext::T, d::Diagonal) where T<:AbstractExtendableSparseMatrixCSC return T(sparse(ext) * d) end @@ -253,7 +253,7 @@ $(SIGNATURES) Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref) ext. 
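These mixed-type methods keep result types predictable: combining with a plain `SparseMatrixCSC` flushes and returns a `SparseMatrixCSC`, while `Diagonal` scaling stays within the extendable type. A quick sketch:

```julia
using ExtendableSparse, SparseArrays, LinearAlgebra

A = ExtendableSparseMatrix(sparse(1.0I, 3, 3))
C = sparse(2.0I, 3, 3)
@assert (A + C) isa SparseMatrixCSC
@assert (Diagonal(ones(3)) * A) isa ExtendableSparseMatrix
```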
""" -function Base.:+(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) +function Base.:+(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC) return sparse(ext) + csc end @@ -263,7 +263,7 @@ $(SIGNATURES) Subtract SparseMatrixCSC matrix from [`ExtendableSparseMatrix`](@ref) ext. """ -function Base.:-(ext::AbstractExtendableSparseMatrix, csc::SparseMatrixCSC) +function Base.:-(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC) return sparse(ext) - csc end @@ -272,28 +272,28 @@ $(SIGNATURES) Subtract [`ExtendableSparseMatrix`](@ref) ext from SparseMatrixCSC. """ -function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrix) +function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrixCSC) return csc - sparse(ext) end """ $(SIGNATURES) """ -function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrix) +function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrixCSC) dropzeros!(sparse(ext)) end -function mark_dirichlet(A::AbstractExtendableSparseMatrix;penalty=1.0e20) +function mark_dirichlet(A::AbstractExtendableSparseMatrixCSC;penalty=1.0e20) mark_dirichlet(sparse(A);penalty) end -function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrix +function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrixCSC T(eliminate_dirichlet(sparse(A),dirichlet)) end -function eliminate_dirichlet!(A::AbstractExtendableSparseMatrix,dirichlet) +function eliminate_dirichlet!(A::AbstractExtendableSparseMatrixCSC,dirichlet) eliminate_dirichlet!(sparse(A),dirichlet) A end diff --git a/src/matrix/abstractextension.jl b/src/matrix/abstractextension.jl deleted file mode 100644 index 378e54a..0000000 --- a/src/matrix/abstractextension.jl +++ /dev/null @@ -1,28 +0,0 @@ -""" - $(TYPEDEF) - -Abstract type for sparse matrix extension. - -Subtypes T_ext must implement: - - -Constructor T_ext(m,n) -SparseArrays.nnz(ext::T_ext) -Base.size(ext::T_ext) - -Base.+(ext::T_ext, csc) - - Add extension matrix and csc matrix, return csc matrix - -sum!(nodeparts::Vector{Ti},extmatrices::Vector{T_ext}, cscmatrix) - - Add csc matrix and extension matrices (one per partition) and return csc matrix - - Fill nodeparts (already initialized at input) with information which partition was used to assemble node. - i.e. if entry [i,j] comes from extmatrixes[p], set nodeparts[j]=p . - - This information may be used by matrix-vector multiplication and preconditioners - -rawupdateindex!(ext::Text, op, v, i, j) where {Tv, Ti} - - Set ext[i,j]+=v, possibly insert entry into matrix. - - -""" -abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end diff --git a/src/matrix/abstractsparsematrixextension.jl b/src/matrix/abstractsparsematrixextension.jl new file mode 100644 index 0000000..d8070fc --- /dev/null +++ b/src/matrix/abstractsparsematrixextension.jl @@ -0,0 +1,28 @@ +""" + $(TYPEDEF) + +Abstract type for sparse matrix extension. + +Subtypes T_ext must implement: + +Constructor T_ext(m,n) +SparseArrays.nnz(ext::T_ext) +Base.size(ext::T_ext) + + +Base.sum(extmatrices::Vector{T_ext}, csx) + - Add csx matrix and extension matrices (one per partition) and return csx matrix + +rawupdateindex!(ext::Text, op, v, i, j) where {Tv, Ti} + - Set ext[i,j]+=v, possibly insert entry into matrix. 
+ + +Optional: + +Base.+(ext::T_ext, csx) + - Add extension matrix and csc/csr matrix, return csx matrix + +""" +abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end + +Base.:+(ext::AbstractSparseMatrixExtension, csx) = sum([ext],csx) diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index 2d8a908..a9debf7 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -7,7 +7,7 @@ either in cscmatrix, or in lnkmatrix, never in both. $(TYPEDFIELDS) """ -mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrix{Tv, Ti} +mutable struct ExtendableSparseMatrixCSC{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data """ @@ -27,92 +27,92 @@ end """ ``` -ExtendableSparseMatrix(Tv,Ti,m,n) -ExtendableSparseMatrix(Tv,m,n) -ExtendableSparseMatrix(m,n) +ExtendableSparseMatrixCSC(Tv,Ti,m,n) +ExtendableSparseMatrixCSC(Tv,m,n) +ExtendableSparseMatrixCSC(m,n) ``` -Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for +Create empty ExtendableSparseMatrixCSC. This is equivalent to `spzeros(m,n)` for `SparseMartrixCSC`. """ -function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) +function ExtendableSparseMatrixCSC{Tv, Ti}(m, n) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0) end -function ExtendableSparseMatrix(valuetype::Type{Tv}, +function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, indextype::Type{Ti}, m, n) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(m, n) + ExtendableSparseMatrixCSC{Tv, Ti}(m, n) end -function ExtendableSparseMatrix(valuetype::Type{Tv}, m, n) where {Tv} - ExtendableSparseMatrix{Tv, Int}(m, n) +function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, m, n) where {Tv} + ExtendableSparseMatrixCSC{Tv, Int}(m, n) end -ExtendableSparseMatrix(m, n) = ExtendableSparseMatrix{Float64, Int}(m, n) +ExtendableSparseMatrixCSC(m, n) = ExtendableSparseMatrixCSC{Float64, Int}(m, n) """ $(SIGNATURES) -Create ExtendableSparseMatrix from SparseMatrixCSC +Create ExtendableSparseMatrixCSC from SparseMatrixCSC """ -function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) +function ExtendableSparseMatrixCSC(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc)) end -function ExtendableSparseMatrix{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} - ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc)) +function ExtendableSparseMatrixCSC{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer} + ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc)) end """ $(SIGNATURES) - Create ExtendableSparseMatrix from Diagonal + Create ExtendableSparseMatrixCSC from Diagonal """ -ExtendableSparseMatrix(D::Diagonal) = ExtendableSparseMatrix(sparse(D)) +ExtendableSparseMatrixCSC(D::Diagonal) = ExtendableSparseMatrixCSC(sparse(D)) """ $(SIGNATURES) - Create ExtendableSparseMatrix from AbstractMatrix, dropping all zero entries. + Create ExtendableSparseMatrixCSC from AbstractMatrix, dropping all zero entries. This is the equivalent to `sparse(A)`. 
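The constructor family mirrors `sparse`: dense and `Diagonal` inputs are converted with stored zeros dropped. For instance:

```julia
using ExtendableSparse, SparseArrays, LinearAlgebra

M = [1.0 0.0; 0.0 2.0]
A = ExtendableSparseMatrix(M)                         # equivalent to sparse(M)
B = ExtendableSparseMatrix(Diagonal([1.0, 2.0, 3.0]))
@assert nnz(A) == 2 && nnz(B) == 3
```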
""" -ExtendableSparseMatrix(A::AbstractMatrix) = ExtendableSparseMatrix(sparse(A)) +ExtendableSparseMatrixCSC(A::AbstractMatrix) = ExtendableSparseMatrixCSC(sparse(A)) """ - ExtendableSparseMatrix(I,J,V) - ExtendableSparseMatrix(I,J,V,m,n) - ExtendableSparseMatrix(I,J,V,combine::Function) - ExtendableSparseMatrix(I,J,V,m,n,combine::Function) + ExtendableSparseMatrixCSC(I,J,V) + ExtendableSparseMatrixCSC(I,J,V,m,n) + ExtendableSparseMatrixCSC(I,J,V,combine::Function) + ExtendableSparseMatrixCSC(I,J,V,m,n,combine::Function) -Create ExtendableSparseMatrix from triplet (COO) data. +Create ExtendableSparseMatrixCSC from triplet (COO) data. """ -ExtendableSparseMatrix(I, J, V::AbstractVector) = ExtendableSparseMatrix(sparse(I, J, V)) +ExtendableSparseMatrixCSC(I, J, V::AbstractVector) = ExtendableSparseMatrixCSC(sparse(I, J, V)) -function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n) - ExtendableSparseMatrix(sparse(I, J, V, m, n)) +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n) + ExtendableSparseMatrixCSC(sparse(I, J, V, m, n)) end -function ExtendableSparseMatrix(I, J, V::AbstractVector, combine::Function) - ExtendableSparseMatrix(sparse(I, J, V, combine)) +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, combine::Function) + ExtendableSparseMatrixCSC(sparse(I, J, V, combine)) end -function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n, combine::Function) - ExtendableSparseMatrix(sparse(I, J, V, m, n, combine)) +function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n, combine::Function) + ExtendableSparseMatrixCSC(sparse(I, J, V, m, n, combine)) end # THese are probably too much... -# function Base.transpose(A::ExtendableSparseMatrix) +# function Base.transpose(A::ExtendableSparseMatrixCSC) # flush!(A) -# ExtendableSparseMatrix(Base.transpose(sparse(A))) +# ExtendableSparseMatrixCSC(Base.transpose(sparse(A))) # end -# function Base.adjoint(A::ExtendableSparseMatrix) +# function Base.adjoint(A::ExtendableSparseMatrixCSC) # flush!(A) -# ExtendableSparseMatrix(Base.adjoint(sparse(A))) +# ExtendableSparseMatrixCSC(Base.adjoint(sparse(A))) # end -# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrix{Tv,Ti}}) where {Tv,Ti} +# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} # transpose(sparse(parent(text))) # end @@ -123,12 +123,12 @@ $(SIGNATURES) Create similar but emtpy extendableSparseMatrix """ -function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} - ExtendableSparseMatrix{Tv, Ti}(size(m)...) +function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} + ExtendableSparseMatrixCSC{Tv, Ti}(size(m)...) end -function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}, ::Type{T}) where {Tv, Ti, T} - ExtendableSparseMatrix{T, Ti}(size(m)...) +function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}, ::Type{T}) where {Tv, Ti, T} + ExtendableSparseMatrixCSC{T, Ti}(size(m)...) end """ @@ -140,7 +140,7 @@ search during acces: ```@example using ExtendableSparse # hide -A=ExtendableSparseMatrix(3,3) +A=ExtendableSparseMatrixCSC(3,3) A[1,2]+=0.1 A ``` @@ -148,7 +148,7 @@ A ```@example using ExtendableSparse # hide -A=ExtendableSparseMatrix(3,3) +A=ExtendableSparseMatrixCSC(3,3) updateindex!(A,+,0.1,1,2) A ``` @@ -156,7 +156,7 @@ A If `v` is zero, no new entry is created. 
""" -function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function updateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, @@ -178,7 +178,7 @@ $(SIGNATURES) Like [`updateindex!`](@ref) but without checking if v is zero. """ -function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function rawupdateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, @@ -201,7 +201,7 @@ $(SIGNATURES) Find index in CSC matrix and set value if it exists. Otherwise, set index in extension if `v` is nonzero. """ -function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti}, +function Base.setindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti} @@ -222,7 +222,7 @@ $(SIGNATURES) Find index in CSC matrix and return value, if it exists. Otherwise, return value from extension. """ -function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti}, +function Base.getindex(ext::ExtendableSparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) @@ -244,7 +244,7 @@ $(SIGNATURES) If there are new entries in extension, create new CSC matrix by adding the cscmatrix and linked list matrix and reset the linked list based extension. """ -function flush!(ext::ExtendableSparseMatrix) +function flush!(ext::ExtendableSparseMatrixCSC) if ext.lnkmatrix != nothing && nnz(ext.lnkmatrix) > 0 ext.cscmatrix = ext.lnkmatrix + ext.cscmatrix ext.lnkmatrix = nothing @@ -254,7 +254,7 @@ function flush!(ext::ExtendableSparseMatrix) end -function SparseArrays.sparse(ext::ExtendableSparseMatrix) +function SparseArrays.sparse(ext::ExtendableSparseMatrixCSC) flush!(ext) ext.cscmatrix end @@ -265,7 +265,7 @@ $(SIGNATURES) Reset ExtenableSparseMatrix into state similar to that after creation. """ -function reset!(A::ExtendableSparseMatrix) +function reset!(A::ExtendableSparseMatrixCSC) A.cscmatrix=spzeros(size(A)...) A.lnkmatrix=nothing end @@ -275,11 +275,11 @@ end """ $(SIGNATURES) """ -function Base.copy(S::ExtendableSparseMatrix) +function Base.copy(S::ExtendableSparseMatrixCSC) if isnothing(S.lnkmatrix) - ExtendableSparseMatrix(copy(S.cscmatrix), nothing,S.phash) + ExtendableSparseMatrixCSC(copy(S.cscmatrix), nothing,S.phash) else - ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) + ExtendableSparseMatrixCSC(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash) end end @@ -288,7 +288,7 @@ end Create a pointblock matrix. 
""" -function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} +function pointblock(A0::ExtendableSparseMatrixCSC{Tv,Ti},blocksize) where {Tv,Ti} A=SparseMatrixCSC(A0) colptr=A.colptr rowval=A.rowval @@ -298,7 +298,7 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti} nblock=n÷blocksize b=SMatrix{blocksize,blocksize}(block) Tb=typeof(b) - Ab=ExtendableSparseMatrix{Tb,Ti}(nblock,nblock) + Ab=ExtendableSparseMatrixCSC{Tb,Ti}(nblock,nblock) for i=1:n diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl new file mode 100644 index 0000000..5e52fcd --- /dev/null +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -0,0 +1,122 @@ +mutable struct GenericMTExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Vector of dictionaries for new entries + """ + xmatrices::Vector{Tm} + + colparts::Vector{Ti} + partnodes::Vector{Ti} +end + +function GenericMTExtendableSparseMatrixCSC{Tm, Tv, Ti}(n,m,p::Integer=1) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} + GenericMTExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n), + [Tm(m,n) for i=1:p], + Ti[1,2], + Ti[1,n+1], + ) +end + +function partitioning!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} + ext.partnodes=partnodes + ext.colparts=colparts + ext +end + + +function reset!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrices=[Tm(m,n) for i=1:p] + ext.colparts=Ti[1,2] + ext.partnodes=Ti[1,n+1] + ext +end + +function reset!(ext::GenericMTExtendableSparseMatrixCSC) + reset!(ext,length(ext.xmatrices)) +end + + +function flush!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti} + ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) + np=length(ext.xmatrices) + (m,n)=size(ext.cscmatrix) + ext.xmatrices=[Tm(m,n) for i=1:np] + ext +end + + +function SparseArrays.sparse(ext::GenericMTExtendableSparseMatrixCSC) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::GenericMTExtendableSparseMatrixCSC, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + error("use rawupdateindex! for new entries into GenericMTExtendableSparseMatrixCSC") + end +end + +function Base.getindex(ext::GenericMTExtendableSparseMatrixCSC, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + return ext.cscmatrix.nzval[k] + elseif sum(nnz,ext.xmatrices) == 0 + return zero(eltype(ext.cscmatrix)) + else + error("flush! 
GenericMTExtendableSparseMatrixCSC before using getindex")
+    end
+end
+
+function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC,
+                         op,
+                         v,
+                         i,
+                         j,
+                         tid=1)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.xmatrices[tid],op,v,i,j)
+    end
+end
+
+# Needed in 1.9
+function Base.:*(ext::GenericMTExtendableSparseMatrixCSC{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA
+    mul!(similar(x),ext,x)
+end
+
+function LinearAlgebra.mul!(r, ext::GenericMTExtendableSparseMatrixCSC, x)
+    flush!(ext)
+    A=ext.cscmatrix
+    colparts=ext.colparts
+    partnodes=ext.partnodes
+    rows = SparseArrays.rowvals(A)
+    vals = nonzeros(A)
+    r.=zero(eltype(ext))
+    m,n=size(A)
+    for icol=1:length(colparts)-1
+        @tasks for ip in colparts[icol]:colparts[icol+1]-1
+            @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1
+                @inbounds for i in nzrange(A,inode)
+                    r[rows[i]]+=vals[i]*x[inode]
+                end
+            end
+        end
+    end
+    r
+end
diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl
new file mode 100644
index 0000000..d467c3d
--- /dev/null
+++ b/src/matrix/sparsematrixdilnkc.jl
@@ -0,0 +1,462 @@
+"""
+    $(TYPEDEF)
+
+    Modification of SparseMatrixLNK where the pointer to the first index of
+column j is stored in a dictionary.
+    """
+mutable struct SparseMatrixDILNKC{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
+    """
+    Number of rows
+    """
+    m::Ti
+
+    """
+    Number of columns
+    """
+    n::Ti
+
+    """
+    Number of nonzeros
+    """
+    nnz::Ti
+
+    """
+    Length of arrays
+    """
+    nentries::Ti
+
+    """
+    Linked list of column entries. Initial length is n,
+    it grows with each new entry.
+
+    colptr[index] contains the next
+    index in the list or zero, in the latter case terminating the list which
+    starts at index 1<=j<=n for each column j.
+    """
+    colptr::Vector{Ti}
+
+    """
+    Dictionary to store start indices of columns
+    """
+    colstart::Dict{Ti,Ti}
+
+    """
+    Row numbers. For each index it contains zero (initial state)
+    or the row numbers corresponding to the column entry list in colptr.
+    """
+    rowval::Vector{Ti}
+
+    """
+    Nonzero entry values corresponding to each pair
+    (colptr[index],rowval[index])
+    """
+    nzval::Vector{Tv}
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixDILNKC{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    SparseMatrixDILNKC{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Tv,10))
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+function SparseMatrixDILNKC(valuetype::Type{Tv}, indextype::Type{Ti}, m,
+                            n) where {Tv, Ti <: Integer}
+    SparseMatrixDILNKC{Tv, Ti}(m, n)
+end
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixDILNKC(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixDILNKC(Tv, Int, m, n)
+
+"""
+$(SIGNATURES)
+
+Constructor of empty matrix.
+"""
+SparseMatrixDILNKC(m, n) = SparseMatrixDILNKC(Float64, m, n)
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixCSC.
+ +""" +function SparseMatrixDILNKC(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: + Integer} + lnk = SparseMatrixDILNKC{Tv, Ti}(csc.m, csc.n) + for j = 1:(csc.n) + for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) + lnk[csc.rowval[k], j] = csc.nzval[k] + end + end + lnk +end + +function findindex(lnk::SparseMatrixDILNKC, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k = get(lnk.colstart, j, 0) + if k==0 + return 0,0 + end + k0 = k + while k > 0 + if lnk.rowval[k] == i + return k, 0 + end + k0 = k + k = lnk.colptr[k] + end + return 0, k0 +end + +""" +$(SIGNATURES) + +Return value stored for entry or zero if not found +""" +function Base.getindex(lnk::SparseMatrixDILNKC{Tv, Ti}, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k == 0 + return zero(Tv) + else + return lnk.nzval[k] + end +end + +function addentry!(lnk::SparseMatrixDILNKC, i, j, k, k0) + # increase number of entries + lnk.nentries += 1 + if length(lnk.nzval) < lnk.nentries + newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) + resize!(lnk.nzval, newsize) + resize!(lnk.rowval, newsize) + resize!(lnk.colptr, newsize) + end + + if k0==0 + lnk.colstart[j]=lnk.nentries + end + + # Append entry if not found + lnk.rowval[lnk.nentries] = i + + # Shift the end of the list + lnk.colptr[lnk.nentries] = 0 + + if k0>0 + lnk.colptr[k0] = lnk.nentries + end + + # Update number of nonzero entries + lnk.nnz += 1 + return lnk.nentries +end + +""" +$(SIGNATURES) + +Update value of existing entry, otherwise extend matrix if v is nonzero. +""" +function Base.setindex!(lnk::SparseMatrixDILNKC, v, i, j) + if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) + throw(BoundsError(lnk, (i, j))) + end + + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = v + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = v + end + return lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero, no new +entry is created. +""" +function updateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + return lnk + end + if !iszero(v) + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Update element of the matrix with operation `op`. +It assumes that `op(0,0)==0`. If `v` is zero a new entry +is created nevertheless. +""" +function rawupdateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti} + k, k0 = findindex(lnk, i, j) + if k > 0 + lnk.nzval[k] = op(lnk.nzval[k], v) + else + k = addentry!(lnk, i, j, k, k0) + lnk.nzval[k] = op(zero(Tv), v) + end + lnk +end + +""" +$(SIGNATURES) + +Return tuple containing size of the matrix. +""" +Base.size(lnk::SparseMatrixDILNKC) = (lnk.m, lnk.n) + +""" +$(SIGNATURES) + +Return number of nonzero entries. +""" +SparseArrays.nnz(lnk::SparseMatrixDILNKC) = lnk.nnz + + +""" + $(SIGNATURES) +Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. 
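+Duplicate (i,j) pairs among the collected triples are combined with `+` by
+`sparse`/`sparse!`, so entries present in both lnk and csc are added up.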
+""" +function add_via_COO(lnk::SparseMatrixDILNKC{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + (;colptr,nzval,rowval,m,n)=csc + l=nnz(lnk)+nnz(csc) + I=Vector{Ti}(undef,l) + J=Vector{Ti}(undef,l) + V=Vector{Tv}(undef,l) + i=1 + if nnz(csc)>0 + for icsc=1:length(colptr)-1 + for j=colptr[icsc]:colptr[icsc+1]-1 + I[i]=icsc + J[i]=rowval[j] + V[i]=nzval[j] + i=i+1 + end + end + end + for (j,k) in lnk.colstart + while k>0 + I[i]=lnk.rowval[k] + J[i]=j + V[i]=lnk.nzval[k] + k=lnk.colptr[k] + i=i+1 + end + end + @static if VERSION>=v"1.10" + return SparseArrays.sparse!(I,J,V,m,n,+) + else + return SparseArrays.sparse(I,J,V,m,n,+) + end +end + + +""" + $(SIGNATURES) +Add lnk and csc without creation of intermediate data. +(to be fixed) +""" +function add_directly(lnk::SparseMatrixDILNKC{Tv, Ti}, + csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} + @assert(csc.m==lnk.m) + @assert(csc.n==lnk.n) + + # overallocate arrays in order to avoid + # presumably slower push! + xnnz = nnz(csc) + nnz(lnk) + colptr = Vector{Ti}(undef, csc.n + 1) + rowval = Vector{Ti}(undef, xnnz) + nzval = Vector{Tv}(undef, xnnz) + + # Detect the maximum column length of lnk + lnk_maxcol = 0 + for (j,k) in lnk.colstart + lcol = zero(Ti) + while k > 0 + lcol += 1 + k = lnk.colptr[k] + end + lnk_maxcol = max(lcol, lnk_maxcol) + end + + # pre-allocate column data + col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] + + inz = 1 # counts the nonzero entries in the new matrix + + in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) + + in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) + + # loop over all columns + for j = 1:(csc.n) + # Copy extension entries into col and sort them + k = get(lnk.colstart, j, 0) + l_lnk_col = 0 + while k > 0 + if lnk.rowval[k] > 0 + l_lnk_col += 1 + col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) + end + k = lnk.colptr[k] + end + sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) + + # jointly sort lnk and csc entries into new matrix data + # this could be replaced in a more transparent manner by joint sorting: + # make a joint array for csc and lnk col, sort them. + # Will this be faster? + + colptr[j] = inz + jlnk = one(Ti) # counts the entries in col + jcsc = csc.colptr[j] # counts entries in csc + + while true + if in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || + !in_lnk_col(jlnk, l_lnk_col)) + # Insert entries from csc into new structure + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + jcsc += 1 + inz += 1 + elseif in_csc_col(jcsc, j) && + (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) + # Add up entries from csc and lnk + rowval[inz] = csc.rowval[jcsc] + nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval + jcsc += 1 + inz += 1 + jlnk += 1 + elseif in_lnk_col(jlnk, l_lnk_col) + # Insert entries from lnk res. 
col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixDILNKC`](@ref) lnk, returning a SparseMatrixCSC
+"""
+Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+
+function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkdictmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end
+        end
+
+        ip=1
+        for lnk in lnkdictmatrices
+            for (j,k) in lnk.colstart
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+            ip=ip+1
+        end
+        @static if VERSION>=v"1.10"
+            return SparseArrays.sparse!(I,J,V,m,n,+)
+        else
+            return SparseArrays.sparse(I,J,V,m,n,+)
+        end
+    end
+    return cscmatrix
+end
+
+function reset!(m::SparseMatrixDILNKC{Tv,Ti}) where {Tv,Ti}
+    m.nnz=0
+    m.nentries=0
+    m.colptr=zeros(Ti,10)
+    m.colstart=Dict{Ti,Ti}()
+    m.rowval=zeros(Ti,10)
+    m.nzval=zeros(Tv,10)
+    m
+end
+
+
+"""
+$(SIGNATURES)
+
+Constructor from SparseMatrixDILNKC.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixDILNKC)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixDILNKC)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixDILNKC)
+    SparseMatrixDILNKC(size(S, 1),
+                       size(S, 2),
+                       S.nnz,
+                       S.nentries,
+                       copy(S.colptr),
+                       copy(S.colstart),
+                       copy(S.rowval),
+                       copy(S.nzval))
+end
diff --git a/test/ExperimentalXParallel.jl b/test/ExperimentalXParallel.jl
index a3744eb..128d93b 100644
--- a/test/ExperimentalXParallel.jl
+++ b/test/ExperimentalXParallel.jl
@@ -11,116 +11,8 @@ using ExtendableSparse, ExtendableGrids, Metis
 using LinearAlgebra
 using BenchmarkTools
 using Test
-using OhMyThreads: @tasks
-using RecursiveFactorization
-
-function testgrid(N; dim=3)
-    X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int)
-    simplexgrid((X for i = 1:dim)...)
-end - -function coordmatrix!(C, coord, cellnodes, k) - spacedim=size(coord,1) - celldim=size(cellnodes,1) - @inbounds for jj = 1:celldim - C[1, jj] = 1 - @inbounds for ii = 1:spacedim - C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] - end - end -end - -function gradient!(G, C, factdim, I, ipiv) - clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) - ldiv!(G, clu, I) - abs(det(clu)) / factdim -end - -function scalpro(G, dim, jl, il) - s = 0.0 - @inbounds @simd for k = 1:dim - s += G[jl, k + 1] * G[il, k + 1] - end - return s -end - -function stiffness!(S, dim, G) - @inbounds for il = 1:(dim + 1) - S[il, il] = scalpro(G, dim, il, il) - @inbounds for jl = (il + 1):(dim + 1) - S[il, jl] = scalpro(G, dim, jl, il) - S[jl, il] = S[il, jl] - end - end - return S -end - -function testassemble!(A_h, grid) - coord = grid[Coordinates] - cellnodes = grid[CellNodes] - ncells = num_cells(grid) - dim = size(coord, 1) - lnodes = dim + 1 - factdim::Float64 = factorial(dim) - S = zeros(lnodes, lnodes) # local stiffness matrix - C = zeros(lnodes, lnodes) # local coordinate matrix - G = zeros(lnodes, lnodes) # shape function gradients - ipiv = zeros(Int, lnodes) - I = Matrix(Diagonal(ones(lnodes))) - ncells = size(cellnodes, 2) - for icell = 1:ncells - coordmatrix!(C, coord, cellnodes, icell) - vol = gradient!(G, C, factdim, I, ipiv) - stiffness!(S, dim, G) - for il = 1:lnodes - i = cellnodes[il, icell] - rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) - for jl = 1:lnodes - j = cellnodes[jl, icell] - rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) - end - end - end - flush!(A_h) -end - -function testassemble_parallel!(A_h, grid) - coord = grid[Coordinates] - cellnodes = grid[CellNodes] - ncells = num_cells(grid) - dim = size(coord, 1) - lnodes = dim + 1 - npart = num_partitions(grid) - factdim::Float64 = factorial(dim) - SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix - CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix - GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients - IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients - I = Matrix(Diagonal(ones(lnodes))) - ncells = size(cellnodes, 2) - for color in pcolors(grid) - @tasks for part in pcolor_partitions(grid, color) - C = CC[part] - S = SS[part] - G = GG[part] - ipiv = IP[part] - for icell in partition_cells(grid, part) - coordmatrix!(C, coord, cellnodes, icell) - vol = gradient!(G, C, factdim, I, ipiv) - stiffness!(S, dim, G) - for il = 1:lnodes - i = cellnodes[il, icell] - rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) - for jl = 1:lnodes - j = cellnodes[jl, icell] - rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) - end - end - end - end - end - flush!(A_h) -end +include("femtools.jl") function test_correctness_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3) grid = testgrid(N; dim) @@ -214,7 +106,7 @@ function test_correctness_mul(N, pgrid = partition(grid, Tp(; npart=np)) @test check_partitioning(pgrid) A = Tm(nnodes, nnodes, num_partitions(pgrid)) - ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], + partitioning!(A, pgrid[PColorPartitions], pgrid[PartitionNodes]) testassemble_parallel!(A, pgrid) invp = invperm(pgrid[NodePermutation]) @@ -311,8 +203,8 @@ function speedup_mul(N, reset!(A, num_partitions(pgrid)) testassemble_parallel!(A, pgrid) flush!(A) - ExtendableSparse.Experimental.partitioning!(A, pgrid[PColorPartitions], - pgrid[PartitionNodes]) + partitioning!(A, pgrid[PColorPartitions], + 
pgrid[PartitionNodes]) t = @belapsed $A * $b seconds = 1 invp = invperm(pgrid[NodePermutation]) @assert A0b[invp] ≈ A * b[invp] diff --git a/test/femtools.jl b/test/femtools.jl new file mode 100644 index 0000000..8c7e652 --- /dev/null +++ b/test/femtools.jl @@ -0,0 +1,110 @@ +using OhMyThreads: @tasks +using RecursiveFactorization + +function testgrid(N; dim=3) + X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int) + simplexgrid((X for i = 1:dim)...) +end + +function coordmatrix!(C, coord, cellnodes, k) + spacedim=size(coord,1) + celldim=size(cellnodes,1) + @inbounds for jj = 1:celldim + C[1, jj] = 1 + @inbounds for ii = 1:spacedim + C[ii + 1, jj] = coord[ii, cellnodes[jj, k]] + end + end +end + +function gradient!(G, C, factdim, I, ipiv) + clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false)) + ldiv!(G, clu, I) + abs(det(clu)) / factdim +end + +function scalpro(G, dim, jl, il) + s = 0.0 + @inbounds @simd for k = 1:dim + s += G[jl, k + 1] * G[il, k + 1] + end + return s +end + +function stiffness!(S, dim, G) + @inbounds for il = 1:(dim + 1) + S[il, il] = scalpro(G, dim, il, il) + @inbounds for jl = (il + 1):(dim + 1) + S[il, jl] = scalpro(G, dim, jl, il) + S[jl, il] = S[il, jl] + end + end + return S +end + +function testassemble!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + factdim::Float64 = factorial(dim) + S = zeros(lnodes, lnodes) # local stiffness matrix + C = zeros(lnodes, lnodes) # local coordinate matrix + G = zeros(lnodes, lnodes) # shape function gradients + ipiv = zeros(Int, lnodes) + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for icell = 1:ncells + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j) + end + end + end + flush!(A_h) +end + +function testassemble_parallel!(A_h, grid) + coord = grid[Coordinates] + cellnodes = grid[CellNodes] + ncells = num_cells(grid) + dim = size(coord, 1) + lnodes = dim + 1 + npart = num_partitions(grid) + factdim::Float64 = factorial(dim) + SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix + CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix + GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients + IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients + I = Matrix(Diagonal(ones(lnodes))) + ncells = size(cellnodes, 2) + for color in pcolors(grid) + @tasks for part in pcolor_partitions(grid, color) + C = CC[part] + S = SS[part] + G = GG[part] + ipiv = IP[part] + for icell in partition_cells(grid, part) + coordmatrix!(C, coord, cellnodes, icell) + vol = gradient!(G, C, factdim, I, ipiv) + stiffness!(S, dim, G) + for il = 1:lnodes + i = cellnodes[il, icell] + rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part) + for jl = 1:lnodes + j = cellnodes[jl, icell] + rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part) + end + end + end + end + end + flush!(A_h) +end diff --git a/test/runtests.jl b/test/runtests.jl index 856b8b2..01bdce2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,7 +18,7 @@ using ForwardDiff end end - for Tm in [ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] + for Tm in 
[MTExtendableSparseMatrixCSC,ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] for N in [10000,20000] ExperimentalXParallel.test_correctness_update(N,Tm, dim=2) ExperimentalXParallel.test_correctness_build(N,Tm, dim=2) From 8d7b2165952c9021d870e328b1ab4e0425b3e133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Mon, 1 Jul 2024 10:20:52 +0200 Subject: [PATCH 39/44] Fix MT/ST stuff for VoronoiFVM (in single threaded mode) Add STExtendableSparseMatrixCSC (temporarily) --- src/ExtendableSparse.jl | 9 +- .../abstractextendablesparsematrixcsc.jl | 1 - .../genericextendablesparsematrixcsc.jl | 91 +++++++++++++++++++ .../genericmtextendablesparsematrixcsc.jl | 20 +++- src/matrix/sparsematrixdilnkc.jl | 10 +- 5 files changed, 122 insertions(+), 9 deletions(-) create mode 100644 src/matrix/genericextendablesparsematrixcsc.jl diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index d7a0cd5..a1fd4e9 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -28,12 +28,17 @@ include("matrix/sparsematrixdilnkc.jl") include("matrix/abstractextendablesparsematrixcsc.jl") include("matrix/extendable.jl") include("matrix/genericmtextendablesparsematrixcsc.jl") +include("matrix/genericextendablesparsematrixcsc.jl") const ExtendableSparseMatrix=ExtendableSparseMatrixCSC -const MTExtendableSparseMatrixCSC=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC} +const MTExtendableSparseMatrixCSC{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti} MTExtendableSparseMatrixCSC(m,n,args...)=MTExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) -export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC,GenericMTExtendableSparseMatrixCSC +const STExtendableSparseMatrixCSC{Tv,Ti}=GenericExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti} +STExtendableSparseMatrixCSC(m,n,args...)=STExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...) + + +export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, STExtendableSparseMatrixCSC, GenericMTExtendableSparseMatrixCSC export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! export partitioning! 
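
A minimal sketch (names as introduced above) of the assembly pattern the MT
alias targets. The row split below is ad hoc and only for illustration; any
partitioning works as long as each task inserts new entries through its own
`tid`:

    using ExtendableSparse
    n = 1000
    np = Threads.nthreads()
    A = MTExtendableSparseMatrixCSC(n, n, np)
    Threads.@threads for tid = 1:np
        for i = tid:np:n                    # disjoint row set per task
            rawupdateindex!(A, +, 2.0, i, i, tid)        # lands in xmatrices[tid]
            i < n && rawupdateindex!(A, +, -1.0, i, i + 1, tid)
        end
    end
    flush!(A)    # merge the per-task extensions into the CSC part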
diff --git a/src/matrix/abstractextendablesparsematrixcsc.jl b/src/matrix/abstractextendablesparsematrixcsc.jl index bcf3a6a..491ebfb 100644 --- a/src/matrix/abstractextendablesparsematrixcsc.jl +++ b/src/matrix/abstractextendablesparsematrixcsc.jl @@ -297,4 +297,3 @@ function eliminate_dirichlet!(A::AbstractExtendableSparseMatrixCSC,dirichlet) eliminate_dirichlet!(sparse(A),dirichlet) A end - diff --git a/src/matrix/genericextendablesparsematrixcsc.jl b/src/matrix/genericextendablesparsematrixcsc.jl new file mode 100644 index 0000000..c741283 --- /dev/null +++ b/src/matrix/genericextendablesparsematrixcsc.jl @@ -0,0 +1,91 @@ +mutable struct GenericExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} + """ + Final matrix data + """ + cscmatrix::SparseMatrixCSC{Tv, Ti} + + """ + Matrix for new entries + """ + xmatrix::Tm +end + + +function GenericExtendableSparseMatrixCSC{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} + GenericExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n), + Tm(m,n) + ) +end + + +function reset!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where {Tm,Tv,Ti} + m,n=size(ext.cscmatrix) + ext.cscmatrix=spzeros(Tv, Ti, m, n) + ext.xmatrix=Tm(m,n) + ext +end + + +function flush!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti} + if nnz(ext.xmatrix)>0 + ext.cscmatrix=ext.xmatrix+ext.cscmatrix + ext.xmatrix=Tm(size(ext.cscmatrix)...) + end + ext +end + +function SparseArrays.sparse(ext::GenericExtendableSparseMatrixCSC) + flush!(ext) + ext.cscmatrix +end + +function Base.setindex!(ext::GenericExtendableSparseMatrixCSC, + v::Union{Number,AbstractVecOrMat}, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = v + else + setindex!(ext.xmatrix,v,i,j) + end +end + + +function Base.getindex(ext::GenericExtendableSparseMatrixCSC, + i::Integer, + j::Integer) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] + else + getindex(ext.xmatrix,i,j) + end +end + +function rawupdateindex!(ext::GenericExtendableSparseMatrixCSC, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + rawupdateindex!(ext.xmatrix,op,v,i,j) + end +end + +function updateindex!(ext::GenericExtendableSparseMatrixCSC, + op, + v, + i, + j) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + updateindex!(ext.xmatrix,op,v,i,j) + end +end + diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl index 5e52fcd..91c805d 100644 --- a/src/matrix/genericmtextendablesparsematrixcsc.jl +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -1,7 +1,7 @@ mutable struct GenericMTExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} """ Final matrix data - """ + """ cscmatrix::SparseMatrixCSC{Tv, Ti} """ @@ -95,6 +95,24 @@ function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC, end end + +function updateindex!(ext::GenericMTExtendableSparseMatrixCSC, + op, + v, + i, + j, + tid=1) + k = findindex(ext.cscmatrix, i, j) + if k > 0 + ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) + else + updateindex!(ext.xmatrices[tid],op,v,i,j) + end +end + + + + # Needed in 1.9 function Base.:*(ext::GenericMTExtendableSparseMatrixCSC{Tm, TA} where 
Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA mul!(similar(x),ext,x) diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl index d467c3d..a2cdea8 100644 --- a/src/matrix/sparsematrixdilnkc.jl +++ b/src/matrix/sparsematrixdilnkc.jl @@ -381,7 +381,8 @@ end Add SparseMatrixCSC matrix and [`SparseMatrixDILNKC`](@ref) lnk, returning a SparseMatrixCSC """ -Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc) +#Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc) +Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = sum([lnk],csc) function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} lnew=sum(nnz,lnkdictmatrices) @@ -395,14 +396,13 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix: for icsc=1:length(colptr)-1 for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] + I[i]=rowval[j] + J[i]=icsc V[i]=nzval[j] i=i+1 end end - ip=1 for lnk in lnkdictmatrices for (j,k) in lnk.colstart while k>0 @@ -413,8 +413,8 @@ function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix: i=i+1 end end - ip=ip+1 end + @assert l==i-1 @static if VERSION>=v"1.10" return SparseArrays.sparse!(I,J,V,m,n,+) else From 4437971ca1754fa120d92d646cbff7aff17aea1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 2 Jul 2024 23:54:32 +0200 Subject: [PATCH 40/44] additional methods for VoronoiFVM: nnznew --- src/ExtendableSparse.jl | 2 +- src/matrix/extendable.jl | 3 ++- src/matrix/genericextendablesparsematrixcsc.jl | 2 ++ src/matrix/genericmtextendablesparsematrixcsc.jl | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index a1fd4e9..0dd8536 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -39,7 +39,7 @@ STExtendableSparseMatrixCSC(m,n,args...)=STExtendableSparseMatrixCSC{Float64,Int export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, STExtendableSparseMatrixCSC, GenericMTExtendableSparseMatrixCSC -export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset! +export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset!, nnznew export partitioning! 
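+# nnznew counts the entries still held in the extension part of the generic
+# matrix types, i.e. entries not yet flushed into the CSC part.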
export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl index a9debf7..d4c35b5 100644 --- a/src/matrix/extendable.jl +++ b/src/matrix/extendable.jl @@ -182,7 +182,8 @@ function rawupdateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti}, op, v, i, - j) where {Tv, Ti <: Integer} + j, + part=1) where {Tv, Ti <: Integer} k = findindex(ext.cscmatrix, i, j) if k > 0 ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) diff --git a/src/matrix/genericextendablesparsematrixcsc.jl b/src/matrix/genericextendablesparsematrixcsc.jl index c741283..457413e 100644 --- a/src/matrix/genericextendablesparsematrixcsc.jl +++ b/src/matrix/genericextendablesparsematrixcsc.jl @@ -18,6 +18,8 @@ function GenericExtendableSparseMatrixCSC{Tm, Tv, Ti}(m::Integer,n::Integer) whe end +nnznew(ext::GenericExtendableSparseMatrixCSC)=nnz(ext.xmatrix) + function reset!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where {Tm,Tv,Ti} m,n=size(ext.cscmatrix) ext.cscmatrix=spzeros(Tv, Ti, m, n) diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl index 91c805d..88a4d68 100644 --- a/src/matrix/genericmtextendablesparsematrixcsc.jl +++ b/src/matrix/genericmtextendablesparsematrixcsc.jl @@ -81,6 +81,9 @@ function Base.getindex(ext::GenericMTExtendableSparseMatrixCSC, end end +nnznew(ext::GenericMTExtendableSparseMatrixCSC)=sum(nnz,ext.xmatrices) + + function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC, op, v, From 90dc3d739d62a8e30d4aee9129c411748fcec223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 7 Jul 2024 23:02:22 +0200 Subject: [PATCH 41/44] Restructure tests ExtndableGrids v1.9 --- .gitignore | 2 +- src/experimental/Experimental.jl | 37 ------------------- .../sparsematrixdict.jl | 0 test/ExperimentalParallel.jl | 2 +- test/Project.toml | 2 +- test/runtests.jl | 17 +++++---- ...erimentalXParallel.jl => test_parallel.jl} | 14 +++---- 7 files changed, 19 insertions(+), 55 deletions(-) rename src/{experimental => matrix}/sparsematrixdict.jl (100%) rename test/{ExperimentalXParallel.jl => test_parallel.jl} (93%) diff --git a/.gitignore b/.gitignore index 3ea90ad..068167e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ QUARRY docs/build *~ -Manifest.toml +Manifest*.toml .repl_history quarry diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl index dbf14cd..71995de 100644 --- a/src/experimental/Experimental.jl +++ b/src/experimental/Experimental.jl @@ -39,43 +39,6 @@ export ILUAMPreconditioner, PILUAMPreconditioner export reorderlinsys, nnz_noflush -include("sparsematrixdict.jl") -export SparseMatrixDict - -include("sparsematrixlnkx.jl") -export SparseMatrixLNKX - -include("sparsematrixlnkdict.jl") -export SparseMatrixLNKDict - -include("extendablesparsematrixscalar.jl") -export ExtendableSparseMatrixScalar - -const ExtendableSparseMatrixDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixDict{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixDict - - -const ExtendableSparseMatrixLNKDict{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixLNKDict - -const ExtendableSparseMatrixLNKX{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNKX{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixLNKX - -const ExtendableSparseMatrixLNK{Tv,Ti}=ExtendableSparseMatrixScalar{SparseMatrixLNK{Tv,Ti},Tv,Ti} -export ExtendableSparseMatrixLNK - - -const 
ExtendableSparseMatrixParallelDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDict{Tv,Ti},Tv,Ti} -ExtendableSparseMatrixParallelDict(m,n,p)= ExtendableSparseMatrixParallelDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelDict - -const ExtendableSparseMatrixParallelLNKX{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKX{Tv,Ti},Tv,Ti} -ExtendableSparseMatrixParallelLNKX(m,n,p)= ExtendableSparseMatrixParallelLNKX{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelLNKX - -const ExtendableSparseMatrixParallelLNKDict{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixLNKDict{Tv,Ti},Tv,Ti} -ExtendableSparseMatrixParallelLNKDict(m,n,p)= ExtendableSparseMatrixParallelLNKDict{Float64,Int64}(m,n,p) -export ExtendableSparseMatrixParallelLNKDict end diff --git a/src/experimental/sparsematrixdict.jl b/src/matrix/sparsematrixdict.jl similarity index 100% rename from src/experimental/sparsematrixdict.jl rename to src/matrix/sparsematrixdict.jl diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl index 7fe1029..45b05a9 100644 --- a/test/ExperimentalParallel.jl +++ b/test/ExperimentalParallel.jl @@ -195,7 +195,7 @@ function partassemble!(A,X,Y,nt=1;d=0.1) end -function partassemble!(A::Union{ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict,ExtendableSparseMatrixParallelLNKX},X,Y,nt=1;d=0.1, reset=true) +function partassemble!(A::Union{MTExtendableSparseMatrixCSC},X,Y,nt=1;d=0.1, reset=true) Nx=length(X) Ny=length(Y) diff --git a/test/Project.toml b/test/Project.toml index 97f6793..9ef7608 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -21,5 +21,5 @@ Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -ExtendableGrids = "1.8" +ExtendableGrids = "1.9" IterativeSolvers = "0.9" diff --git a/test/runtests.jl b/test/runtests.jl index 01bdce2..3af3ce4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,19 +10,20 @@ using MultiFloats using ForwardDiff -@testset "ExperimentalXParallel" begin - include("ExperimentalXParallel.jl") - for Tm in [ExtendableSparseMatrixLNK,ExtendableSparseMatrixLNKX,ExtendableSparseMatrixDict,ExtendableSparseMatrixLNKDict] +@testset "Parallel" begin + include("test_parallel.jl") + + for Tm in [STExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, ExtendableSparseMatrix] for N in [10000,20000] - ExperimentalXParallel.test_correctness_build_seq(N,Tm, dim=2) + test_parallel.test_correctness_build_seq(N,Tm, dim=2) end end - for Tm in [MTExtendableSparseMatrixCSC,ExtendableSparseMatrixParallelDict,ExtendableSparseMatrixParallelLNKDict] + for Tm in [MTExtendableSparseMatrixCSC] for N in [10000,20000] - ExperimentalXParallel.test_correctness_update(N,Tm, dim=2) - ExperimentalXParallel.test_correctness_build(N,Tm, dim=2) - ExperimentalXParallel.test_correctness_mul(N,Tm,dim=2) + test_parallel.test_correctness_update(N,Tm, dim=2) + test_parallel.test_correctness_build(N,Tm, dim=2) + test_parallel.test_correctness_mul(N,Tm,dim=2) end end end diff --git a/test/ExperimentalXParallel.jl b/test/test_parallel.jl similarity index 93% rename from test/ExperimentalXParallel.jl rename to test/test_parallel.jl index 128d93b..de26229 100644 --- a/test/ExperimentalXParallel.jl +++ b/test/test_parallel.jl @@ -1,4 +1,4 @@ -module ExperimentalXParallel +module test_parallel using ExtendableSparse, SparseArrays, ExtendableSparse.Experimental using BenchmarkTools @@ -53,7 +53,7 @@ function test_correctness_update(N, # Reset the nonzeros, keeping the 
structure intact nonzeros(A) .= 0 # Parallel assembly whith np threads - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) reset!(A, np) @show num_partitions_per_color(pgrid) testassemble_parallel!(A, pgrid) @@ -81,7 +81,7 @@ function test_correctness_build(N, for np in allnp # Make a new matrix and assemble parallel. # this should result in the same nonzeros - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) A = Tm(nnodes, nnodes, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) @test check_partitioning(pgrid) @@ -103,7 +103,7 @@ function test_correctness_mul(N, b = rand(nnodes) A0b = A0 * b for np in allnp - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) @test check_partitioning(pgrid) A = Tm(nnodes, nnodes, num_partitions(pgrid)) partitioning!(A, pgrid[PColorPartitions], @@ -135,7 +135,7 @@ function speedup_update(N, for np in allnp # Get the parallel timing # During setup, set matrix entries to zero while keeping the structure - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) @show num_partitions_per_color(pgrid) reset!(A, num_partitions(pgrid)) testassemble_parallel!(A, pgrid) @@ -171,7 +171,7 @@ function speedup_build(N, for np in allnp # Get the parallel timing # During setup, reset matrix to empty state. - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) reset!(A, num_partitions(pgrid)) @show num_partitions_per_color(pgrid) t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (reset!($A, @@ -198,7 +198,7 @@ function speedup_mul(N, result = [] A = Tm(nnodes, nnodes, 1) for np in allnp - pgrid = partition(grid, Tp(; npart=np)) + pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true) @show num_partitions_per_color(pgrid) reset!(A, num_partitions(pgrid)) testassemble_parallel!(A, pgrid) From f0f0f2b39ca7fe21994362969dbc11afa283d2b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 7 Jul 2024 23:06:42 +0200 Subject: [PATCH 42/44] remove experimental code which has been moved to matrix --- .../extendablesparsematrixparallel.jl | 131 ----- .../extendablesparsematrixscalar.jl | 77 --- src/experimental/sparsematrixlnkdict.jl | 461 ------------------ src/experimental/sparsematrixlnkx.jl | 448 ----------------- 4 files changed, 1117 deletions(-) delete mode 100644 src/experimental/extendablesparsematrixparallel.jl delete mode 100644 src/experimental/extendablesparsematrixscalar.jl delete mode 100644 src/experimental/sparsematrixlnkdict.jl delete mode 100644 src/experimental/sparsematrixlnkx.jl diff --git a/src/experimental/extendablesparsematrixparallel.jl b/src/experimental/extendablesparsematrixparallel.jl deleted file mode 100644 index cd66f92..0000000 --- a/src/experimental/extendablesparsematrixparallel.jl +++ /dev/null @@ -1,131 +0,0 @@ -mutable struct ExtendableSparseMatrixXParallel{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} - """ - Final matrix data - """ - cscmatrix::SparseMatrixCSC{Tv, Ti} - - """ - Vector of dictionaries for new entries - """ - xmatrices::Vector{Tm} - - colparts::Vector{Ti} - partnodes::Vector{Ti} -end - - -function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m,p::Integer) 
where{Tm<:AbstractSparseMatrixExtension, Tv, Ti} - - ExtendableSparseMatrixXParallel(spzeros(Tv, Ti, m, n), - [Tm(m,n) for i=1:p], - Ti[1,2], - Ti[1,n+1], - ) -end - -function partitioning!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti} - ext.partnodes=partnodes - ext.colparts=colparts - ext -end - -function ExtendableSparseMatrixXParallel{Tm, Tv, Ti}(n,m, pc::Vector) where{Tm, Tv, Ti} - ext=ExtendableSparseMatrixXParallel(m,n,length(pc)) -end - - -function reset!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti} - m,n=size(ext.cscmatrix) - ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.xmatrices=[Tm(m,n) for i=1:p] - ext.colparts=Ti[1,2] - ext.partnodes=Ti[1,n+1] - ext -end - -function reset!(ext::ExtendableSparseMatrixXParallel) - reset!(ext,length(ext.xmatrices)) -end - - -function flush!(ext::ExtendableSparseMatrixXParallel{Tm,Tv,Ti}) where{Tm,Tv,Ti} - ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix) - np=length(ext.xmatrices) - (m,n)=size(ext.cscmatrix) - ext.xmatrices=[Tm(m,n) for i=1:np] - ext -end - - -function SparseArrays.sparse(ext::ExtendableSparseMatrixXParallel) - flush!(ext) - ext.cscmatrix -end - - - -function Base.setindex!(ext::ExtendableSparseMatrixXParallel, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - error("use rawupdateindex! for new entries into ExtendableSparseMatrixXParallel") - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixXParallel, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - return ext.cscmatrix.nzval[k] - elseif sum(nnz,ext.xmatrices) == 0 - return zero(eltype(ext.cscmatrix)) - else - error("flush! ExtendableSparseMatrixXParallel before using getindex") - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixXParallel, - op, - v, - i, - j, - tid) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.xmatrices[tid],op,v,i,j) - end -end - -# Needed in 1.9 -function Base.:*(ext::ExtendableSparse.Experimental.ExtendableSparseMatrixXParallel{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA - mul!(similar(x),ext,x) -end - -function LinearAlgebra.mul!(r, ext::ExtendableSparseMatrixXParallel, x) - flush!(ext) - A=ext.cscmatrix - colparts=ext.colparts - partnodes=ext.partnodes - rows = SparseArrays.rowvals(A) - vals = nonzeros(A) - r.=zero(eltype(ext)) - m,n=size(A) - for icol=1:length(colparts)-1 - @tasks for ip in colparts[icol]:colparts[icol+1]-1 - @inbounds for inode in partnodes[ip]:partnodes[ip+1]-1 - @inbounds for i in nzrange(A,inode) - r[rows[i]]+=vals[i]*x[inode] - end - end - end - end - r -end diff --git a/src/experimental/extendablesparsematrixscalar.jl b/src/experimental/extendablesparsematrixscalar.jl deleted file mode 100644 index 36c1dfa..0000000 --- a/src/experimental/extendablesparsematrixscalar.jl +++ /dev/null @@ -1,77 +0,0 @@ -mutable struct ExtendableSparseMatrixScalar{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti} - """ - Final matrix data - """ - cscmatrix::SparseMatrixCSC{Tv, Ti} - - """ - Matrix for new entries - """ - xmatrix::Tm -end - - -function ExtendableSparseMatrixScalar{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer} - ExtendableSparseMatrixScalar(spzeros(Tv, Ti, m, n), 
- Tm(m,n) - ) -end - - -function reset!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where {Tm,Tv,Ti} - m,n=size(ext.cscmatrix) - ext.cscmatrix=spzeros(Tv, Ti, m, n) - ext.xmatrix=Tm(m,n) - ext -end - - -function flush!(ext::ExtendableSparseMatrixScalar{Tm,Tv,Ti}) where{Tm,Tv,Ti} - if nnz(ext.xmatrix)>0 - ext.cscmatrix=ext.xmatrix+ext.cscmatrix - ext.xmatrix=Tm(size(ext.cscmatrix)...) - end - ext -end - -function SparseArrays.sparse(ext::ExtendableSparseMatrixScalar) - flush!(ext) - ext.cscmatrix -end - -function Base.setindex!(ext::ExtendableSparseMatrixScalar, - v::Union{Number,AbstractVecOrMat}, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = v - else - setindex!(ext.xmatrix,v,i,j) - end -end - - -function Base.getindex(ext::ExtendableSparseMatrixScalar, - i::Integer, - j::Integer) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] - else - getindex(ext.xmatrix,i,j) - end -end - -function rawupdateindex!(ext::ExtendableSparseMatrixScalar, - op, - v, - i, - j) - k = findindex(ext.cscmatrix, i, j) - if k > 0 - ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v) - else - rawupdateindex!(ext.xmatrix,op,v,i,j) - end -end diff --git a/src/experimental/sparsematrixlnkdict.jl b/src/experimental/sparsematrixlnkdict.jl deleted file mode 100644 index 1d3d8e4..0000000 --- a/src/experimental/sparsematrixlnkdict.jl +++ /dev/null @@ -1,461 +0,0 @@ -""" - $(TYPEDEF) - -Modification of SparseMatrixLNK where the pointer to first index of -column j is stored in a dictionary. -""" -mutable struct SparseMatrixLNKDict{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} - """ - Number of rows - """ - m::Ti - - """ - Number of columns - """ - n::Ti - - """ - Number of nonzeros - """ - nnz::Ti - - """ - Length of arrays - """ - nentries::Ti - - """ - Linked list of column entries. Initial length is n, - it grows with each new entry. - - colptr[index] contains the next - index in the list or zero, in the later case terminating the list which - starts at index 1<=j<=n for each column j. - """ - colptr::Vector{Ti} - - """ - Dictionary to store start indices of columns - """ - colstart::Dict{Ti,Ti} - - """ - Row numbers. For each index it contains the zero (initial state) - or the row numbers corresponding to the column entry list in colptr. - """ - rowval::Vector{Ti} - - """ - Nonzero entry values correspondin to each pair - (colptr[index],rowval[index]) - """ - nzval::Vector{Tv} -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -function SparseMatrixLNKDict{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - SparseMatrixLNKDict{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Ti,10)) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -function SparseMatrixLNKDict(valuetype::Type{Tv}, indextype::Type{Ti}, m, - n) where {Tv, Ti <: Integer} - SparseMatrixLNKDict{Tv, Ti}(m, n) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKDict(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKDict(Tv, Int, m, n) - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKDict(m, n) = SparseMatrixLNKDict(Float64, m, n) - -""" -$(SIGNATURES) - -Constructor from SparseMatrixCSC. 
- -""" -function SparseMatrixLNKDict(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: - Integer} - lnk = SparseMatrixLNKDict{Tv, Ti}(csc.m, csc.n) - for j = 1:(csc.n) - for k = csc.colptr[j]:(csc.colptr[j + 1] - 1) - lnk[csc.rowval[k], j] = csc.nzval[k] - end - end - lnk -end - -function findindex(lnk::SparseMatrixLNKDict, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k = get(lnk.colstart, j, 0) - if k==0 - return 0,0 - end - k0 = k - while k > 0 - if lnk.rowval[k] == i - return k, 0 - end - k0 = k - k = lnk.colptr[k] - end - return 0, k0 -end - -""" -$(SIGNATURES) - -Return value stored for entry or zero if not found -""" -function Base.getindex(lnk::SparseMatrixLNKDict{Tv, Ti}, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k == 0 - return zero(Tv) - else - return lnk.nzval[k] - end -end - -function addentry!(lnk::SparseMatrixLNKDict, i, j, k, k0) - # increase number of entries - lnk.nentries += 1 - if length(lnk.nzval) < lnk.nentries - newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) - resize!(lnk.nzval, newsize) - resize!(lnk.rowval, newsize) - resize!(lnk.colptr, newsize) - end - - if k0==0 - lnk.colstart[j]=lnk.nentries - end - - # Append entry if not found - lnk.rowval[lnk.nentries] = i - - # Shift the end of the list - lnk.colptr[lnk.nentries] = 0 - - if k0>0 - lnk.colptr[k0] = lnk.nentries - end - - # Update number of nonzero entries - lnk.nnz += 1 - return lnk.nentries -end - -""" -$(SIGNATURES) - -Update value of existing entry, otherwise extend matrix if v is nonzero. -""" -function Base.setindex!(lnk::SparseMatrixLNKDict, v, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = v - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = v - end - return lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero, no new -entry is created. -""" -function updateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero a new entry -is created nevertheless. -""" -function rawupdateindex!(lnk::SparseMatrixLNKDict{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - else - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Return tuple containing size of the matrix. -""" -Base.size(lnk::SparseMatrixLNKDict) = (lnk.m, lnk.n) - -""" -$(SIGNATURES) - -Return number of nonzero entries. -""" -SparseArrays.nnz(lnk::SparseMatrixLNKDict) = lnk.nnz - - -""" - $(SIGNATURES) -Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. 
-""" -function add_via_COO(lnk::SparseMatrixLNKDict{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - (;colptr,nzval,rowval,m,n)=csc - l=nnz(lnk)+nnz(csc) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - if nnz(csc)>0 - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - end - for (j,k) in lnk.colstart - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end -end - - -""" - $(SIGNATURES) -Add lnk and csc without creation of intermediate data. -""" -function add_directly(lnk::SparseMatrixLNKDict{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - @assert(csc.m==lnk.m) - @assert(csc.n==lnk.n) - - # overallocate arrays in order to avoid - # presumably slower push! - xnnz = nnz(csc) + nnz(lnk) - colptr = Vector{Ti}(undef, csc.n + 1) - rowval = Vector{Ti}(undef, xnnz) - nzval = Vector{Tv}(undef, xnnz) - - # Detect the maximum column length of lnk - lnk_maxcol = 0 - for (j,k) in lnk.colstart - lcol = zero(Ti) - while k > 0 - lcol += 1 - k = lnk.colptr[k] - end - lnk_maxcol = max(lcol, lnk_maxcol) - end - - # pre-allocate column data - col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] - - inz = 1 # counts the nonzero entries in the new matrix - - in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) - - in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) - - # loop over all columns - for j = 1:(csc.n) - # Copy extension entries into col and sort them - k = get(lnk.colstart, j, 0) - l_lnk_col = 0 - while k > 0 - if lnk.rowval[k] > 0 - l_lnk_col += 1 - col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) - end - k = lnk.colptr[k] - end - sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) - - # jointly sort lnk and csc entries into new matrix data - # this could be replaced in a more transparent manner by joint sorting: - # make a joint array for csc and lnk col, sort them. - # Will this be faster? - - colptr[j] = inz - jlnk = one(Ti) # counts the entries in col - jcsc = csc.colptr[j] # counts entries in csc - - while true - if in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || - !in_lnk_col(jlnk, l_lnk_col)) - # Insert entries from csc into new structure - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] - jcsc += 1 - inz += 1 - elseif in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) - # Add up entries from csc and lnk - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval - jcsc += 1 - inz += 1 - jlnk += 1 - elseif in_lnk_col(jlnk, l_lnk_col) - # Insert entries from lnk res. 
col into new structure - rowval[inz] = col[jlnk].rowval - nzval[inz] = col[jlnk].nzval - jlnk += 1 - inz += 1 - else - break - end - end - end - colptr[csc.n + 1] = inz - resize!(rowval, inz - 1) - resize!(nzval, inz - 1) - SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval) -end - - - -""" - $(SIGNATURES) - -Add SparseMatrixCSC matrix and [`SparseMatrixLNKDict`](@ref) lnk, returning a SparseMatrixCSC -""" -Base.:+(lnk::SparseMatrixLNKDict, csc::SparseMatrixCSC) = add_directly(lnk, csc) - -function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} - lnew=sum(nnz,lnkdictmatrices) - if lnew>0 - (;colptr,nzval,rowval,m,n)=cscmatrix - l=lnew+nnz(cscmatrix) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - - ip=1 - for lnk in lnkdictmatrices - for (j,k) in lnk.colstart - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - ip=ip+1 - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end - end - return cscmatrix -end - -function reset!(m::SparseMatrixLNKDict{Tv,Ti}) where {Tv,Ti} - m.nnz=0 - m.nentries=0 - m.colptr=zeros(Ti,10) - m.colstart::Dict{Ti,Ti} - m.rowval=zeros(Ti,10) - m.nzval=zeros(Ti,10) - m -end - - -""" -$(SIGNATURES) - -Constructor from SparseMatrixLNKDict. - -""" -function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKDict)::SparseMatrixCSC - csc = spzeros(lnk.m, lnk.n) - lnk + csc -end - -function SparseArrays.sparse(lnk::SparseMatrixLNKDict) - lnk + spzeros(lnk.m, lnk.n) -end - -function Base.copy(S::SparseMatrixLNKDict) - SparseMatrixLNKDict(size(S, 1), - size(S, 2), - S.nnz, - S.nentries, - copy(S.colptr), - copy(S.colstart), - copy(S.rowvals), - copy(S.nzval)) -end diff --git a/src/experimental/sparsematrixlnkx.jl b/src/experimental/sparsematrixlnkx.jl deleted file mode 100644 index bd2cdcf..0000000 --- a/src/experimental/sparsematrixlnkx.jl +++ /dev/null @@ -1,448 +0,0 @@ -""" - $(TYPEDEF) - -Modification of SparseMatrixLNK where the pointer to first index of -column j is stored in a dictionary. -""" -mutable struct SparseMatrixLNKX{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti} - """ - Number of rows - """ - m::Ti - - """ - Number of columns - """ - n::Ti - - """ - Number of nonzeros - """ - nnz::Ti - - """ - Length of arrays - """ - nentries::Ti - - """ - Linked list of column entries. Initial length is n, - it grows with each new entry. - - colptr[index] contains the next - index in the list or zero, in the later case terminating the list which - starts at index 1<=j<=n for each column j. - """ - colptr::Vector{Ti} - - """ - Start indices of columns - """ - colstart::Vector{Ti} - - """ - Row numbers. For each index it contains the zero (initial state) - or the row numbers corresponding to the column entry list in colptr. - """ - rowval::Vector{Ti} - - """ - Nonzero entry values correspondin to each pair - (colptr[index],rowval[index]) - """ - nzval::Vector{Tv} -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -function SparseMatrixLNKX{Tv, Ti}(m, n) where {Tv, Ti <: Integer} - SparseMatrixLNKX{Tv, Ti}(m, n, 0, 0, zeros(Ti,10), zeros(Ti,n), zeros(Ti,10), zeros(Ti,10)) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. 
-""" -function SparseMatrixLNKX(valuetype::Type{Tv}, indextype::Type{Ti}, m, - n) where {Tv, Ti <: Integer} - SparseMatrixLNKX{Tv, Ti}(m, n) -end - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKX(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixLNKX(Tv, Int, m, n) - -""" -$(SIGNATURES) - -Constructor of empty matrix. -""" -SparseMatrixLNKX(m, n) = SparseMatrixLNKX(Float64, m, n) - - -function findindex(lnk::SparseMatrixLNKX, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k =lnk.colstart[j] - if k==0 - return 0,0 - end - k0 = k - while k > 0 - if lnk.rowval[k] == i - return k, 0 - end - k0 = k - k = lnk.colptr[k] - end - return 0, k0 -end - -""" -$(SIGNATURES) - -Return value stored for entry or zero if not found -""" -function Base.getindex(lnk::SparseMatrixLNKX{Tv, Ti}, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k == 0 - return zero(Tv) - else - return lnk.nzval[k] - end -end - -function addentry!(lnk::SparseMatrixLNKX, i, j, k, k0) - # increase number of entries - lnk.nentries += 1 - if length(lnk.nzval) < lnk.nentries - newsize = Int(ceil(5.0 * lnk.nentries / 4.0)) - resize!(lnk.nzval, newsize) - resize!(lnk.rowval, newsize) - resize!(lnk.colptr, newsize) - end - - if k0==0 - lnk.colstart[j]=lnk.nentries - end - - # Append entry if not found - lnk.rowval[lnk.nentries] = i - - # Shift the end of the list - lnk.colptr[lnk.nentries] = 0 - - if k0>0 - lnk.colptr[k0] = lnk.nentries - end - - # Update number of nonzero entries - lnk.nnz += 1 - return lnk.nentries -end - -""" -$(SIGNATURES) - -Update value of existing entry, otherwise extend matrix if v is nonzero. -""" -function Base.setindex!(lnk::SparseMatrixLNKX, v, i, j) - if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) - throw(BoundsError(lnk, (i, j))) - end - - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = v - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = v - end - return lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero, no new -entry is created. -""" -function updateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - return lnk - end - if !iszero(v) - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Update element of the matrix with operation `op`. -It assumes that `op(0,0)==0`. If `v` is zero a new entry -is created nevertheless. -""" -function rawupdateindex!(lnk::SparseMatrixLNKX{Tv, Ti}, op, v, i, j) where {Tv, Ti} - k, k0 = findindex(lnk, i, j) - if k > 0 - lnk.nzval[k] = op(lnk.nzval[k], v) - else - k = addentry!(lnk, i, j, k, k0) - lnk.nzval[k] = op(zero(Tv), v) - end - lnk -end - -""" -$(SIGNATURES) - -Return tuple containing size of the matrix. -""" -Base.size(lnk::SparseMatrixLNKX) = (lnk.m, lnk.n) - -""" -$(SIGNATURES) - -Return number of nonzero entries. -""" -SparseArrays.nnz(lnk::SparseMatrixLNKX) = lnk.nnz - -""" -$(SIGNATURES) - -Dummy flush! method for SparseMatrixLNKX. Just -used in test methods -""" -function flush!(lnk::SparseMatrixLNKX{Tv, Ti}) where {Tv, Ti} - return lnk -end - -""" - $(SIGNATURES) -Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V. 
-""" -function add_via_COO(lnk::SparseMatrixLNKX{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - (;colptr,nzval,rowval,m,n)=csc - l=nnz(lnk)+nnz(csc) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - if nnz(csc)>0 - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - end - for j=1:n - k=lnk.colstart[j] - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end -end - - -""" - $(SIGNATURES) -Add lnk and csc without creation of intermediate data. -""" -function add_directly(lnk::SparseMatrixLNKX{Tv, Ti}, - csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer} - @assert(csc.m==lnk.m) - @assert(csc.n==lnk.n) - - # overallocate arrays in order to avoid - # presumably slower push! - xnnz = nnz(csc) + nnz(lnk) - colptr = Vector{Ti}(undef, csc.n + 1) - rowval = Vector{Ti}(undef, xnnz) - nzval = Vector{Tv}(undef, xnnz) - - # Detect the maximum column length of lnk - lnk_maxcol = 0 - for j=1:lnk.n - k=lnk.colstart[j] - lcol = zero(Ti) - while k > 0 - lcol += 1 - k = lnk.colptr[k] - end - lnk_maxcol = max(lcol, lnk_maxcol) - end - - # pre-allocate column data - col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol] - - inz = 1 # counts the nonzero entries in the new matrix - - in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1]) - - in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col) - - # loop over all columns - for j = 1:(csc.n) - # Copy extension entries into col and sort them - k = lnk.colstart[j] - l_lnk_col = 0 - while k > 0 - if lnk.rowval[k] > 0 - l_lnk_col += 1 - col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k]) - end - k = lnk.colptr[k] - end - sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward) - - # jointly sort lnk and csc entries into new matrix data - # this could be replaced in a more transparent manner by joint sorting: - # make a joint array for csc and lnk col, sort them. - # Will this be faster? - - colptr[j] = inz - jlnk = one(Ti) # counts the entries in col - jcsc = csc.colptr[j] # counts entries in csc - - while true - if in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval || - !in_lnk_col(jlnk, l_lnk_col)) - # Insert entries from csc into new structure - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] - jcsc += 1 - inz += 1 - elseif in_csc_col(jcsc, j) && - (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval) - # Add up entries from csc and lnk - rowval[inz] = csc.rowval[jcsc] - nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval - jcsc += 1 - inz += 1 - jlnk += 1 - elseif in_lnk_col(jlnk, l_lnk_col) - # Insert entries from lnk res. 
col into new structure - rowval[inz] = col[jlnk].rowval - nzval[inz] = col[jlnk].nzval - jlnk += 1 - inz += 1 - else - break - end - end - end - colptr[csc.n + 1] = inz - resize!(rowval, inz - 1) - resize!(nzval, inz - 1) - SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval) -end - - - -""" - $(SIGNATURES) - -Add SparseMatrixCSC matrix and [`SparseMatrixLNKX`](@ref) lnk, returning a SparseMatrixCSC -""" -Base.:+(lnk::SparseMatrixLNKX, csc::SparseMatrixCSC) = add_directly(lnk, csc) - -function Base.sum(lnkdictmatrices::Vector{SparseMatrixLNKX{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} - lnew=sum(nnz,lnkdictmatrices) - if lnew>0 - (;colptr,nzval,rowval,m,n)=cscmatrix - l=lnew+nnz(cscmatrix) - I=Vector{Ti}(undef,l) - J=Vector{Ti}(undef,l) - V=Vector{Tv}(undef,l) - i=1 - - for icsc=1:length(colptr)-1 - for j=colptr[icsc]:colptr[icsc+1]-1 - I[i]=icsc - J[i]=rowval[j] - V[i]=nzval[j] - i=i+1 - end - end - - ip=1 - for lnk in lnkdictmatrices - for j=1:n - k=lnk.colstart[j] - while k>0 - I[i]=lnk.rowval[k] - J[i]=j - V[i]=lnk.nzval[k] - k=lnk.colptr[k] - i=i+1 - end - end - ip=ip+1 - end - @static if VERSION>=v"1.10" - return SparseArrays.sparse!(I,J,V,m,n,+) - else - return SparseArrays.sparse(I,J,V,m,n,+) - end - end - return cscmatrix -end - - - -""" -$(SIGNATURES) - -Constructor from SparseMatrixLNKX. - -""" -function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixLNKX)::SparseMatrixCSC - csc = spzeros(lnk.m, lnk.n) - lnk + csc -end - -function SparseArrays.sparse(lnk::SparseMatrixLNKX) - lnk + spzeros(lnk.m, lnk.n) -end - -function Base.copy(S::SparseMatrixLNKX) - SparseMatrixLNKX(size(S, 1), - size(S, 2), - S.nnz, - S.nentries, - copy(S.colptr), - copy(S.colstart), - copy(S.rowvals), - copy(S.nzval)) -end From 41d23ccfda4fe1cf5f4f00e8b2ce7044cefca428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Sun, 7 Jul 2024 23:25:34 +0200 Subject: [PATCH 43/44] ExplicitImports --- Project.toml | 2 +- src/ExtendableSparse.jl | 37 +++++++++++-------------------------- test/Project.toml | 2 ++ test/runtests.jl | 5 +++++ 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/Project.toml b/Project.toml index 0372ecb..546b08a 100644 --- a/Project.toml +++ b/Project.toml @@ -40,7 +40,7 @@ Pardiso = "0.5.1" Requires = "1.1.3" Sparspak = "0.3.6" StaticArrays = "1.5.24" -julia = "1.6" +julia = "1.9" [extras] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl index 0dd8536..d348d00 100644 --- a/src/ExtendableSparse.jl +++ b/src/ExtendableSparse.jl @@ -1,13 +1,17 @@ module ExtendableSparse -using SparseArrays,StaticArrays -using LinearAlgebra -using Sparspak -using ILUZero + +using DocStringExtensions: DocStringExtensions, SIGNATURES, TYPEDEF,TYPEDFIELDS +using ILUZero: ILUZero, ldiv!, nnz using OhMyThreads: @tasks +using LinearAlgebra: LinearAlgebra, Diagonal, Hermitian, Symmetric, Tridiagonal, + cholesky, cholesky!, convert, lu!, mul!, norm, transpose +using SparseArrays: SparseArrays, AbstractSparseMatrix, SparseMatrixCSC, + dropzeros!, findnz, nzrange, sparse, spzeros +using Sparspak: Sparspak, sparspaklu, sparspaklu! +using StaticArrays: StaticArrays, SMatrix, SVector +using SuiteSparse: SuiteSparse +import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros -if !isdefined(Base, :get_extension) - using Requires -end # Define our own constant here in order to be able to # test things at least a little bit.. 
@@ -17,9 +21,7 @@ if USE_GPL_LIBS using SuiteSparse end -using DocStringExtensions -import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros include("matrix/sparsematrixcsc.jl") include("matrix/abstractsparsematrixextension.jl") @@ -71,23 +73,6 @@ export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU export issolver export factorize!, update! -@static if !isdefined(Base, :get_extension) - function __init__() - @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" begin - include("../ext/ExtendableSparsePardisoExt.jl") - end - @require IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895" begin - include("../ext/ExtendableSparseIncompleteLUExt.jl") - end - @require AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" begin - include("../ext/ExtendableSparseAlgebraicMultigridExt.jl") - end - @require AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" begin - include("../ext/ExtendableSparseAMGCLWrapExt.jl") - end - end -end - """ ``` ILUTPreconditioner(;droptol=1.0e-3) diff --git a/test/Project.toml b/test/Project.toml index 9ef7608..76e3ae8 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,10 @@ [deps] AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" +ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f" diff --git a/test/runtests.jl b/test/runtests.jl index 3af3ce4..b154d1d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,12 @@ using BenchmarkTools using MultiFloats using ForwardDiff +using ExplicitImports +@testset "ExplicitImports" begin + @test ExplicitImports.check_no_implicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing + @test ExplicitImports.check_no_stale_explicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing +end @testset "Parallel" begin include("test_parallel.jl") From 24f73ab641398d0c2853d966938df06570cd19cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Fuhrmann?= Date: Tue, 16 Jul 2024 22:47:20 +0200 Subject: [PATCH 44/44] fix some docstrings, AMGCLWrap dependency --- Project.toml | 2 +- docs/src/internal.md | 11 ++++++++++ src/matrix/abstractsparsematrixextension.jl | 23 +++++++-------------- src/matrix/sparsematrixdilnkc.jl | 10 +++++++++ 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/Project.toml b/Project.toml index 546b08a..5bf880c 100644 --- a/Project.toml +++ b/Project.toml @@ -31,7 +31,7 @@ ExtendableSparseIncompleteLUExt = "IncompleteLU" ExtendableSparsePardisoExt = "Pardiso" [compat] -AMGCLWrap = "0.3.1,0.4" +AMGCLWrap = "0.4" AlgebraicMultigrid = "0.4,0.5,0.6" DocStringExtensions = "0.8, 0.9" ILUZero = "0.2" diff --git a/docs/src/internal.md b/docs/src/internal.md index 853dbce..71a10f3 100644 --- a/docs/src/internal.md +++ b/docs/src/internal.md @@ -13,6 +13,17 @@ Pages = ["sparsematrixlnk.jl"] Modules = [ExtendableSparse] Pages = ["sparsematrixcsc.jl"] ``` +## New API +Under development - aimed at multithreading +```@autodocs +Modules = [ExtendableSparse] +Pages = ["abstractsparsematrixextension.jl", + "abstractextendablesparsematrixcsc.jl", + "sparsematrixdilnkc.jl", + 
"genericextendablesparsematrixcsc.jl", + "genericmtextendablesparsematrixcsc.jl"] +``` + ## Misc methods diff --git a/src/matrix/abstractsparsematrixextension.jl b/src/matrix/abstractsparsematrixextension.jl index d8070fc..c206483 100644 --- a/src/matrix/abstractsparsematrixextension.jl +++ b/src/matrix/abstractsparsematrixextension.jl @@ -5,22 +5,13 @@ Abstract type for sparse matrix extension. Subtypes T_ext must implement: -Constructor T_ext(m,n) -SparseArrays.nnz(ext::T_ext) -Base.size(ext::T_ext) - - -Base.sum(extmatrices::Vector{T_ext}, csx) - - Add csx matrix and extension matrices (one per partition) and return csx matrix - -rawupdateindex!(ext::Text, op, v, i, j) where {Tv, Ti} - - Set ext[i,j]+=v, possibly insert entry into matrix. - - -Optional: - -Base.+(ext::T_ext, csx) - - Add extension matrix and csc/csr matrix, return csx matrix +- Constructor `T_ext(m,n)` +- `SparseArrays.nnz(ext::T_ext)` +- `Base.size(ext::T_ext)` +- `Base.sum(extmatrices::Vector{T_ext}, csx)`: add csr or csc matrix and extension matrices (one per partition) and return csx matrix +- `Base.+(ext::T_ext, csx)` (optional) - Add extension matrix and csc/csr matrix, return csx matrix +- `rawupdateindex!(ext::Text, op, v, i, j, tid) where {Tv, Ti}`: Set `ext[i,j]op=v`, possibly insert new entry into matrix. `tid` is a +task or partition id """ abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl index a2cdea8..ea58534 100644 --- a/src/matrix/sparsematrixdilnkc.jl +++ b/src/matrix/sparsematrixdilnkc.jl @@ -103,6 +103,11 @@ function SparseMatrixDILNKC(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv lnk end +""" +$(SIGNATURES) + +Find index in matrix. +""" function findindex(lnk::SparseMatrixDILNKC, i, j) if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n)) throw(BoundsError(lnk, (i, j))) @@ -137,6 +142,11 @@ function Base.getindex(lnk::SparseMatrixDILNKC{Tv, Ti}, i, j) where {Tv, Ti} end end +""" + $(SIGNATURES) + +Add entry. +""" function addentry!(lnk::SparseMatrixDILNKC, i, j, k, k0) # increase number of entries lnk.nentries += 1