diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 02aa07d..5458760 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -1,4 +1,7 @@
-style = "sciml"
+style = "yas"
 always_for_in = false
-separate_kwargs_with_semicolon = true
 format_markdown = true
+import_to_using = false
+pipe_to_function_call = false
+short_to_long_function_def = false
+always_use_return = false
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2ff5995..882d933 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,15 +15,26 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
+          - '1.9' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
           - '1'   # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
           - 'nightly'
         os:
           - ubuntu-latest
-          - macos-latest
           - windows-latest
+          - macos-latest # arm
+          - macOS-13 # intel
         arch:
           - x64
+          - aarch64
+        exclude:
+          - os: ubuntu-latest
+            arch: aarch64
+          - os: windows-latest
+            arch: aarch64
+          - os: macOS-13 
+            arch: aarch64
+          - os: macos-latest
+            arch: x64
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
@@ -42,6 +53,8 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
+        env:
+          JULIA_NUM_THREADS: 4
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v3
   docs:
diff --git a/.gitignore b/.gitignore
index 3ea90ad..068167e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 QUARRY
 docs/build
 *~
-Manifest.toml
+Manifest*.toml
 .repl_history
 quarry
 
diff --git a/Project.toml b/Project.toml
index 65f17f1..5bf880c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,13 +1,15 @@
 name = "ExtendableSparse"
 uuid = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3"
 authors = ["Juergen Fuhrmann <juergen.fuhrmann@wias-berlin.de>"]
-version = "1.4.1"
+version = "1.5.0"
 
 [deps]
 AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b"
+OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
@@ -29,7 +31,7 @@ ExtendableSparseIncompleteLUExt = "IncompleteLU"
 ExtendableSparsePardisoExt = "Pardiso"
 
 [compat]
-AMGCLWrap = "0.3.1,0.4"
+AMGCLWrap = "0.4"
 AlgebraicMultigrid = "0.4,0.5,0.6"
 DocStringExtensions = "0.8, 0.9"
 ILUZero = "0.2"
@@ -38,7 +40,7 @@ Pardiso = "0.5.1"
 Requires = "1.1.3"
 Sparspak = "0.3.6"
 StaticArrays = "1.5.24"
-julia = "1.6"
+julia = "1.9"
 
 [extras]
 AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288"
diff --git a/docs/make.jl b/docs/make.jl
index 3a70bf5..5dbe7c4 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -5,6 +5,7 @@ function mkdocs()
     makedocs(; sitename = "ExtendableSparse.jl",
              modules = [ExtendableSparse],
              doctest = false,
+             warnonly = true,
              clean = false,
              authors = "J. Fuhrmann",
              repo = "https://github.com/j-fu/ExtendableSparse.jl",
diff --git a/docs/src/internal.md b/docs/src/internal.md
index 853dbce..71a10f3 100644
--- a/docs/src/internal.md
+++ b/docs/src/internal.md
@@ -13,6 +13,17 @@ Pages = ["sparsematrixlnk.jl"]
 Modules = [ExtendableSparse]
 Pages = ["sparsematrixcsc.jl"]
 ```
+## New API 
+Under development - aimed at multithreading
+```@autodocs
+Modules = [ExtendableSparse]
+Pages = ["abstractsparsematrixextension.jl",
+    "abstractextendablesparsematrixcsc.jl",
+    "sparsematrixdilnkc.jl",
+    "genericextendablesparsematrixcsc.jl",
+    "genericmtextendablesparsematrixcsc.jl"]
+```
+
 
 ## Misc methods
 
diff --git a/src/ExtendableSparse.jl b/src/ExtendableSparse.jl
index 700bbda..d348d00 100644
--- a/src/ExtendableSparse.jl
+++ b/src/ExtendableSparse.jl
@@ -1,12 +1,17 @@
 module ExtendableSparse
-using SparseArrays,StaticArrays
-using LinearAlgebra
-using Sparspak
-using ILUZero
 
-if  !isdefined(Base, :get_extension)
-    using Requires
-end
+using DocStringExtensions: DocStringExtensions, SIGNATURES, TYPEDEF,TYPEDFIELDS
+using ILUZero: ILUZero, ldiv!, nnz
+using OhMyThreads: @tasks
+using LinearAlgebra: LinearAlgebra, Diagonal, Hermitian, Symmetric, Tridiagonal,
+    cholesky, cholesky!, convert, lu!, mul!, norm, transpose
+using SparseArrays: SparseArrays, AbstractSparseMatrix, SparseMatrixCSC,
+    dropzeros!, findnz, nzrange, sparse, spzeros
+using Sparspak: Sparspak, sparspaklu, sparspaklu!
+using StaticArrays: StaticArrays, SMatrix, SVector
+using SuiteSparse: SuiteSparse
+import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros
+
 
 # Define our own constant here in order to be able to
 # test things at least a little bit..
@@ -17,58 +22,57 @@ if USE_GPL_LIBS
 end
 
 
-using DocStringExtensions
-
-import SparseArrays: AbstractSparseMatrixCSC, rowvals, getcolptr, nonzeros
 
 include("matrix/sparsematrixcsc.jl")
+include("matrix/abstractsparsematrixextension.jl")
 include("matrix/sparsematrixlnk.jl")
+include("matrix/sparsematrixdilnkc.jl")
+include("matrix/abstractextendablesparsematrixcsc.jl")
 include("matrix/extendable.jl")
+include("matrix/genericmtextendablesparsematrixcsc.jl")
+include("matrix/genericextendablesparsematrixcsc.jl")
+
+const ExtendableSparseMatrix=ExtendableSparseMatrixCSC
+const MTExtendableSparseMatrixCSC{Tv,Ti}=GenericMTExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti}
+MTExtendableSparseMatrixCSC(m,n,args...)=MTExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...)
+
+const STExtendableSparseMatrixCSC{Tv,Ti}=GenericExtendableSparseMatrixCSC{SparseMatrixDILNKC{Tv,Ti},Tv,Ti}
+STExtendableSparseMatrixCSC(m,n,args...)=STExtendableSparseMatrixCSC{Float64,Int64}(m,n,args...)
 
-export SparseMatrixLNK,
-       ExtendableSparseMatrix, flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse
+
+export ExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, STExtendableSparseMatrixCSC, GenericMTExtendableSparseMatrixCSC
+export SparseMatrixLNK, ExtendableSparseMatrix,flush!, nnz, updateindex!, rawupdateindex!, colptrs, sparse, reset!, nnznew
+export partitioning!
 
 export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet
 
 include("factorizations/factorizations.jl")
 
+include("experimental/Experimental.jl")
+
+include("factorizations/simple_iteration.jl")
+export simple, simple!
+
+include("matrix/sprand.jl")
+export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark
+
+export rawupdateindex!, updateindex!
+
+
+
+
 export JacobiPreconditioner,
     ILU0Preconditioner,
     ILUZeroPreconditioner,
     PointBlockILUZeroPreconditioner,
     ParallelJacobiPreconditioner,
     ParallelILU0Preconditioner,
-    BlockPreconditioner,allow_views,
-    reorderlinsys
+    BlockPreconditioner,allow_views
 
 export AbstractFactorization, LUFactorization, CholeskyFactorization, SparspakLU
 export issolver
 export factorize!, update!
 
-include("factorizations/simple_iteration.jl")
-export simple, simple!
-
-include("matrix/sprand.jl")
-export sprand!, sprand_sdd!, fdrand, fdrand!, fdrand_coo, solverbenchmark
-
-
-@static if  !isdefined(Base, :get_extension)
-    function __init__()
-        @require Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"  begin
-            include("../ext/ExtendableSparsePardisoExt.jl")
-        end
-        @require IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895"  begin
-            include("../ext/ExtendableSparseIncompleteLUExt.jl")
-        end
-        @require AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c" begin
-            include("../ext/ExtendableSparseAlgebraicMultigridExt.jl")
-        end
-        @require AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288" begin
-            include("../ext/ExtendableSparseAMGCLWrapExt.jl")
-        end
-    end
-end
-
 """
 ```
 ILUTPreconditioner(;droptol=1.0e-3)
diff --git a/src/experimental/Experimental.jl b/src/experimental/Experimental.jl
new file mode 100644
index 0000000..71995de
--- /dev/null
+++ b/src/experimental/Experimental.jl
@@ -0,0 +1,44 @@
+module Experimental
+using ExtendableSparse, SparseArrays
+using LinearAlgebra
+using SparseArrays: AbstractSparseMatrixCSC
+import SparseArrays: nonzeros, getcolptr,nzrange
+import ExtendableSparse: flush!, reset!, rawupdateindex!, findindex
+using ExtendableSparse: ColEntry, AbstractPreconditioner, @makefrommatrix, phash
+using ExtendableSparse:  AbstractExtendableSparseMatrixCSC, AbstractSparseMatrixExtension
+using DocStringExtensions
+using Metis
+using Base.Threads
+using OhMyThreads: @tasks
+import ExtendableSparse: factorize!, update!, partitioning!
+
+include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ExtendableSparseParallel.jl"))
+
+include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "ilu_Al-Kurdi_Mittal.jl"))
+#using .ILUAM
+include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "pilu_Al-Kurdi_Mittal.jl"))
+#using .PILUAM
+
+include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel" ,"iluam.jl"))
+include(joinpath(@__DIR__, "ExtendableSparseMatrixParallel", "piluam.jl"))
+
+@eval begin
+    @makefrommatrix ILUAMPreconditioner
+    @makefrommatrix PILUAMPreconditioner
+end
+
+function factorize!(p::PILUAMPreconditioner, A::ExtendableSparseMatrixParallel)
+    p.A = A
+    update!(p)
+    p
+end
+                
+export ExtendableSparseMatrixParallel, SuperSparseMatrixLNK
+export addtoentry!, reset!, dummy_assembly!, preparatory_multi_ps_less_reverse, fr, addtoentry!,  compare_matrices_light
+export     ILUAMPreconditioner,    PILUAMPreconditioner
+export     reorderlinsys, nnz_noflush
+
+
+
+end
+
diff --git a/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl b/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
new file mode 100644
index 0000000..08268ee
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/ExtendableSparseParallel.jl
@@ -0,0 +1,464 @@
+include("supersparse.jl")
+include("preparatory.jl")
+#include("prep_time.jl")
+
+mutable struct ExtendableSparseMatrixParallel{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti}
+    """
+    Final matrix data
+    """
+    cscmatrix::SparseMatrixCSC{Tv, Ti}
+
+    """
+    Linked list structures holding data of extension, one for each thread
+    """
+    lnkmatrices::Vector{SuperSparseMatrixLNK{Tv, Ti}}
+
+    """
+    Number of Nodes per Threads
+    """
+	nnts::Vector{Ti}
+    
+    """
+    sortednodesperthread[i,j] = local index of the j-th global column in the i-th LNK matrix
+    (this is used e.g. when assembling the matrix)
+    """
+    sortednodesperthread::Matrix{Ti}
+    
+    """
+    depth+1 x nn matrix,
+    old_noderegions[i,j] = region in which node j is, in level i 
+    old refers to the fact that j is the 'old index' (i.e. grid index, not matrix index, see 'new_indices')
+    """
+    old_noderegions::Matrix{Ti}
+    
+    """
+    cellsforpart[i] is a vector containing all cells in the i-th region
+    cellsforpart has length nt*depth + 1
+    """
+    cellsforpart::Vector{Vector{Ti}}
+    
+    """
+    globalindices[i][j] = index in the global (ESMP & CSC) matrix of the j-th column of the i-th LNK matrix
+    (this maps the local indices (in the LNKs) to the global indices (ESMP & CSC)) 
+    """
+    globalindices::Vector{Vector{Ti}}
+    
+    """
+    For some applications such as the parallel ILU preconditioner, a block form is necessary.
+    Thus, the columns are reordered and the A[i,i] does not correspond to the i-th node of the grid,
+    but A[new_indices[i], new_indices[i]] does
+    """
+    new_indices::Vector{Ti}
+    
+    """
+    Reverse: rev_new_indices[new_indices[i]] = i, for all i
+    """
+    rev_new_indices::Vector{Ti}
+    
+    """
+    starts[i] gives the first column of the i-th region, i.e. starts[1] = 1
+    starts has length nt*depth + 1
+    """
+    start::Vector{Ti}
+
+    """
+    cellparts[i] = region of the i-th cell
+    """
+    cellparts::Vector{Ti}
+    
+    """
+    Number of threads
+    """
+    nt::Ti
+    
+    """
+    How often is the separator partitioned? (if never: depth = 1)
+    """
+    depth::Ti
+
+    phash::UInt64
+
+    """
+    Number of rows / number of nodes in grid 
+    """
+    n::Ti
+
+    """
+    Number of columns / number of nodes in grid (only works for square matrices)
+    """
+    m::Ti
+    
+    
+end
+
+
+"""
+$(SIGNATURES)
+
+`ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}`
+
+Create an ExtendableSparseMatrixParallel based on a grid.
+The grid is specified by nc (number of cells), nn (number of nodes) and the `mat_cell_node` (i.e. grid[CellNodes] if ExtendableGrids is used). 
+Here, `mat_cell_node[k,i]` is the i-th node in the k-th cell.
+The matrix structure is made for parallel computations with `nt` threads.
+`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again
+`block_struct=true` means, the matrix should be reordered two have a block structure, this is necessary for parallel ILU, for `false`, the matrix is not reordered 
+"""
+function ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct = true) where {Tv, Ti <: Integer}
+	nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, depth = preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; block_struct)
+	csc = spzeros(Tv, Ti, nn, nn)
+	lnk = [SuperSparseMatrixLNK{Tv, Ti}(nn, nnts[tid]) for tid=1:nt]
+	ExtendableSparseMatrixParallel{Tv, Ti}(csc, lnk, nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, nt, depth, phash(csc), csc.n, csc.m)
+end
+
+
+"""
+$(SIGNATURES)
+
+`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`
+
+`A[i,j] += v`
+This function should be used, if the thread in which the entry appears is known (`tid`).
+If the thread is not known, use `addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false)`, this function calculates `tid`.
+If you know that the entry is not yet known to the CSC structure, set `known_that_unknown=true`.
+"""
+function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}
+	if known_that_unknown
+		A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
+		return
+	end
+	
+	if updatentryCSC2!(A.cscmatrix, i, j, v)
+	else
+		A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
+	end
+end
+
+
+"""
+$(SIGNATURES)
+
+`addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}`
+
+A[i,j] += v, using any partition.
+If the partition should be specified (for parallel use), use 
+`function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, tid, v; known_that_unknown=false) where {Tv, Ti <: Integer}`.
+"""
+function addtoentry!(A::ExtendableSparseMatrixParallel{Tv, Ti}, i, j, v; known_that_unknown=false) where {Tv, Ti <: Integer}
+	if known_that_unknown
+		level, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]])
+		A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
+		return
+	end
+	
+	if updatentryCSC2!(A.cscmatrix, i, j, v)
+	else
+		_, tid = last_nz(A.old_noderegions[:, A.rev_new_indices[j]])
+		A.lnkmatrices[tid][i, A.sortednodesperthread[tid, j]] += v
+	end
+end
+
+#---------------------------------
+
+"""
+$(SIGNATURES)
+`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer`
+
+Update element of the matrix  with operation `op`.
+Use this method if the 'thread of the element' is not known, otherwise use `updateindex!(ext, op, v, i, j, tid)`. 
+"""
+function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti},
+                      op,
+                      v,
+                      i,
+                      j) where {Tv, Ti <: Integer}
+    k = ExtendableSparse.findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+        return
+    else
+    	level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]])
+		updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j])
+    end
+    ext
+end
+
+"""
+$(SIGNATURES)
+`updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer`
+
+Update element of the matrix  with operation `op`.
+Use this method if the 'thread of the element' is known, otherwise use `updateindex!(ext, op, v, i, j)`. 
+"""
+function updateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti},
+                      op,
+                      v,
+                      i,
+                      j,
+                      tid) where {Tv, Ti <: Integer}
+    k = ExtendableSparse.findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+        return
+    else
+    	updateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j])
+    end
+    ext
+end
+
+"""
+$(SIGNATURES)
+`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j) where {Tv, Ti <: Integer}`
+
+Like [`updateindex!`](@ref) but without checking if v is zero.
+Use this method if the 'thread of the element' is not known.
+"""
+function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti},
+                         op,
+                         v,
+                         i,
+                         j) where {Tv, Ti <: Integer}
+    k = ExtendableSparse.findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]])
+	    rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j])
+    end
+    ext
+end
+
+"""
+$(SIGNATURES)
+`rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, op, v, i, j, tid) where {Tv, Ti <: Integer}`
+
+Like [`updateindex!`](@ref) but without checking if v is zero.
+Use this method if the 'thread of the element' is known
+"""
+function rawupdateindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti},
+                         op,
+                         v,
+                         i,
+                         j, 
+                         tid) where {Tv, Ti <: Integer}
+    k = ExtendableSparse.findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.lnkmatrices[tid], op, v, i, ext.sortednodesperthread[tid, j])
+    end
+    ext
+end
+
+"""
+$(SIGNATURES)
+``Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti <: Integer`
+
+Find index in CSC matrix and return value, if it exists.
+Otherwise, return value from extension.
+"""
+function Base.getindex(ext::ExtendableSparseMatrixParallel{Tv, Ti},
+                       i::Integer,
+                       j::Integer) where {Tv, Ti <: Integer}
+    k = ExtendableSparse.findindex(ext.cscmatrix, i, j)
+    if k > 0
+        return ext.cscmatrix.nzval[k]
+    end
+    
+    level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]])
+	ext.lnkmatrices[tid][i, ext.sortednodesperthread[tid, j]]
+    
+end
+
+"""
+$(SIGNATURES)
+`Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti}, v::Union{Number,AbstractVecOrMat}, i::Integer, j::Integer) where {Tv, Ti}`
+
+Find index in CSC matrix and set value if it exists. Otherwise,
+set index in extension if `v` is nonzero.
+"""
+function Base.setindex!(ext::ExtendableSparseMatrixParallel{Tv, Ti},
+                        v::Union{Number,AbstractVecOrMat},
+                        i::Integer,
+                        j::Integer) where {Tv, Ti}
+    k = ExtendableSparse.findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = v
+    else
+		level, tid = last_nz(ext.old_noderegions[:, ext.rev_new_indices[j]])
+		#@info typeof(tid), typeof(j)
+		jj = ext.sortednodesperthread[tid, j]
+		ext.lnkmatrices[tid][i, jj] = v
+    end
+end
+
+
+
+#------------------------------------
+
+"""
+$(SIGNATURES)
+
+Reset matrix, such that CSC and LNK have no non-zero entries.
+"""
+function reset!(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer}
+	A.cscmatrix = spzeros(Tv, Ti, A.n, A.m)
+	A.lnkmatrices = [SuperSparseMatrixLNK{Tv, Ti}(A.n, A.nnts[tid]) for tid=1:A.nt]
+end
+
+"""
+$(SIGNATURES)
+
+Compute number of non-zero elements, after flush.
+"""
+function nnz_flush(ext::ExtendableSparseMatrixParallel)
+    flush!(ext)
+    return nnz(ext.cscmatrix)
+end
+
+"""
+$(SIGNATURES)
+
+Compute number of non-zero elements, without flush.
+"""
+function nnz_noflush(ext::ExtendableSparseMatrixParallel)
+    return nnz(ext.cscmatrix), sum([ext.lnkmatrices[i].nnz for i=1:ext.nt])
+end
+	
+function matrixindextype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer}
+	Ti
+end
+
+function matrixvaluetype(A::ExtendableSparseMatrixParallel{Tv, Ti}) where {Tv, Ti <: Integer}
+	Tv
+end
+
+
+"""
+$(SIGNATURES)
+
+Show matrix, without flushing
+"""
+function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrixParallel)
+    #flush!(ext)
+    xnnzCSC, xnnzLNK = nnz_noflush(ext)
+    m, n = size(ext)
+    print(io,
+          m,
+          "×",
+          n,
+          " ",
+          typeof(ext),
+          " with ",
+          xnnzCSC,
+          " stored ",
+          xnnzCSC == 1 ? "entry" : "entries",
+          " in CSC and ",
+          xnnzLNK,
+          " stored ",
+          xnnzLNK == 1 ? "entry" : "entries",
+          " in LNK. CSC:")
+
+    if !haskey(io, :compact)
+        io = IOContext(io, :compact => true)
+    end
+
+    if !(m == 0 || n == 0 || xnnzCSC == 0)
+        print(io, ":\n")
+        Base.print_array(IOContext(io), ext.cscmatrix)
+    end
+end
+
+"""
+`function entryexists2(CSC, i, j)`
+
+Find out if CSC already has an nonzero entry at i,j without any allocations
+"""
+function entryexists2(CSC, i, j) #find out if CSC already has an nonzero entry at i,j
+	#vals = 
+	#ids = CSC.colptr[j]:(CSC.colptr[j+1]-1)
+	i in view(CSC.rowval, CSC.colptr[j]:(CSC.colptr[j+1]-1))
+end
+
+"""
+$(SIGNATURES)
+
+Find out if i,j is non-zero entry in CSC, if yes, update entry with += v and return `true`, if not return `false`
+"""
+function updatentryCSC2!(CSC::SparseArrays.SparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer, v) where {Tv, Ti <: Integer}
+	p1 = CSC.colptr[j]
+	p2 = CSC.colptr[j+1]-1
+
+	searchk = searchsortedfirst(view(CSC.rowval, p1:p2), i) + p1 - 1
+	
+	if (searchk <= p2) && (CSC.rowval[searchk] == i)
+		CSC.nzval[searchk] += v
+		return true
+	else
+		return false
+	end
+end
+
+
+
+
+Base.size(A::ExtendableSparseMatrixParallel) = (A.cscmatrix.m, A.cscmatrix.n)
+include("struct_flush.jl")
+
+
+
+import LinearAlgebra.mul!
+
+"""
+$(SIGNATURES)
+```function LinearAlgebra.mul!(y, A, x)```
+
+This overwrites the mul! function for A::ExtendableSparseMatrixParallel
+"""
+function LinearAlgebra.mul!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv, Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer}
+    #@info "my matvec"
+    _, nnzLNK = nnz_noflush(A)
+    @assert nnzLNK == 0
+    #mul!(y, A.cscmatrix, x)
+    matvec!(y, A, x)
+end
+
+
+"""
+$(SIGNATURES)
+```function matvec!(y, A, x)```
+
+y <- A*x, where y and x are vectors and A is an ExtendableSparseMatrixParallel
+this computation is done in parallel, it has the same result as y = A.cscmatrix*x
+"""
+function matvec!(y::AbstractVector{Tv}, A::ExtendableSparseMatrixParallel{Tv,Ti}, x::AbstractVector{Tv}) where {Tv, Ti<:Integer}
+    nt = A.nt
+    depth = A.depth
+    colptr = A.cscmatrix.colptr
+    nzv = A.cscmatrix.nzval
+    rv  = A.cscmatrix.rowval
+
+    LinearAlgebra._rmul_or_fill!(y, 0.0)
+    
+    for level=1:depth
+        @threads for tid::Int64=1:nt
+            for col::Int64=A.start[(level-1)*nt+tid]:A.start[(level-1)*nt+tid+1]-1
+                for row::Int64=colptr[col]:colptr[col+1]-1 # in nzrange(A, col)
+                    y[rv[row]] += nzv[row]*x[col]
+                end
+            end
+        end
+    end
+
+
+
+    @threads for tid=1:1
+        for col::Int64=A.start[depth*nt+1]:A.start[depth*nt+2]-1
+            for row::Int64=colptr[col]:colptr[col+1]-1 #nzrange(A, col)
+                y[rv[row]] += nzv[row]*x[col]
+            end
+        end
+    end
+    
+    y
+end
diff --git a/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl
new file mode 100644
index 0000000..ad2207d
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/ilu_Al-Kurdi_Mittal.jl
@@ -0,0 +1,267 @@
+#module ILUAM
+#using LinearAlgebra, SparseArrays
+
+import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz 
+
+#@info "ILUAM"
+
+mutable struct ILUAMPrecon{T,N}
+
+	diag::AbstractVector
+    nzval::AbstractVector
+	A::AbstractMatrix
+	
+end
+
+
+function iluAM!(ILU::ILUAMPrecon{Tv,Ti}, A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:Integer}
+	diag = ILU.diag
+	nzval = ILU.nzval
+
+	nzval = copy(A.nzval)
+	diag  = Vector{Ti}(undef, n)
+	ILU.A = A
+	colptr = A.colptr
+	rowval = A.rowval
+	n = A.n
+	point = zeros(Ti, n)
+
+	for j=1:n
+		for v=colptr[j]:colptr[j+1]-1
+			if rowval[v] == j
+				diag[j] = v
+				break
+			end
+			#elseif rowval[v] 
+		end
+	end
+	
+	# compute L and U
+	for j=1:n
+		for v=colptr[j]:colptr[j+1]-1  ## start at colptr[j]+1 ??
+			point[rowval[v]] = v
+		end
+		
+		for v=colptr[j]:diag[j]-1
+			i = rowval[v]
+			#nzval[v] /= nzval[diag[i]]
+			for w=diag[i]+1:colptr[i+1]-1
+				k = point[rowval[w]]
+				if k>0
+					nzval[k] -= nzval[v]*nzval[w]
+				end
+			end
+		end
+		
+		for v=diag[j]+1:colptr[j+1]-1
+			nzval[v] /= nzval[diag[j]]
+		end
+		
+		
+		for v=colptr[j]:colptr[j+1]-1
+			point[rowval[v]] = zero(Ti)
+		end
+	end
+
+end
+
+function iluAM(A::SparseMatrixCSC{Tv,Ti}) where {Tv, Ti <:Integer}
+	#@info "iluAM"
+	nzval = copy(A.nzval)
+	colptr = A.colptr
+	rowval = A.rowval
+	#nzval  = ILU.nzval
+	n = A.n # number of columns
+	point = zeros(Ti, n) #Vector{Ti}(undef, n)
+	diag  = Vector{Ti}(undef, n)
+	
+	# find diagonal entries
+	for j=1:n
+		for v=colptr[j]:colptr[j+1]-1
+			if rowval[v] == j
+				diag[j] = v
+				break
+			end
+			#elseif rowval[v] 
+		end
+	end
+
+	#@info diag[1:20]'
+	#@info diag[end-20:end]'
+	
+	# compute L and U
+	for j=1:n
+		for v=colptr[j]:colptr[j+1]-1  ## start at colptr[j]+1 ??
+			point[rowval[v]] = v
+		end
+		
+		for v=colptr[j]:diag[j]-1
+			i = rowval[v]
+			#nzval[v] /= nzval[diag[i]]
+			for w=diag[i]+1:colptr[i+1]-1
+				k = point[rowval[w]]
+				if k>0
+					nzval[k] -= nzval[v]*nzval[w]
+				end
+			end
+		end
+		
+		for v=diag[j]+1:colptr[j+1]-1
+			nzval[v] /= nzval[diag[j]]
+		end
+		
+		
+		for v=colptr[j]:colptr[j+1]-1
+			point[rowval[v]] = zero(Ti)
+		end
+	end
+	#nzval, diag
+	ILUAMPrecon{Tv,Ti}(diag, nzval, A)
+end
+
+function forward_subst_old!(y, v, nzval, diag, A)
+	#@info "fso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
+	n      = A.n
+	colptr = A.colptr
+	rowval = A.rowval
+	
+	for i in eachindex(y)
+        y[i] = zero(Float64)
+    end
+
+	#y .= 0
+	@inbounds for j=1:n
+		y[j] += v[j]
+		for v=diag[j]+1:colptr[j+1]-1
+			y[rowval[v]] -= nzval[v]*y[j]
+		end
+	end
+	y
+end
+
+
+function backward_subst_old!(x, y, nzval, diag, A)
+	#@info "bso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
+	n      = A.n
+	colptr = A.colptr
+	rowval = A.rowval
+	@inbounds for j=n:-1:1
+		x[j] = y[j] / nzval[diag[j]] 
+		
+		for i=colptr[j]:diag[j]-1
+			y[rowval[i]] -= nzval[i]*x[j]
+		end
+		
+	end
+	x
+end
+
+
+function ldiv!(x, ILU::ILUAMPrecon, b)
+	#t = @elapsed begin
+	#@info "iluam ldiv 1"
+	nzval = ILU.nzval
+	diag  = ILU.diag
+	A     = ILU.A
+	y = copy(b)
+	#forward_subst!(y, b, ILU)
+	forward_subst_old!(y, b, nzval, diag, A)
+	backward_subst_old!(x, y, nzval, diag, A)
+	#@info "ILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10]
+	#, b[1], x[1], y[1]#maximum(abs.(b)), maximum(abs.(x))
+	#end
+	#println("$t") #@info t
+	x
+end
+
+function ldiv!(ILU::ILUAMPrecon, b)
+	#@info "iluam ldiv 2"
+	nzval = ILU.nzval
+	diag  = ILU.diag
+	A     = ILU.A
+	y = copy(b)
+	#forward_subst!(y, b, ILU)
+	forward_subst_old!(y, b, nzval, diag, A)
+	backward_subst_old!(b, y, nzval, diag, A)
+	b
+end
+
+function \(ilu::ILUAMPrecon{T,N}, b) where {T,N<:Integer}
+    x = copy(b)
+    ldiv!(x, ilu, b)
+	x
+end
+
+function nnz(ilu::ILUAMPrecon{T,N}) where {T,N<:Integer}
+    length(ilu.nzval)
+end
+
+#=
+function forward_subst!(y, v, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer}
+	@info "fw"
+	n = ilu.A.n
+	nzval  = ilu.nzval
+	diag   = ilu.diag
+	colptr = ilu.A.colptr
+	rowval = ilu.A.rowval
+	
+	for i in eachindex(y)
+        y[i] = zero(Float64)
+    end
+
+	#y .= 0
+	@inbounds for j=1:n
+		y[j] += v[j]
+		for v=diag[j]+1:colptr[j+1]-1
+			y[rowval[v]] -= nzval[v]*y[j]
+		end
+	end
+	y
+end
+
+function backward_subst!(x, y, ilu) #::ILUAMPrecon{T,N}) where {T,N<:Integer}
+    @info "bw"
+	n = ilu.A.n
+	nzval = ilu.nzval
+	diag = ilu.diag
+	colptr = ilu.A.colptr
+	rowval = ilu.A.rowval
+	#wrk = copy(y)
+	@inbounds for j=n:-1:1
+		x[j] = y[j] / nzval[diag[j]] 
+		
+		for i=colptr[j]:diag[j]-1
+			y[rowval[i]] -= nzval[i]*x[j]
+		end
+		
+	end
+	x
+end
+
+function iluam_subst(ILU::ILUAMPrecon, b)
+	y = copy(b)
+	forward_subst!(y, b, ILU)
+	z = copy(b)
+	backward_subst!(z, y, ILU)
+	z
+end
+
+
+
+function iluam_subst_old(ILU::ILUAMPrecon, b)
+	nzval = ILU.nzval
+	diag  = ILU.diag
+	A     = ILU.A
+	y = copy(b)
+	#forward_subst!(y, b, ILU)
+	forward_subst_old!(y, b, nzval, diag, A)
+	z = copy(b)
+	backward_subst_old!(z, y, nzval, diag, A)
+	#backward_subst!(z, y, ILU)
+	z
+end
+=#
+
+
+
+#end
\ No newline at end of file
diff --git a/src/experimental/ExtendableSparseMatrixParallel/iluam.jl b/src/experimental/ExtendableSparseMatrixParallel/iluam.jl
new file mode 100644
index 0000000..24b75be
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/iluam.jl
@@ -0,0 +1,35 @@
+mutable struct ILUAMPreconditioner <: AbstractPreconditioner
+    A::ExtendableSparseMatrix
+    factorization::ILUAMPrecon
+    phash::UInt64
+    function ILUAMPreconditioner()
+        p = new()
+        p.phash = 0
+        p
+    end
+end
+
+"""
+```
+ILUAMPreconditioner()
+ILUAMPreconditioner(matrix)
+```
+Incomplete LU preconditioner with zero fill-in using ... . This preconditioner
+also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than  the [`ILU0Preconditioner`](@ref).
+"""
+function ILUAMPreconditioner end
+
+function update!(p::ILUAMPreconditioner)
+    flush!(p.A)
+    if p.A.phash != p.phash
+        p.factorization = iluAM(p.A.cscmatrix)
+        p.phash=p.A.phash
+    else
+        iluam!(p.factorization, p.A.cscmatrix)
+    end
+    p
+end
+
+allow_views(::ILUAMPreconditioner)=true
+allow_views(::Type{ILUAMPreconditioner})=true
+
diff --git a/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl b/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl
new file mode 100644
index 0000000..ad9529b
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/pilu_Al-Kurdi_Mittal.jl
@@ -0,0 +1,351 @@
+#module PILUAM
+#using Base.Threads
+#using LinearAlgebra, SparseArrays
+
+import LinearAlgebra.ldiv!, LinearAlgebra.\, SparseArrays.nnz 
+
+#@info "PILUAM"
+
+mutable struct PILUAMPrecon{T,N}
+
+	diag::AbstractVector
+    nzval::AbstractVector
+	A::AbstractMatrix
+	start::AbstractVector
+	nt::Integer
+	depth::Integer
+	
+end
+
+function use_vector_par(n, nt, Ti)
+	point = [Vector{Ti}(undef, n) for tid=1:nt]
+	@threads for tid=1:nt
+		point[tid] = zeros(Ti, n)
+	end
+	point
+end
+
+function compute_lu!(nzval, point, j0, j1, tid, rowval, colptr, diag, Ti)
+    for j=j0:j1-1
+        for v=colptr[j]:colptr[j+1]-1
+            point[tid][rowval[v]] = v
+        end
+        
+        for v=colptr[j]:diag[j]-1
+            i = rowval[v]
+            for w=diag[i]+1:colptr[i+1]-1
+                k = point[tid][rowval[w]]
+                if k>0
+                    nzval[k] -= nzval[v]*nzval[w]
+                end
+            end
+        end
+        
+        for v=diag[j]+1:colptr[j+1]-1
+            nzval[v] /= nzval[diag[j]]
+        end
+        
+        for v=colptr[j]:colptr[j+1]-1
+            point[tid][rowval[v]] = zero(Ti)
+        end
+    end
+end
+
+function piluAM!(ILU::PILUAMPrecon{Tv,Ti}, A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
+	#@info "piluAM!"
+	diag = ILU.diag
+	nzval = ILU.nzval
+	ILU.A = A
+	start = ILU.start
+	
+	ILU.nt = A.nt
+	nt = A.nt
+	
+	ILU.depth = A.depth
+	depth = A.depth
+	
+	
+	colptr = A.cscmatrix.colptr
+	rowval = A.cscmatrix.rowval
+	n = A.cscmatrix.n # number of columns
+	diag  = Vector{Ti}(undef, n)
+	nzval  = Vector{Tv}(undef, length(rowval)) #copy(A.nzval)
+	point = use_vector_par(n, A.nt, Int32)
+
+
+	@threads for tid=1:depth*nt+1
+		for j=start[tid]:start[tid+1]-1
+			for v=colptr[j]:colptr[j+1]-1
+				nzval[v] = A.cscmatrix.nzval[v]
+				if rowval[v] == j
+					diag[j] = v
+				end
+				#elseif rowval[v] 
+			end
+		end
+	end
+
+	for level=1:depth
+		@threads for tid=1:nt
+			for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1
+				for v=colptr[j]:colptr[j+1]-1
+					point[tid][rowval[v]] = v
+				end
+				
+				for v=colptr[j]:diag[j]-1
+					i = rowval[v]
+					for w=diag[i]+1:colptr[i+1]-1
+						k = point[tid][rowval[w]]
+						if k>0
+							nzval[k] -= nzval[v]*nzval[w]
+						end
+					end
+				end
+				
+				for v=diag[j]+1:colptr[j+1]-1
+					nzval[v] /= nzval[diag[j]]
+				end
+				
+				for v=colptr[j]:colptr[j+1]-1
+					point[tid][rowval[v]] = zero(Ti)
+				end
+			end
+		end
+	end
+	
+	#point = zeros(Ti, n) #Vector{Ti}(undef, n)
+	for j=start[depth*nt+1]:start[depth*nt+2]-1
+		for v=colptr[j]:colptr[j+1]-1
+			point[1][rowval[v]] = v
+		end
+		
+		for v=colptr[j]:diag[j]-1
+			i = rowval[v]
+			for w=diag[i]+1:colptr[i+1]-1
+				k = point[1][rowval[w]]
+				if k>0
+					nzval[k] -= nzval[v]*nzval[w]
+				end
+			end
+		end
+		
+		for v=diag[j]+1:colptr[j+1]-1
+			nzval[v] /= nzval[diag[j]]
+		end
+		
+		for v=colptr[j]:colptr[j+1]-1
+			point[1][rowval[v]] = zero(Ti)
+		end
+	end
+
+end
+
+function piluAM(A::ExtendableSparseMatrixParallel{Tv,Ti}) where {Tv, Ti <:Integer}
+	start = A.start
+	nt = A.nt
+	depth = A.depth
+	
+	colptr = A.cscmatrix.colptr
+	rowval = A.cscmatrix.rowval
+	nzval  = Vector{Tv}(undef, length(rowval)) #copy(A.nzval)
+	n = A.cscmatrix.n # number of columns
+	diag  = Vector{Ti}(undef, n)
+	point = use_vector_par(n, A.nt, Int32)
+	
+	# find diagonal entries
+	#
+	
+	#=
+	for j=1:n
+		for v=colptr[j]:colptr[j+1]-1
+			nzval[v] = A.cscmatrix.nzval[v]
+			if rowval[v] == j
+				diag[j] = v
+				#break
+			end
+			#elseif rowval[v] 
+		end
+	end
+	=#
+
+
+
+	@threads for tid=1:depth*nt+1
+		for j=start[tid]:start[tid+1]-1
+			for v=colptr[j]:colptr[j+1]-1
+				nzval[v] = A.cscmatrix.nzval[v]
+				if rowval[v] == j
+					diag[j] = v
+				end
+				#elseif rowval[v] 
+			end
+		end
+	end
+
+
+	#@info diag[1:20]'
+	#@info diag[end-20:end]'	
+
+	for level=1:depth
+		@threads for tid=1:nt
+			for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1
+				for v=colptr[j]:colptr[j+1]-1
+					point[tid][rowval[v]] = v
+				end
+				
+				for v=colptr[j]:diag[j]-1
+					i = rowval[v]
+					for w=diag[i]+1:colptr[i+1]-1
+						k = point[tid][rowval[w]]
+						if k>0
+							nzval[k] -= nzval[v]*nzval[w]
+						end
+					end
+				end
+				
+				for v=diag[j]+1:colptr[j+1]-1
+					nzval[v] /= nzval[diag[j]]
+				end
+				
+				for v=colptr[j]:colptr[j+1]-1
+					point[tid][rowval[v]] = zero(Ti)
+				end
+			end
+		end
+	end
+	
+	#point = zeros(Ti, n) #Vector{Ti}(undef, n)
+	for j=start[depth*nt+1]:start[depth*nt+2]-1
+		for v=colptr[j]:colptr[j+1]-1
+			point[1][rowval[v]] = v
+		end
+		
+		for v=colptr[j]:diag[j]-1
+			i = rowval[v]
+			for w=diag[i]+1:colptr[i+1]-1
+				k = point[1][rowval[w]]
+				if k>0
+					nzval[k] -= nzval[v]*nzval[w]
+				end
+			end
+		end
+		
+		for v=diag[j]+1:colptr[j+1]-1
+			nzval[v] /= nzval[diag[j]]
+		end
+		
+		for v=colptr[j]:colptr[j+1]-1
+			point[1][rowval[v]] = zero(Ti)
+		end
+	end
+
+	#nzval, diag
+	PILUAMPrecon{Tv,Ti}(diag, nzval, A.cscmatrix, start, nt, depth)
+end
+
+function forward_subst_old!(y, v, nzval, diag, start, nt, depth, A)
+	#@info "pfso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
+	#@info "fwo"
+	n = A.n
+	colptr = A.colptr
+	rowval = A.rowval
+	
+	y .= 0
+	
+	for level=1:depth
+		@threads for tid=1:nt
+			@inbounds for j=start[(level-1)*nt+tid]:start[(level-1)*nt+tid+1]-1
+				y[j] += v[j]
+				for v=diag[j]+1:colptr[j+1]-1
+					y[rowval[v]] -= nzval[v]*y[j]
+				end
+			end
+		end
+	end
+	
+	@inbounds for j=start[depth*nt+1]:start[depth*nt+2]-1
+		y[j] += v[j]
+		for v=diag[j]+1:colptr[j+1]-1
+			y[rowval[v]] -= nzval[v]*y[j]
+		end
+	end
+	
+end
+
+
+function backward_subst_old!(x, y, nzval, diag, start, nt, depth, A)
+	#@info "pbso, $(sum(nzval)), $(sum(nzval.^2)), $(sum(diag)), $(A[1,1])"
+	
+	#@info "bwo"
+	n = A.n
+	colptr = A.colptr
+	rowval = A.rowval
+	#wrk = copy(y)
+	
+	
+	@inbounds for j=start[depth*nt+2]-1:-1:start[depth*nt+1]
+		x[j] = y[j] / nzval[diag[j]] 
+		
+		for i=colptr[j]:diag[j]-1
+			y[rowval[i]] -= nzval[i]*x[j]
+		end
+		
+	end
+	
+	for level=depth:-1:1
+		@threads for tid=1:nt
+			@inbounds for j=start[(level-1)*nt+tid+1]-1:-1:start[(level-1)*nt+tid]
+				x[j] = y[j] / nzval[diag[j]] 
+				for i=colptr[j]:diag[j]-1
+					y[rowval[i]] -= nzval[i]*x[j]
+				end
+			end
+		end
+	end
+
+end
+
+
+function ldiv!(x, ILU::PILUAMPrecon, b)
+	#@info "piluam ldiv 1"
+	nzval = ILU.nzval
+	diag  = ILU.diag
+	A     = ILU.A
+    start = ILU.start
+    nt    = ILU.nt
+    depth = ILU.depth
+	y = copy(b)
+	#forward_subst!(y, b, ILU)
+	forward_subst_old!(y, b, nzval, diag, start, nt, depth, A)
+	backward_subst_old!(x, y, nzval, diag, start, nt, depth, A)
+	#@info "PILUAM:", b[1], y[1], x[1], maximum(abs.(b-A*x)), nnz(A) #, A[10,10]
+	#@info "PILUAM:", maximum(abs.(b-A*x)), b[1], x[1], maximum(abs.(b)), maximum(abs.(x))
+	x
+end
+
+function ldiv!(ILU::PILUAMPrecon, b)
+	#@info "piluam ldiv 2"
+	nzval = ILU.nzval
+	diag  = ILU.diag
+	A     = ILU.A
+    start = ILU.start
+    nt    = ILU.nt
+    depth = ILU.depth
+	y = copy(b)
+	#forward_subst!(y, b, ILU)
+	forward_subst_old!(y, b, nzval, diag, start, nt, depth, A)
+	backward_subst_old!(b, y, nzval, diag, start, nt, depth, A)
+	b
+end
+
+function \(ilu::PILUAMPrecon{T,N}, b) where {T,N<:Integer}
+    x = copy(b)
+    ldiv!(x, ilu, b)
+	x
+end
+
+function nnz(ilu::PILUAMPrecon{T,N}) where {T,N<:Integer}
+    length(ilu.nzval)
+end
+
+#end
\ No newline at end of file
diff --git a/src/experimental/ExtendableSparseMatrixParallel/piluam.jl b/src/experimental/ExtendableSparseMatrixParallel/piluam.jl
new file mode 100644
index 0000000..50a46fd
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/piluam.jl
@@ -0,0 +1,36 @@
+mutable struct PILUAMPreconditioner <: AbstractPreconditioner
+    A::ExtendableSparseMatrixParallel
+    factorization::PILUAMPrecon
+    phash::UInt64
+    function PILUAMPreconditioner()
+        p = new()
+        p.phash = 0
+        p
+    end
+end
+
+"""
+```
+PILUAMPreconditioner()
+PILUAMPreconditioner(matrix)
+```
+Incomplete LU preconditioner with zero fill-in using ... . This preconditioner
+also calculates and stores updates to the off-diagonal entries and thus delivers better convergence than  the [`ILU0Preconditioner`](@ref).
+"""
+function PILUAMPreconditioner end
+
+function update!(p::PILUAMPreconditioner)
+    #@warn "Should flush now", nnz_noflush(p.A)
+    flush!(p.A)
+    if p.A.phash != p.phash
+        p.factorization = piluAM(p.A)
+        p.phash=p.A.phash
+    else
+        piluAM!(p.factorization, p.A)
+    end
+    p
+end
+
+allow_views(::PILUAMPreconditioner)=true
+allow_views(::Type{PILUAMPreconditioner})=true
+
diff --git a/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl b/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl
new file mode 100644
index 0000000..fe5686f
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/preparatory.jl
@@ -0,0 +1,941 @@
+"""
+`function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth)`
+
+`nm` is the number of nodes in each dimension (Examples: 2d: nm = (100,100) -> 100 x 100 grid, 3d: nm = (50,50,50) -> 50 x 50 x 50 grid).
+`nt` is the number of threads.
+`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices...
+To assemble the system matrix parallely, things such as `cellsforpart` (= which thread takes which cells) need to be computed in advance. This is done here.
+
+This should be somewhere else, longterm
+"""
+function preparatory_multi_ps_less_reverse(mat_cell_node, nc, nn, nt, depth, Ti; 
+										   sequential=false, assembly=:cellwise, 
+										   minsize_sepa=10, do_print=false, check_partition=false, ne=0, ce=[], mat_edge_node=[], block_struct=true)
+	#grid = getgrid(nm; x0, x1)
+	adepth = 0
+	if sequential
+		(allcells, start, cellparts, adepth) = grid_to_graph_cellwise_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print)#)
+	else
+		(allcells, start, cellparts, adepth) = grid_to_graph_cellwise_par_nogrid!(mat_cell_node, nc, nn, nt, depth; minsize_sepa, do_print)
+	end
+
+	if (adepth != depth) && do_print
+		@info "The requested depth of partitioning is too high. The depth is set to $adepth."
+	end
+	depth = adepth
+	
+	if assembly == :cellwise
+		cfp = bettercellsforpart(cellparts, depth*nt+1)
+
+	else
+		edgeparts = edgewise_partition_from_cellwise_partition(nc, ne, ce, cellparts)
+		cfp = bettercellsforpart(edgeparts, depth*nt+1)
+	end
+
+		
+	if check_partition
+		if assembly == :cellwise
+			validate_partition(nn, mat_cell_node, cellparts, start, allcells, nt, depth, assembly)
+		else
+			validate_partition(nn, mat_edge_node, cellparts, start, allcells, nt, depth, assembly)
+		end
+	end
+
+	#@info length.(cfp)
+	#@info minimum(cellparts), maximum(cellparts), nt, depth
+
+	(nnts, s, onr, gi, ni, rni, starts) = get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(
+		cellparts, allcells, start, nn, Ti, nt, depth; block_struct
+	)
+	
+
+	return nnts, s, onr, cfp, gi, ni, rni, starts, cellparts, adepth
+end
+
+
+
+"""
+`function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt)`
+
+After the cellregions (partitioning of the grid) of the grid have been computed, other things have to be computed, such as `sortednodesperthread` a depth+1 x num_nodes matrix, here `sortednodesperthreads[i,j]` is the point at which the j-th node appears in the i-th level matrix in the corresponding submatrix.
+`cellregs` contains the partiton for each cell.
+Furthermore, `nnts` (number of nodes of the threads) is computed, which contain for each thread the number of nodes that are contained in the cells of that thread.
+`allcells` and `start` together behave like the rowval and colptr arrays of a CSC matrix, such that `allcells[start[j]:start[j+1]-1]` are all cells that contain the j-th node.
+`nn` is the number of nodes in the grid.
+`Ti` is the type (Int64,...) of the elements in the created arrays.
+`nt` is the number of threads.
+`block_struct=true` means, the matrix should be reordered two have a block structure, this is necessary for parallel ILU, for `false`, the matrix is not reordered 
+"""
+function get_nnnts_and_sortednodesperthread_and_noderegs_from_cellregs_ps_less_reverse_nopush(cellregs, allcells, start, nn, Ti, nt, depth; block_struct = true)
+		
+	#num_matrices = maximum(cellregs)
+	#depth = Int(floor((num_matrices-1)/nt))
+
+	#loop over each node, get the cellregion of the cell (the one not in the separator) write the position of that node inside the cellregions sorted ranking into a long vector
+	#nnts = [zeros(Ti, nt+1) for i=1:depth+1]
+	nnts = zeros(Ti, nt)
+	#noderegs_max_tmp = 0
+	old_noderegions = zeros(Ti, (depth+1, nn))
+	
+	# Count nodes per thread:
+	tmp = zeros(depth+1)
+	for j=1:nn
+		cells = @view allcells[start[j]:start[j+1]-1]
+		sortedcellregs = unique(sort(cellregs[cells]))
+		#tmp = []
+		tmpctr = 1
+		for cr in sortedcellregs
+			crmod = (cr-1)%nt+1
+			level = Int(ceil(cr/nt))
+			#nnts[crmod] += 1
+			old_noderegions[level,j] = crmod
+			if !(crmod in tmp[1:tmpctr-1])
+				nnts[crmod] += 1
+				#sortednodesperthread[crmod,j] = nnts[crmod] #nnts[i][cr]
+				#push!(tmp, crmod)
+				if tmpctr > depth+1
+					@info "Cellregs: ", sortedcellregs
+					@info "Levels  : ", Int.(ceil.(sortedcellregs/nt))
+					@info "PartsMod: ", ((sortedcellregs.-1).%nt).+1
+				end
+				tmp[tmpctr] = crmod
+				tmpctr += 1
+			end
+		end
+	end
+	
+	# Reorder inidices to receive a block structure:
+	# Taking the original matrix [a_ij] and mapping each i and j to new_indices[i] and new_indices[j], gives a block structure
+	# the reverse is also defined rev_new_indices[new_indices[k]] = k
+	# From now on we will only use this new ordering
+	counter_for_reorder = zeros(Ti, depth*nt+1)
+	for j=1:nn
+		level, reg                                 = last_nz(old_noderegions[:, j])
+		counter_for_reorder[(level-1)*nt + reg]    += 1 #(reg-1)*depth + level] += 1
+	end
+	
+	starts               = vcat([0], cumsum(counter_for_reorder))
+	counter_for_reorder2 = zeros(Ti, depth*nt+1)
+	new_indices          = Vector{Ti}(undef, nn)
+	rev_new_indices      = Vector{Ti}(undef, nn)
+	origin               = Vector{Ti}(undef, nn)
+	for j=1:nn
+		level, reg                                  = last_nz(old_noderegions[:, j])
+		counter_for_reorder2[(level-1)*nt + reg]    += 1
+		origin[j]                                   = reg
+		new_indices[j]                              = starts[(level-1)*nt + reg]+counter_for_reorder2[(level-1)*nt + reg]
+		rev_new_indices[new_indices[j]]             = j
+	end
+	starts .+= 1
+	
+	if !block_struct
+		new_indices = collect(1:nn)
+		rev_new_indices = collect(1:nn)
+		starts = []
+	end
+
+	# Build sortednodesperthread and globalindices array:
+	# They are inverses of each other: globalindices[tid][sortednodeperthread[tid][j]] = j
+	# Note that j has to be a `new index`
+	
+	sortednodesperthread = zeros(Ti, (nt, nn)) #vvcons(Ti, nnts)
+	globalindices = vvcons(Ti, nnts)
+	gictrs = zeros(Ti, nt)
+
+	for nj=1:nn
+		oj = rev_new_indices[nj]
+		cells = @view allcells[start[oj]:start[oj+1]-1]
+		sortedcellregs = unique(sort(cellregs[cells]))
+		#tmp = []
+		tmpctr = 1
+		for cr in sortedcellregs
+			crmod = (cr-1)%nt+1
+			#level = Int(ceil(cr/nt))
+			if !(crmod in tmp[1:tmpctr-1])
+				gictrs[crmod] += 1 # , level] += 1
+				sortednodesperthread[crmod,nj] = gictrs[crmod]
+				globalindices[crmod][gictrs[crmod]] = nj
+				#push!(tmp, crmod)
+				tmp[tmpctr] = crmod
+				tmpctr += 1
+			end
+		end
+	end
+	
+	nnts, sortednodesperthread, old_noderegions, globalindices, new_indices, rev_new_indices, starts
+end
+
+
+
+
+
+"""
+`function separate!(cellregs, nc, ACSC, nt, level0, ctr_sepanodes)`
+
+This function partitons the separator, which is done if `depth`>1 (see `grid_to_graph_ps_multi!` and/or `preparatory_multi_ps`).
+`cellregs` contains the regions/partitions/colors of each cell. 
+`nc` is the number of cells in the grid.
+`ACSC` is the adjacency matrix of the graph of the (separator-) grid (vertex in graph is cell in grid, edge in graph means two cells share a node) stored as a CSC. 
+`nt` is the number of threads.
+`level0` is the separator-partitoning level, if the (first) separator is partitioned, level0 = 1, in the next iteration, level0 = 2...
+`preparatory_multi_ps` is the number of separator-cells.
+"""
+function separate!(cellregs, ACSC, nt, level0, ctr_sepanodes, ri, gi, do_print)
+	# current number of cells treated
+    nc2 = size(ACSC, 1)
+
+	indptr  = collect(1:nc2+1)
+	indices = zeros(Int64, nc2)
+	rowval  = zeros(Int64, nc2)
+
+	indptrT = collect(1:ctr_sepanodes+1)
+	indicesT = zeros(Int64, ctr_sepanodes)
+	rowvalT = zeros(Int64, ctr_sepanodes)
+
+	for i=1:ctr_sepanodes
+	    j = ri[i]
+        indices[j] = i
+		indicesT[i] = j
+		rowval[j]  = 1
+		rowvalT[i] = 1
+	end
+
+    
+
+	R = SparseMatrixCSC(ctr_sepanodes, nc2, indptr, indices, rowval)
+	RT = SparseMatrixCSC(nc2, ctr_sepanodes, indptrT, indicesT, rowvalT)
+	# current adjacency matrix, taken as a part of the given one ACSC
+	RART = dropzeros(R)*ACSC*dropzeros(RT)
+	
+	cellregs2 = Metis.partition(RART, nt)
+	
+    
+    for i=1:ctr_sepanodes
+        if cellregs[gi[i]] < level0*nt+1
+            @warn "cell treated in this iteration was not a separator-cell last iteration"
+        end
+        cellregs[gi[i]] = level0*nt + cellregs2[i]
+    end
+
+	# how many cells are in the separator of the new partiton (which is only computed on the separator of the old partition)
+    new_ctr_sepanodes = 0
+    ri2 = Vector{Int64}(undef, ctr_sepanodes)
+    gi2 = Vector{Int64}(undef, ctr_sepanodes)
+    
+    for tid=1:nt
+        for i=1:ctr_sepanodes
+            if cellregs2[i] == tid
+				neighbors = RART.rowval[RART.colptr[i]:(RART.colptr[i+1]-1)]
+                rows = gi[vcat(neighbors, [i])]
+				#counts how many different regions (besides) the separator are adjacent to the current cell
+                x = how_many_different_below(cellregs[rows], (level0+1)*nt+1)
+                if x > 1
+                    cellregs[gi[i]] = (level0+1)*nt+1
+                    new_ctr_sepanodes += 1
+                    gi2[new_ctr_sepanodes] = gi[i]
+                    ri2[new_ctr_sepanodes] = i
+                end
+            end
+        end
+    end
+
+
+    ri2 = ri2[1:new_ctr_sepanodes]
+    gi2 = gi2[1:new_ctr_sepanodes]
+
+    if do_print
+    	@info "At level $(level0+1), we found $new_ctr_sepanodes cells that have to be treated in the next iteration!"
+    end
+
+	RART, new_ctr_sepanodes, ri2, gi2
+end
+
+
+
+"""
+`function grid_to_graph_ps_multi_nogrid!(nc, nn, mat_cell_node, nt, depth)`
+
+The function assigns colors/partitons to each cell in the `grid`. First, the grid is partitoned into `nt` partitions. If `depth` > 1, the separator is partitioned again...
+The grid is specified by nc (number of cells), nn (number of nodes) and the mat_cell_node (i.e. grid[CellNodes] if ExtendableGrids is used). 
+Here, `mat_cell_node[k,i]` is the i-th node in the k-th cell. 
+`nt` is the number of threads.
+`depth` is the number of partition layers, for depth=1, there are nt parts and 1 separator, for depth=2, the separator is partitioned again, leading to 2*nt+1 submatrices...
+"""
+function grid_to_graph_cellwise_nogrid!(nc, nn, mat_cell_node, nt, depth; minsize_sepa=10, do_print=false)
+	A = SparseMatrixLNK{Int64, Int64}(nc, nc)
+	number_cells_per_node = zeros(Int64, nn)
+	for j=1:nc
+		for node_id in mat_cell_node[:,j]
+			number_cells_per_node[node_id] += 1
+		end
+	end
+	allcells = zeros(Int64, sum(number_cells_per_node))
+	start = ones(Int64, nn+1)
+	start[2:end] += cumsum(number_cells_per_node)
+	number_cells_per_node .= 0
+	for j=1:nc
+		for node_id in mat_cell_node[:,j]
+			allcells[start[node_id] + number_cells_per_node[node_id]] = j
+			number_cells_per_node[node_id] += 1
+		end
+	end
+
+	for j=1:nn
+		cells = @view allcells[start[j]:start[j+1]-1]
+		for (i,id1) in enumerate(cells)
+			for id2 in cells[i+1:end]
+				A[id1,id2] = 1
+				A[id2,id1] = 1
+			end
+		end	
+	end
+
+	ACSC = SparseArrays.SparseMatrixCSC(A)
+	
+	partition = Metis.partition(ACSC, nt)
+	cellregs  = copy(partition)
+	
+	sn = Vector{Int64}(undef, nc)
+	gi = Vector{Int64}(undef, nc)
+	ctr_sepanodes = 0
+    
+    for tid=1:nt
+        for j=1:nc
+            if cellregs[j] == tid
+                rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j])
+                if how_many_different_below(cellregs[rows], nt+1) > 1 
+                    cellregs[j] = nt+1 #+ctr_sepanodes
+                    ctr_sepanodes += 1
+                    sn[ctr_sepanodes] = j
+                    gi[ctr_sepanodes] = j
+                end
+            end
+        end
+	end
+
+    sn = sn[1:ctr_sepanodes]
+    gi = gi[1:ctr_sepanodes]
+    
+    if do_print
+        @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!"
+    end
+
+    RART = copy(ACSC)
+    actual_depth = 1
+	for level=1:depth-1
+		RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print)
+        actual_depth += 1
+		if ctr_sepanodes < minsize_sepa
+			break
+		end
+	end
+        
+    return allcells, start, cellregs, actual_depth
+end
+
+
+"""
+`function grid_to_graph_ps_multi_par_nogrid!(nc, nn, mat_cell_node, nt, depth)`
+
+Same result as `grid_to_graph_ps_multi_nogrid!`, but computed on multiple threads.
+"""
+function grid_to_graph_cellwise_par_nogrid!(cn, nc, nn, nt, depth; minsize_sepa=10, do_print=false)
+	As = [ExtendableSparseMatrix{Int64, Int64}(nc, nc) for tid=1:nt]
+	number_cells_per_node = zeros(Int64, nn)
+	
+	
+	for j=1:nc
+		tmp = view(cn, :, j)
+		for node_id in tmp
+			number_cells_per_node[node_id] += 1
+		end
+	end
+		
+	
+	allcells = zeros(Int64, sum(number_cells_per_node))
+	start = ones(Int64, nn+1)
+	start[2:end] += cumsum(number_cells_per_node)
+	number_cells_per_node .= 0
+	
+	for j=1:nc
+		tmp = view(cn, :, j)
+		for node_id in tmp
+			allcells[start[node_id] + number_cells_per_node[node_id]] = j
+			number_cells_per_node[node_id] += 1
+		end
+	end
+
+	node_range = get_starts(nn, nt)
+	Threads.@threads for tid=1:nt
+		for j in node_range[tid]:node_range[tid+1]-1
+			cells = @view allcells[start[j]:start[j+1]-1]
+			l = length(cells)
+			for (i,id1) in enumerate(cells)
+				ce = view(cells, i+1:l)
+				for id2 in ce
+					As[tid][id1,id2] = 1
+					As[tid][id2,id1] = 1
+				end
+			end	
+		end
+		ExtendableSparse.flush!(As[tid])
+	end
+	
+	ACSC = add_all_par!(As).cscmatrix
+	
+	cellregs = Metis.partition(ACSC, nt)
+	
+	sn = [Vector{Int64}(undef, Int(ceil(nc/nt))) for tid=1:nt]
+	ctr_sepanodess = zeros(Int64, nt)
+    
+    @threads for tid=1:nt
+        for j=1:nc
+            if cellregs[j] == tid
+                rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j])
+                if how_many_different_below(cellregs[rows], nt+1) > 1 
+                    cellregs[j] = nt+1 #+ctr_sepanodes
+                    ctr_sepanodess[tid] += 1
+                    sn[tid][ctr_sepanodess[tid]] = j
+                end
+            end
+        end
+	end
+
+    for tid=1:nt
+        sn[tid] = sn[tid][1:ctr_sepanodess[tid]]
+    end
+    ctr_sepanodes = sum(ctr_sepanodess)
+    sn = vcat(sn...)
+    gi = copy(sn)
+
+    if do_print
+        @info "At level $(1), we found $ctr_sepanodes cells that have to be treated in the next iteration!"
+    end
+
+    RART = ACSC
+    actual_depth = 1
+	for level=1:depth-1
+		RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print)
+        actual_depth += 1
+		if ctr_sepanodes < minsize_sepa
+			break
+		end
+	end
+    
+    #grid[CellRegions] = cellregs
+    #grid
+    return allcells, start, cellregs, actual_depth
+end
+
+"""
+function grid_to_graph_cellwise!(grid, nt, depth; minsize_sepa=10, do_print=false)
+	A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid))
+	number_cells_per_node = zeros(Int64, num_nodes(grid))
+	for j=1:num_cells(grid)
+		for node_id in grid[CellNodes][:,j]
+			number_cells_per_node[node_id] += 1
+		end
+	end
+	allcells = zeros(Int64, sum(number_cells_per_node))
+	start = ones(Int64, num_nodes(grid)+1)
+	start[2:end] += cumsum(number_cells_per_node)
+	number_cells_per_node .= 0
+	for j=1:num_cells(grid)
+		for node_id in grid[CellNodes][:,j]
+			allcells[start[node_id] + number_cells_per_node[node_id]] = j
+			number_cells_per_node[node_id] += 1
+		end
+	end
+
+	for j=1:num_nodes(grid)
+		cells = @view allcells[start[j]:start[j+1]-1]
+		for (i,id1) in enumerate(cells)
+			for id2 in cells[i+1:end]
+				A[id1,id2] = 1
+				A[id2,id1] = 1
+			end
+		end	
+	end
+
+	ACSC = SparseArrays.SparseMatrixCSC(A)
+	
+	partition = Metis.partition(ACSC, nt)
+	cellregs  = copy(partition)
+	
+	sn = Vector{Int64}(undef, num_cells(grid))
+	gi = Vector{Int64}(undef, num_cells(grid))
+	ctr_sepanodes = 0
+    
+    for tid=1:nt
+        for j=1:num_cells(grid)
+            if cellregs[j] == tid
+                rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j])
+                if how_many_different_below(cellregs[rows], nt+1) > 1 
+                    cellregs[j] = nt+1 #+ctr_sepanodes
+                    ctr_sepanodes += 1
+                    sn[ctr_sepanodes] = j
+                    gi[ctr_sepanodes] = j
+                end
+            end
+        end
+	end
+
+    sn = sn[1:ctr_sepanodes]
+    gi = gi[1:ctr_sepanodes]
+    
+    if do_print
+        @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!"
+    end
+
+    RART = copy(ACSC)
+    actual_depth = 1
+	for level=1:depth-1
+		RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print)
+        actual_depth += 1
+		if ctr_sepanodes < minsize_sepa
+			break
+		end
+	end
+        
+    return allcells, start, cellregs, actual_depth
+end
+
+
+function grid_to_graph_edgewise!(grid, nt, depth; minsize_sepa=10, do_print=false)
+	ce = grid[CellEdges]
+    A = SparseMatrixLNK{Int64, Int64}(num_edges(grid), num_edges(grid))
+	number_edges_per_node = zeros(Int64, num_nodes(grid))
+	
+    for i=1:num_edges(grid)
+        for node_id in grid[EdgeNodes][:,i] 
+            number_edges_per_node[node_id] += 1
+        end
+    end
+
+    alledges = zeros(Int64, sum(number_edges_per_node))
+	start = ones(Int64, num_nodes(grid)+1)
+	start[2:end] += cumsum(number_edges_per_node)
+	number_edges_per_node .= 0
+	
+    for j=1:num_edges(grid)
+		for node_id in grid[EdgeNodes][:,j]
+			alledges[start[node_id] + number_edges_per_node[node_id]] = j
+			number_edges_per_node[node_id] += 1
+		end
+	end
+
+	for j=1:num_nodes(grid)
+		edges = @view alledges[start[j]:start[j+1]-1]
+		for (i,id1) in enumerate(edges)
+			for id2 in edges[i+1:end]
+				A[id1,id2] = 1
+				A[id2,id1] = 1
+			end
+		end	
+	end
+
+	ACSC = SparseArrays.SparseMatrixCSC(A)
+	
+	partition = Metis.partition(ACSC, nt)
+
+    sn = Vector{Int64}(undef, num_edges(grid))
+	gi = Vector{Int64}(undef, num_edges(grid))
+	ctr_sepanodes = 0
+    
+	edgeregs  = copy(partition)
+    for tid=1:nt
+        for j=1:num_edges(grid)
+            if edgeregs[j] == tid
+                rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j])
+                if how_many_different_below(edgeregs[rows], nt+1) > 1 
+                    edgeregs[j] = nt+1 #+ctr_sepanodes
+                    ctr_sepanodes += 1
+                    sn[ctr_sepanodes] = j
+                    gi[ctr_sepanodes] = j
+                end
+            end
+        end
+	end
+
+    sn = sn[1:ctr_sepanodes]
+    gi = gi[1:ctr_sepanodes]
+    
+    if do_print
+        @info "At level (1), we found ctr_sepanodes cells that have to be treated in the next iteration!"
+    end
+    
+    RART = copy(ACSC)
+    actual_depth = 1
+	for level=1:depth-1
+		RART, ctr_sepanodes, sn, gi = separate!(edgeregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print)
+        actual_depth += 1
+		if ctr_sepanodes < minsize_sepa
+			break
+		end
+	end
+        
+    return alledges, start, edgeregs, actual_depth
+end
+
+function grid_to_graph_edgewise_par!(grid, nt, depth; minsize_sepa=10, do_print=false)
+	ce = grid[CellEdges]
+    cn = grid[EdgeNodes]
+	
+    As = [ExtendableSparseMatrix{Int64, Int64}(num_edges(grid), num_edges(grid)) for tid=1:nt]
+	number_edges_per_node = zeros(Int64, num_nodes(grid))
+	
+	
+	for j=1:num_edges(grid)
+		tmp = view(cn, :, j)
+		for node_id in tmp
+			number_edges_per_node[node_id] += 1
+		end
+	end
+		
+	
+	alledges = zeros(Int64, sum(number_edges_per_node))
+	start = ones(Int64, num_nodes(grid)+1)
+	start[2:end] += cumsum(number_edges_per_node)
+	number_edges_per_node .= 0
+	
+	for j=1:num_edges(grid)
+		tmp = view(cn, :, j)
+		for node_id in tmp
+			alledges[start[node_id] + number_edges_per_node[node_id]] = j
+			number_edges_per_node[node_id] += 1
+		end
+	end
+
+	node_range = get_starts(num_nodes(grid), nt)
+	Threads.@threads for tid=1:nt
+		for j in node_range[tid]:node_range[tid+1]-1
+			edges = @view alledges[start[j]:start[j+1]-1]
+			l = length(edges)
+			for (i,id1) in enumerate(edges)
+				ce = view(edges, i+1:l)
+				for id2 in ce
+					As[tid][id1,id2] = 1
+					As[tid][id2,id1] = 1
+					
+				end
+			end	
+		end
+		ExtendableSparse.flush!(As[tid])
+	end
+	
+	ACSC = add_all_par!(As).cscmatrix
+	
+	cellregs = Metis.partition(ACSC, nt)
+	
+	sn = [Vector{Int64}(undef, Int(ceil(num_cells(grid)/nt))) for tid=1:nt]
+	ctr_sepanodess = zeros(Int64, nt)
+    
+    @threads for tid=1:nt
+        for j=1:num_edges(grid)
+            if cellregs[j] == tid
+                rows = vcat(ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)], [j])
+                if how_many_different_below(cellregs[rows], nt+1) > 1 
+                    cellregs[j] = nt+1 #+ctr_sepanodes
+                    ctr_sepanodess[tid] += 1
+                    sn[tid][ctr_sepanodess[tid]] = j
+                end
+            end
+        end
+	end
+
+    for tid=1:nt
+        sn[tid] = sn[tid][1:ctr_sepanodess[tid]]
+    end
+    ctr_sepanodes = sum(ctr_sepanodess)
+    sn = vcat(sn...)
+    gi = copy(sn)
+
+    if do_print
+        @info "At level (1), we found ctr_sepanodes edges that have to be treated in the next iteration!"
+    end
+
+    RART = ACSC
+    actual_depth = 1
+	for level=1:depth-1
+		RART, ctr_sepanodes, sn, gi = separate!(cellregs, RART, nt, level, ctr_sepanodes, sn, gi, do_print)
+        actual_depth += 1
+		if ctr_sepanodes < minsize_sepa
+			break
+		end
+	end
+    
+    #grid[CellRegions] = cellregs
+    #grid
+    return alledges, start, cellregs, actual_depth
+end
+"""
+
+function edgewise_partition_from_cellwise_partition(nc, ne, ce, cellregs)
+	#ce = grid[CellEdges]
+	edgeregs = maximum(cellregs)*ones(Int64, ne)
+
+	for icell=1:nc
+		tmp = cellregs[icell]
+		for iedge in ce[:,icell]
+			if tmp < edgeregs[iedge]
+				edgeregs[iedge] = tmp
+			end
+		end
+	end
+
+	edgeregs
+end
+
+"""
+`function add_all_par!(As)`
+
+Add LNK matrices (stored in a vector) parallely (tree structure).
+The result is stored in the first LNK matrix.
+"""
+function add_all_par!(As)
+	nt = length(As)
+	depth = Int(floor(log2(nt)))
+	ende = nt
+	for level=1:depth
+		
+		@threads for tid=1:2^(depth-level)
+			#@info "$level, $tid"
+			start = tid+2^(depth-level)
+			while start <= ende
+				As[tid] += As[start]
+				start += 2^(depth-level)
+			end
+		end
+		ende = 2^(depth-level)
+	end
+	As[1]
+
+end
+
+
+"""
+`function vvcons(Ti, lengths)`
+
+`lengths` is a vector of integers.
+The function creates a vector of zero vectors of type `Ti` of length `lengths[i]`.
+"""
+function vvcons(Ti, lengths)
+	x::Vector{Vector{Ti}} = [zeros(Ti, i) for i in lengths]
+	return x
+end
+
+
+"""
+`function bettercellsforpart(xx, upper)`
+
+`xx` are the CellRegions (i.e. the color/partition of each cell).
+`upper` is the number of partitions (upper=depth*nt+1).
+The function returns a vector e.g. [v1, v2, v3, v4, v5].
+The element v1 would be the list of cells that are in partition 1 etc.
+The function is basically a faster findall.
+"""
+function bettercellsforpart(xx, upper)
+	ctr = zeros(Int64, upper)
+	for x in xx
+		ctr[x] += 1
+	end
+	cfp = vvcons(Int64, ctr)
+	ctr .= 1
+	for (i,x) in enumerate(xx)
+		cfp[x][ctr[x]] = i
+		ctr[x] += 1
+	end
+	cfp
+end
+
+
+
+function get_starts(n, nt)
+	ret = ones(Int64, nt+1)
+	ret[end] = n+1
+	for i=nt:-1:2
+		ret[i] = ret[i+1] - Int(round(ret[i+1]/i)) #Int(round(n/nt))-1
+	end 
+	ret
+end
+
+function last_nz(x)
+	n = length(x)
+	for j=n:-1:1
+		if x[j] != 0
+			return (j, x[j])
+		end
+	end
+end
+
+
+function how_many_different_below(x0, y; u=0)
+    x = copy(x0)
+    z = unique(x)
+    t = findall(w->w<y,z)
+    t = findall(w->w>u,z[t])
+    length(t)
+end
+
+
+
+function lookat_grid_to_graph_ps_multi!(nm, nt, depth)
+	grid = getgrid(nm)
+	A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid))
+	number_cells_per_node = zeros(Int64, num_nodes(grid))
+	for j=1:num_cells(grid)
+		for node_id in grid[CellNodes][:,j]
+			number_cells_per_node[node_id] += 1
+		end
+	end
+	allcells = zeros(Int64, sum(number_cells_per_node))
+	start = ones(Int64, num_nodes(grid)+1)
+	start[2:end] += cumsum(number_cells_per_node)
+	number_cells_per_node .= 0
+	for j=1:num_cells(grid)
+		for node_id in grid[CellNodes][:,j]
+			allcells[start[node_id] + number_cells_per_node[node_id]] = j
+			number_cells_per_node[node_id] += 1
+		end
+	end
+
+	for j=1:num_nodes(grid)
+		cells = @view allcells[start[j]:start[j+1]-1]
+		for (i,id1) in enumerate(cells)
+			for id2 in cells[i+1:end]
+				A[id1,id2] = 1
+				A[id2,id1] = 1
+			end
+		end	
+	end
+
+	ACSC = SparseArrays.SparseMatrixCSC(A)
+	
+	partition = Metis.partition(ACSC, nt)
+	cellregs  = copy(partition)
+	
+	sn = []
+	gi = []
+	ctr_sepanodes = 0
+	for j=1:num_cells(grid)
+		rows = ACSC.rowval[ACSC.colptr[j]:(ACSC.colptr[j+1]-1)]
+		if minimum(partition[rows]) != maximum(partition[rows])
+			cellregs[j] = nt+1
+			ctr_sepanodes += 1
+			push!(sn, j)
+			push!(gi, j)
+		end
+	end
+	RART = ACSC
+	#sn = 1:num_cells(grid)
+	#gi = 1:num_cells(grid)
+	for level=1:depth-1
+		RART, ctr_sepanodes, sn, gi = separate_careful!(cellregs, num_cells(grid), RART, nt, level, ctr_sepanodes, sn, gi)
+		if ctr_sepanodes == 0
+			return RART
+		end
+	end
+
+			
+	#return allcells, start, cellregs
+	RART
+end
+
+
+function adjacencies(grid)
+	A = SparseMatrixLNK{Int64, Int64}(num_cells(grid), num_cells(grid))
+	number_cells_per_node = zeros(Int64, num_nodes(grid))
+	for j=1:num_cells(grid)
+		for node_id in grid[CellNodes][:,j]
+			number_cells_per_node[node_id] += 1
+		end
+	end
+	allcells = zeros(Int64, sum(number_cells_per_node))
+	start = ones(Int64, num_nodes(grid)+1)
+	start[2:end] += cumsum(number_cells_per_node)
+	number_cells_per_node .= 0
+	for j=1:num_cells(grid)
+		for node_id in grid[CellNodes][:,j]
+			allcells[start[node_id] + number_cells_per_node[node_id]] = j
+			number_cells_per_node[node_id] += 1
+		end
+	end
+
+	for j=1:num_nodes(grid)
+		cells = @view allcells[start[j]:start[j+1]-1]
+		for (i,id1) in enumerate(cells)
+			for id2 in cells[i+1:end]
+				A[id1,id2] = 1
+				A[id2,id1] = 1
+			end
+		end	
+	end
+
+	allcells, start, SparseArrays.SparseMatrixCSC(A)
+end
+
+function check_adjacencies(nm)
+	grid = getgrid(nm)
+	allcells, start, A = adjacencies(grid)
+
+	i = 1
+	cells1 = sort(vcat([i], A.rowval[A.colptr[i]:(A.colptr[i+1]-1)])) #adjacent cells
+	nodes2 = grid[CellNodes][:,i]
+	cells2 = sort(unique(vcat([allcells[start[j]:start[j+1]-1] for j in nodes2]...)))
+
+	@info cells1
+	@info cells2
+	@info maximum(abs.(cells1-cells2))
+
+
+end
+
+#=
+function check_partition(nm, nt, depth)
+	grid = getgrid(nm)
+	
+	(allcells, start, cellregs, adepth, ACSC) = grid_to_graph_ps_multi!(grid, nt, depth; minsize_sepa=10, do_print=true)#)
+	
+	if (adepth != depth)
+		@info "The requested depth of partitioning is too high. The depth is set to $adepth."
+	end
+	depth = adepth
+
+	validate_partition(num_nodes(grid), num_cells(grid), grid, cellregs, start, allcells, nt, depth, ACSC)
+end
+=#
+
+function validate_partition(nn, mat, cellregs, start, allcells, nt, depth, assemblytype)
+	violation_ctr = 0
+
+	if assemblytype == :cellwise
+		key = CellNodes
+	else
+		key = EdgeNodes
+	end
+
+	for j=1:nn
+		cells = @view allcells[start[j]:start[j+1]-1]
+		sortedcellregs = unique(sort(cellregs[cells]))
+		levels         = Int.(ceil.(sortedcellregs/nt))
+		
+		for i=1:depth+1
+			ids_lev = findall(x->x==i, levels)
+			if length(ids_lev) > 1
+				violation_ctr += 1
+
+				if violation_ctr == 1
+					@info "Node Id : $j (we only show one violation)"
+					@info "Cellregs: $sortedcellregs"
+					@info "Levels  : $levels"
+					
+					loc = findall(x->x==4, Int.(ceil.(cellregs[allcells[start[j]:start[j+1]-1]]/nt)))
+					cells_at_level4 = allcells[loc.+(start[j]-1)]
+					@info cells_at_level4, cellregs[cells_at_level4]
+					@info mat[:,cells_at_level4[1]], mat[:,cells_at_level4[2]]
+				end
+			end
+		end
+	end
+	@info "We found $violation_ctr violation(s)"
+end
diff --git a/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl b/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl
new file mode 100644
index 0000000..3169d4b
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/struct_flush.jl
@@ -0,0 +1,273 @@
+function flush!(A::ExtendableSparseMatrixParallel; do_dense=false, keep_zeros=true)
+	_, nnzLNK = nnz_noflush(A)
+
+	if nnzLNK == 0
+		return
+	end
+
+	if !do_dense
+		A.cscmatrix = A.cscmatrix+sparse_flush!(A; keep_zeros)
+	
+	else
+		if keep_zeros
+			A.cscmatrix = dense_flush_keepzeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices)
+		else
+			A.cscmatrix = dense_flush_removezeros!(A.lnkmatrices, A.old_noderegions, A.sortednodesperthread, A.nt, A.rev_new_indices)
+		end
+	end
+	A.phash = phash(A.cscmatrix)
+	A.lnkmatrices = [SuperSparseMatrixLNK{matrixvaluetype(A), matrixindextype(A)}(A.n, A.nnts[tid]) for tid=1:A.nt]
+
+end
+
+"""
+`CSC_RLNK_plusequals_less3_reordered_super!` from `plusequals.jl`
+"""
+function sparse_flush!(A::ExtendableSparseMatrixParallel; keep_zeros=true)
+
+	#dropzeros!(
+	plus_remap(A.lnkmatrices, A.cscmatrix, A.globalindices; keep_zeros)
+	#)
+		
+end
+
+
+
+"""
+`CSC_RLNK_si_oc_ps_dz_less_reordered` from `conversion.jl`
+"""
+function dense_flush_keepzeros!(
+	As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, 
+	onr, s, nt, rni
+	) where {Tv, Ti <: Integer}
+
+	nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double
+	indptr = zeros(Ti, As[1].m+1)
+	indices = zeros(Ti, nnz) #sum(As.nnz))
+	data = zeros(Float64, nnz) #sum(As.nnz))
+	ctr = 1
+	eqctr = 0
+	tmp = zeros(Ti, size(onr)[1])
+	
+	#@warn [As[i].nnz for i=1:nt], [As[i].n for i=1:nt], [As[i].m for i=1:nt]
+	#@info maximum.([As[i].colptr for i=1:nt])
+
+	for nj=1:As[1].m
+		indptr[nj] = ctr
+		oj = rni[nj]
+		regionctr = 1
+		jc = 0
+		nrr = view(onr, :, oj) 
+		tmp .= 0
+		for region in nrr #nrr #[:,j]
+			regmod = region #(region-1)%nt+1
+			if (region > 0) & !(region in tmp)
+				k = s[regmod, nj]
+				if regionctr == 1
+					while k>0
+						if As[regmod].rowval[k] != 0
+							if ctr > nnz
+								@info "ctr > nnz, $nj, $oj"
+							end
+							indices[ctr] = As[regmod].rowval[k]
+							data[ctr]    = As[regmod].nzval[k]
+							
+							for jcc=1:jc
+								if indices[ctr-jcc] > indices[ctr-jcc+1]
+									tmp_i = indices[ctr-jcc+1]
+									tmp_d = data[ctr-jcc+1]
+									indices[ctr-jcc+1] = indices[ctr-jcc]
+									data[ctr-jcc+1]    = data[ctr-jcc]
+									
+									indices[ctr-jcc] = tmp_i
+									data[ctr-jcc]    = tmp_d
+								else
+									break
+								end
+							end
+							
+							ctr += 1
+							jc += 1
+						end
+						k = As[regmod].colptr[k]
+					end
+				else
+					while k>0
+						if As[regmod].rowval[k] != 0
+							indices[ctr] = As[regmod].rowval[k]
+							data[ctr]    = As[regmod].nzval[k]
+							
+							for jcc=1:jc
+								if indices[ctr-jcc] > indices[ctr-jcc+1]
+									tmp_i = indices[ctr-jcc+1]
+									tmp_d = data[ctr-jcc+1]
+									indices[ctr-jcc+1] = indices[ctr-jcc]
+									data[ctr-jcc+1]    = data[ctr-jcc]
+									
+									indices[ctr-jcc] = tmp_i
+									data[ctr-jcc]    = tmp_d
+								elseif indices[ctr-jcc] == indices[ctr-jcc+1]
+									data[ctr-jcc] += data[ctr-jcc+1]
+									eqctr += 1
+									
+									for jccc=1:jcc
+										indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1]
+										data[ctr-jcc+jccc]    = data[ctr-jcc+jccc+1]
+									end
+									
+									ctr -= 1
+									jc  -= 1
+									
+									break
+								else
+									break
+								end
+							end
+							
+							ctr += 1
+							jc += 1
+						end
+						k = As[regmod].colptr[k]
+					end
+					
+				end
+				tmp[regionctr] = region
+				regionctr += 1
+				
+			end
+			
+		end
+		
+	end
+
+	#@warn ctr/nnz
+	
+	indptr[end] = ctr
+	resize!(indices, ctr-1)
+	resize!(data, ctr-1)
+
+	
+	SparseArrays.SparseMatrixCSC(
+		As[1].m, As[1].m, indptr, indices, data
+	)
+	
+end
+
+
+function dense_flush_removezeros!(
+	As::Vector{SuperSparseMatrixLNK{Tv, Ti}}, 
+	onr, s, nt, rni
+	) where {Tv, Ti <: Integer}
+
+	nnz = sum([As[i].nnz for i=1:nt]) #you could also subtract the diagonal entries from shared columns, since those are definitely double
+	indptr = zeros(Ti, As[1].m+1)
+	indices = zeros(Ti, nnz) #sum(As.nnz))
+	data = zeros(Float64, nnz) #sum(As.nnz))
+	ctr = 1
+	eqctr = 0
+	tmp = zeros(Ti, size(onr)[1])
+	
+	for nj=1:As[1].m
+		indptr[nj] = ctr
+		oj = rni[nj]
+		regionctr = 1
+		jc = 0
+		nrr = view(onr, :, oj) 
+		tmp .= 0
+		for region in nrr #nrr #[:,j]
+			regmod = region #(region-1)%nt+1
+			if (region > 0) & !(region in tmp)
+				k = s[regmod, nj]
+				if regionctr == 1
+					while k>0
+						if As[regmod].nzval[k] != 0.0
+							indices[ctr] = As[regmod].rowval[k]
+							data[ctr]    = As[regmod].nzval[k]
+							
+							for jcc=1:jc
+								if indices[ctr-jcc] > indices[ctr-jcc+1]
+									tmp_i = indices[ctr-jcc+1]
+									tmp_d = data[ctr-jcc+1]
+									indices[ctr-jcc+1] = indices[ctr-jcc]
+									data[ctr-jcc+1]    = data[ctr-jcc]
+									
+									indices[ctr-jcc] = tmp_i
+									data[ctr-jcc]    = tmp_d
+								else
+									break
+								end
+							end
+							
+							ctr += 1
+							jc += 1
+						end
+						k = As[regmod].colptr[k]
+					end
+				else
+					while k>0
+						if As[regmod].nzval[k] != 0.0
+							indices[ctr] = As[regmod].rowval[k]
+							data[ctr]    = As[regmod].nzval[k]
+							
+							for jcc=1:jc
+								if indices[ctr-jcc] > indices[ctr-jcc+1]
+									tmp_i = indices[ctr-jcc+1]
+									tmp_d = data[ctr-jcc+1]
+									indices[ctr-jcc+1] = indices[ctr-jcc]
+									data[ctr-jcc+1]    = data[ctr-jcc]
+									
+									indices[ctr-jcc] = tmp_i
+									data[ctr-jcc]    = tmp_d
+								elseif indices[ctr-jcc] == indices[ctr-jcc+1]
+									data[ctr-jcc] += data[ctr-jcc+1]
+									eqctr += 1
+									
+									for jccc=1:jcc
+										indices[ctr-jcc+jccc] = indices[ctr-jcc+jccc+1]
+										data[ctr-jcc+jccc]    = data[ctr-jcc+jccc+1]
+									end
+									
+									ctr -= 1
+									jc  -= 1
+									
+									break
+								else
+									break
+								end
+							end
+							
+							ctr += 1
+							jc += 1
+						end
+						k = As[regmod].colptr[k]
+					end
+					
+				end
+				tmp[regionctr] = region
+				regionctr += 1
+				
+			end
+			
+		end
+		
+	end
+
+	#@warn ctr/nnz
+	
+	indptr[end] = ctr
+	resize!(indices, ctr-1)
+	resize!(data, ctr-1)
+
+	
+	SparseArrays.SparseMatrixCSC(
+		As[1].m, As[1].m, indptr, indices, data
+	)
+	
+end
+
+
+
+
+
+
+
diff --git a/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl b/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl
new file mode 100644
index 0000000..691e158
--- /dev/null
+++ b/src/experimental/ExtendableSparseMatrixParallel/supersparse.jl
@@ -0,0 +1,798 @@
+
+mutable struct SuperSparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti}
+    """
+    Number of rows
+    """
+    m::Ti
+
+    """
+    Number of columns
+    """
+    n::Ti
+
+    """
+    Number of nonzeros
+    """
+    nnz::Ti
+
+    """
+    Length of arrays
+    """
+    nentries::Ti
+
+    """
+    Linked list of column entries. Initial length is n,
+    it grows with each new entry.
+
+    colptr[index] contains the next
+    index in the list or zero, in the later case terminating the list which
+    starts at index 1<=j<=n for each column j.
+    """
+    colptr::Vector{Ti}
+
+    """
+    Row numbers. For each index it contains the zero (initial state)
+    or the row numbers corresponding to the column entry list in colptr.
+
+    Initial length is n,
+    it grows with each new entry.
+    """
+    rowval::Vector{Ti}
+
+    """
+    Nonzero entry values corresponding to each pair
+    (colptr[index],rowval[index])
+
+    Initial length is n, it grows with each new entry.
+    """
+    nzval::Vector{Tv}
+    
+	"""
+	(Unsorted) list of all columns with non-zero entries
+	"""
+    collnk::Vector{Ti}
+    
+	# counts the number of columns in use
+    colctr::Ti
+end
+
+
+function SparseArrays.SparseMatrixCSC(A::SuperSparseMatrixLNK{Tv, Ti})::SparseArrays.SparseMatrixCSC where {Tv, Ti <: Integer}
+	SparseArrays.SparseMatrixCSC(SparseMatrixLNK{Tv, Ti}(A.m, A.n, A.nnz, A.nentries, A.colptr, A.rowval, A.nzval))
+
+end
+
+function SuperSparseMatrixLNK{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    SuperSparseMatrixLNK{Tv, Ti}(m, n, 0, n, zeros(Ti, n), zeros(Ti, n), zeros(Tv, n), zeros(Ti, n), 0)
+end
+
+
+function ExtendableSparse.findindex(lnk::SuperSparseMatrixLNK, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k = j
+    k0 = j
+    while k > 0
+        if lnk.rowval[k] == i
+            return k, 0
+        end
+        k0 = k
+        k = lnk.colptr[k]
+    end
+    return 0, k0
+end
+
+"""
+Return tuple containing size of the matrix.
+"""
+Base.size(lnk::SuperSparseMatrixLNK) = (lnk.m, lnk.n)
+
+"""    
+Return value stored for entry or zero if not found
+"""
+function Base.getindex(lnk::SuperSparseMatrixLNK{Tv, Ti}, i, j) where {Tv, Ti}
+    k, k0 = findindex(lnk, i, j)
+    if k == 0
+        return zero(Tv)
+    else
+        return lnk.nzval[k]
+    end
+end
+
+function addentry!(lnk::SuperSparseMatrixLNK, i, j, k, k0)
+    # increase number of entries
+    lnk.nentries += 1
+    if length(lnk.nzval) < lnk.nentries
+        newsize = Int(ceil(5.0 * lnk.nentries / 4.0))
+        resize!(lnk.nzval, newsize)
+        resize!(lnk.rowval, newsize)
+        resize!(lnk.colptr, newsize)
+    end
+
+    # Append entry if not found
+    lnk.rowval[lnk.nentries] = i
+
+    # Shift the end of the list
+    lnk.colptr[lnk.nentries] = 0
+    lnk.colptr[k0] = lnk.nentries
+
+    # Update number of nonzero entries
+    lnk.nnz += 1
+    return lnk.nentries
+end
+
+"""    
+Update value of existing entry, otherwise extend matrix if v is nonzero.
+"""
+function Base.setindex!(lnk::SuperSparseMatrixLNK, v, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    # Set the first  column entry if it was not yet set.
+    if lnk.rowval[j] == 0 && !iszero(v)
+    	lnk.colctr             += 1
+    	lnk.collnk[lnk.colctr] = j
+        lnk.rowval[j]          = i
+        lnk.nzval[j]           = v
+        lnk.nnz                += 1
+        return lnk
+    end
+
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = v
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = v
+    end
+    return lnk
+end
+
+"""
+Update element of the matrix  with operation `op`. 
+It assumes that `op(0,0)==0`. If `v` is zero, no new 
+entry is created.
+"""
+function updateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti}
+    # Set the first  column entry if it was not yet set.
+    if lnk.rowval[j] == 0 && !iszero(v)
+        lnk.colctr             += 1
+    	lnk.collnk[lnk.colctr] = j
+        lnk.rowval[j]          = i
+        lnk.nzval[j]           = op(lnk.nzval[j], v)
+        lnk.nnz                += 1
+        return lnk
+    end
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = op(lnk.nzval[k], v)
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = op(zero(Tv), v)
+    end
+    lnk
+end
+
+function rawupdateindex!(lnk::SuperSparseMatrixLNK{Tv, Ti}, op, v, i, j) where {Tv, Ti}
+    # Set the first  column entry if it was not yet set.
+    if lnk.rowval[j] == 0
+        lnk.colctr             += 1
+    	lnk.collnk[lnk.colctr] = j
+        lnk.rowval[j]          = i
+        lnk.nzval[j]           = op(lnk.nzval[j], v)
+        lnk.nnz                += 1
+        return lnk
+    end
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = op(lnk.nzval[k], v)
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = op(zero(Tv), v)
+    end
+    lnk
+end
+
+#=
+mutable struct ColEntry{Tv, Ti <: Integer}
+    rowval::Ti
+    nzval::Tv
+end
+
+# Comparison method for sorting
+Base.isless(x::ColEntry, y::ColEntry) = (x.rowval < y.rowval)
+=#
+
+function get_column!(col::Vector{ColEntry{Tv, Ti}}, lnk::SuperSparseMatrixLNK{Tv, Ti}, j::Ti)::Ti where {Tv, Ti <: Integer}
+	k = j
+	ctr = zero(Ti)
+	while k>0
+		if abs(lnk.nzval[k]) > 0
+			ctr += 1
+			col[ctr] = ColEntry(lnk.rowval[k], lnk.nzval[k])
+		end
+		k = lnk.colptr[k]
+	end
+	sort!(col, 1, ctr, Base.QuickSort, Base.Forward)
+	ctr
+end
+
+
+function remove_doubles!(col, coll)
+	#input_ctr = 1
+	last = 1
+	for j=2:coll
+		if col[j].rowval == col[last].rowval
+			col[last].nzval += col[j].nzval
+		else
+			last += 1
+			if last != j
+				col[last] = col[j]
+			end
+		end
+	end	
+	last
+end
+
+function get_column_removezeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer}
+	ctr = zero(Ti)
+	for i=1:length
+		tid = tids[i]
+		k   = js[i] 
+		#for (tid,j) in zip(tids, js) #j0:j1
+		#tid = tids[j]
+		#k = j
+		while k>0
+			if abs(lnks[tid].nzval[k]) > 0
+				ctr += 1
+				col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k])
+			end
+			k = lnks[tid].colptr[k]
+		end
+	end
+	
+	sort!(col, 1, ctr, Base.QuickSort, Base.Forward)
+	ctr = remove_doubles!(col, ctr)
+	#print_col(col, ctr)
+	ctr
+
+end
+
+function get_column_keepzeros!(col::Vector{ColEntry{Tv, Ti}}, lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, js, tids, length)::Ti where {Tv, Ti <: Integer}
+	ctr = zero(Ti)
+	for i=1:length
+		tid = tids[i]
+		k   = js[i] 
+		#for (tid,j) in zip(tids, js) #j0:j1
+		#tid = tids[j]
+		#k = j
+		while k>0
+			#if abs(lnks[tid].nzval[k]) > 0
+				ctr += 1
+				col[ctr] = ColEntry(lnks[tid].rowval[k], lnks[tid].nzval[k])
+			#end
+			k = lnks[tid].colptr[k]
+		end
+	end
+	
+	sort!(col, 1, ctr, Base.QuickSort, Base.Forward)
+	ctr = remove_doubles!(col, ctr)
+	#print_col(col, ctr)
+	ctr
+
+end
+
+function merge_into!(rowval::Vector{Ti}, nzval::Vector{Tv}, C::SparseArrays.SparseMatrixCSC{Tv, Ti}, col::Vector{ColEntry{Tv, Ti}}, J::Ti, coll::Ti, ptr1::Ti) where {Tv, Ti <: Integer}
+	j_min = 1
+	numshifts = 0
+	j_last = 0
+	last_row = 0
+	
+	#@warn "MERGING $J"
+	
+	#rowval0 = copy(C.rowval[C.colptr[J]:C.colptr[J+1]-1])
+	#endptr = C.colptr[J+1]
+	
+	for (di,i) in enumerate(C.colptr[J]:C.colptr[J+1]-1)
+		for j=j_min:coll
+			#if col[j].rowval == last_row
+			#	#@info "!! col j rowval == last row"
+			#end
+			if col[j].rowval < C.rowval[i] #ptr1+di+numshifts] #i+numshifts]
+				if col[j].rowval == last_row
+					#@info "$(ptr1+di+numshifts) : backwards EQUALITY: "
+					nzval[ptr1+di+numshifts] += col[j].nzval
+				else
+					#@info "$(ptr1+di+numshifts) : Insert from col: j=$j"
+					#shift_e!(C.rowval, C.nzval, 1, i+numshifts, C.colptr[end]-1)
+					rowval[ptr1+di+numshifts] = col[j].rowval
+					nzval[ptr1+di+numshifts]  = col[j].nzval
+					numshifts += 1
+					#endptr += 1
+				end
+				j_last = j
+			elseif col[j].rowval > C.rowval[i] #if col[j].rowval  
+				#@info "$(ptr1+di+numshifts) : Insert from C: i=$i"
+				rowval[ptr1+di+numshifts] = C.rowval[i]
+				nzval[ptr1+di+numshifts]  = C.nzval[i]
+				j_min = j
+				break
+			else
+				#@info "$(ptr1+di+numshifts) : normal EQUALITY: i=$i, j=$j"
+				rowval[ptr1+di+numshifts] = C.rowval[i]
+				nzval[ptr1+di+numshifts]  = C.nzval[i]+col[j].nzval
+				#numshifts += 1
+				j_min = j+1
+				j_last = j
+				
+				if j == coll
+					#@info "$(ptr1+di+numshifts+1) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))"
+					rowval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i+1:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1]
+					nzval[ptr1+di+numshifts+1:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])]  = view(C.nzval, i+1:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1]
+					
+					#@info "FINISH"
+					return numshifts
+				end
+				
+				break
+			end
+			
+			if j == coll
+				#@info "$(ptr1+di+numshifts) → $(ptr1+numshifts+(C.colptr[J+1]-C.colptr[J]))"
+				rowval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])] = view(C.rowval, i:C.colptr[J+1]-1) #C.rowval[i:C.colptr[J+1]-1]
+				nzval[ptr1+di+numshifts:ptr1+numshifts+(C.colptr[J+1]-C.colptr[J])]  = view(C.nzval, i:C.colptr[J+1]-1) #C.nzval[i:C.colptr[J+1]-1]
+				
+				#@info "FINISH"
+				return numshifts
+			end
+			
+			last_row = col[j].rowval
+		end
+	end
+	endptr = ptr1 + numshifts + (C.colptr[J+1]-C.colptr[J])
+	last_row = 0
+	numshifts_old = numshifts
+	numshifts = 0
+	#start_ptr = endptr - 1 #C.colptr[J+1]-1
+	if j_last > 0
+		last_row = col[j_last].rowval
+	end
+	
+	if j_last != coll
+		for j=j_last+1:coll
+			if col[j].rowval != last_row
+				numshifts += 1
+				#shift_e!(C.rowval, C.nzval, 1, start_ptr+numshifts, C.colptr[end]-1)
+				#for k=start_ptr+numshifts:
+				#@info "$(endptr+numshifts) : after..."
+				rowval[endptr+numshifts] = col[j].rowval
+				nzval[endptr+numshifts]  = col[j].nzval
+				last_row                    = rowval[endptr+numshifts]
+				#colptr[J+1:end]             .+= 1
+			else
+				nzval[endptr+numshifts] += col[j].nzval
+			end
+		end
+	end
+	
+	return numshifts + numshifts_old
+
+end
+
+
+function print_col(col, coll)
+	v = zeros((2, coll))
+	for j=1:coll
+		v[1,j] = col[j].rowval
+		v[2,j] = col[j].nzval
+	end
+	@info v
+end
+
+
+"""
+$(SIGNATURES)
+
+Add the matrices `lnks` of type SuperSparseMatrixLNK onto the SparseMatrixCSC `csc`.
+`gi[i]` maps the indices in `lnks[i]` to the indices of `csc`.
+"""
+function plus_remap(lnks::Vector{SuperSparseMatrixLNK{Tv, Ti}}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Vector{Ti}}; keep_zeros=true) where {Tv, Ti <: Integer}
+	nt = length(lnks)
+
+	if keep_zeros
+		get_col! = get_column_keepzeros!
+	else
+		get_col! = get_column_removezeros!
+	end
+	lnkscols           = vcat([lnks[i].collnk[1:lnks[i].colctr] for i=1:nt]...)
+	supersparsecolumns = vcat([gi[i][lnks[i].collnk[1:lnks[i].colctr]] for i=1:nt]...)
+	num_cols = sum([lnks[i].colctr for i=1:nt])
+	tids     = Vector{Ti}(undef, num_cols)
+	ctr = 0
+	for i=1:nt
+		for j=1:lnks[i].colctr
+			ctr += 1
+			tids[ctr] = i
+		end
+	end
+
+
+	sortedcolumnids    = sortperm(supersparsecolumns)
+	sortedcolumns      = supersparsecolumns[sortedcolumnids]
+	sortedcolumns      = vcat(sortedcolumns, [Ti(csc.n+1)])
+	
+	coll = sum([lnks[i].nnz for i=1:nt])
+	nnz_sum   = length(csc.rowval) + coll
+	colptr    = Vector{Ti}(undef, csc.n+1)
+	rowval    = Vector{Ti}(undef, nnz_sum)
+	nzval     = Vector{Tv}(undef, nnz_sum)
+	colptr[1] = one(Ti)
+	
+	if csc.m < coll
+		coll = csc.m
+	end
+	
+	col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:coll]
+	numshifts = 0
+	
+	colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1])
+	rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1)
+	nzval[1:csc.colptr[sortedcolumns[1]]-1]  = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1)
+	
+	J = 1
+	i0 = 0
+	#lj_last = []
+	#tid_last = []
+	lj_last  = Vector{Ti}(undef, nt)
+	tid_last = Vector{Ti}(undef, nt)
+	ctr_last = 1
+	gj_last = 0
+	for J=1:length(sortedcolumns)-1
+		gj_now  = sortedcolumns[J]
+		gj_next = sortedcolumns[J+1]
+			
+		lj_last[ctr_last]  = lnkscols[sortedcolumnids[J]]
+		tid_last[ctr_last] = tids[sortedcolumnids[J]]
+		
+		if gj_now != gj_next
+			#@info typeof(lnks)
+			# do stuff from gj_last to gj_now / from last_lj to J
+			#@info lj_last, tid_last
+			coll = get_col!(col, lnks, lj_last, tid_last, ctr_last)
+			
+			nns       = merge_into!(rowval, nzval, csc, col, gj_now, coll, colptr[gj_now]-one(Ti))
+			numshifts += nns
+			
+			
+			colptr[gj_now+1:sortedcolumns[J+1]] = csc.colptr[gj_now+1:sortedcolumns[J+1]].+(-csc.colptr[gj_now]+colptr[gj_now] + nns)
+			
+			rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1)
+			nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1]  = view(csc.nzval, csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1)
+			
+			#rowval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1] = csc.rowval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1]
+			#nzval[colptr[gj_now+1]:colptr[sortedcolumns[J+1]]-1]  = csc.nzval[csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1]
+			
+			
+			#for k=csc.colptr[gj_now+1]:csc.colptr[sortedcolumns[J+1]]-1
+			#	k2 = k+(-csc.colptr[gj_now]+colptr[gj_now] + nns)
+			#	rowval[k2] = csc.rowval[k]
+			#	nzval[k2]  = csc.nzval[k]
+			#end	
+			
+			gj_last  = gj_now
+			ctr_last = 0 #tids[sortedcolumnids[J]]]
+		end
+		
+		ctr_last += 1
+		
+		
+	end
+		
+	
+	resize!(rowval, length(csc.rowval)+numshifts)
+	resize!(nzval, length(csc.rowval)+numshifts)
+	
+	
+	SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval)
+	
+
+	#for ...
+	#	take many columns together if necessary in `get_column`
+	#end
+
+
+
+end
+
+
+"""
+$(SIGNATURES)
+
+Add the SuperSparseMatrixLNK `lnk` onto the SparseMatrixCSC `csc`.
+`gi` maps the indices in `lnk` to the indices of `csc`.
+"""
+function plus_remap(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC, gi::Vector{Ti}) where {Tv, Ti <: Integer}
+
+	#@info lnk.collnk[1:lnk.colctr]
+	
+	
+	supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]]
+	sortedcolumnids    = sortperm(supersparsecolumns)
+	sortedcolumns      = supersparsecolumns[sortedcolumnids]
+	#sortedcolumns      = vcat([1], sortedcolumns)
+	#@info typeof(supersparsecolumns), typeof(sortedcolumns)
+	
+	sortedcolumns      = vcat(sortedcolumns, [Ti(csc.n+1)])
+	
+	#@info typeof(supersparsecolumns), typeof(sortedcolumns)
+	
+	
+	#@info supersparsecolumns
+	#@info sortedcolumns
+	#@info lnk.collnk[1:length(sortedcolumns)-1]
+	#@info lnk.collnk[sortedcolumnids[1:length(sortedcolumns)-1]]
+	
+	col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m]
+	
+	#@info sortedcolumnids 
+	
+	nnz_sum   = length(csc.rowval) + lnk.nnz
+	colptr    = Vector{Ti}(undef, csc.n+1)
+	rowval    = Vector{Ti}(undef, nnz_sum)
+	nzval     = Vector{Tv}(undef, nnz_sum)
+	colptr[1] = one(Ti)
+	
+	#first part: columns between 1 and first column of lnk
+	
+	colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1])
+	rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1)
+	nzval[1:csc.colptr[sortedcolumns[1]]-1]  = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1)
+	
+	numshifts = 0
+	
+	for J=1:length(sortedcolumns)-1
+		i = sortedcolumns[J]
+		
+		coll = get_column!(col, lnk, lnk.collnk[sortedcolumnids[J]] )
+		#@info typeof(i), typeof(coll), typeof(colptr), typeof(colptr[i]), typeof(colptr[i]-1)
+		nns       = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-one(Ti))
+		numshifts += nns
+		
+		
+		colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns)
+		rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1)
+		nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1]  = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1)
+		
+		#=
+		for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1
+			k2 = k+(-csc.colptr[i]+colptr[i] + nns)
+			rowval[k2] = csc.rowval[k]
+			nzval[k2]  = csc.nzval[k]
+		end
+		=#
+	end
+	
+	
+	resize!(rowval, length(csc.rowval)+numshifts)
+	resize!(nzval, length(csc.rowval)+numshifts)
+	
+	
+	SparseArrays.SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval)
+
+end
+
+
+"""
+
+function plus(lnk::SparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer}
+	if lnk.nnz == 0
+		return csc
+	elseif length(csc.rowval) == 0
+		return SparseMatrixCSC(lnk)
+	else
+		return lnk + csc
+	end
+end
+
+function plus(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer}
+	gi = collect(1:csc.n)
+	
+	
+	supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]]
+	sortedcolumnids    = sortperm(supersparsecolumns)
+	sortedcolumns      = supersparsecolumns[sortedcolumnids]
+	#sortedcolumns      = vcat([1], sortedcolumns)
+	sortedcolumns      = vcat(sortedcolumns, [csc.n+1])
+	
+	col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m]
+	
+	#@info sortedcolumnids 
+	
+	nnz_sum   = length(csc.rowval) + lnk.nnz
+	colptr    = Vector{Ti}(undef, csc.n+1)
+	rowval    = Vector{Ti}(undef, nnz_sum)
+	nzval     = Vector{Tv}(undef, nnz_sum)
+	colptr[1] = one(Ti)
+	
+	#first part: columns between 1 and first column of lnk
+	
+	colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1])
+	rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1)
+	nzval[1:csc.colptr[sortedcolumns[1]]-1]  = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1)
+	
+	numshifts = 0
+	
+	for J=1:length(sortedcolumns)-1
+		#@info ">>>>>>> J <<<<<<<<<<<<<<<"
+		# insert new added column here / dummy
+		i = sortedcolumns[J]
+		coll = get_column!(col, lnk, i)
+		#print_col(col, coll)
+		
+		nns       = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1)
+		
+		numshifts += nns
+		#j = colptr[i] #sortedcolumns[J]] 
+		#rowval[j] = J
+		#nzval[j]  = J
+		# insertion end
+		
+		#colptr[i+1] = colptr[i] + csc.colptr[i+1]-csc.colptr[i] + numshifts
+		
+		#a = i+1
+		#b = sortedcolumns[J+1]
+		#@info a, b
+		
+		
+		#colptr[i+1:sortedcolumns[J+1]] = (csc.colptr[i+1:sortedcolumns[J+1]]-csc.colptr[i:sortedcolumns[J+1]-1]).+(colptr[i] + nns)
+		
+		colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns)
+		
+		
+		rowval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1] = view(csc.rowval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1)
+		nzval[colptr[i+1]:colptr[sortedcolumns[J+1]]-1]  = view(csc.nzval, csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1)
+		
+		
+		#=
+		
+		@info csc.colptr[a:b]
+		
+		colptr[a:b] = csc.colptr[a:b].+numshifts
+		
+		#colptr[i+2:sortedcolumns[J+1]] = csc.colptr[i+2:sortedcolumns[J+1]].+numshifts
+		@info i, J, colptr[i+2], colptr[sortedcolumns[J+1]], csc.colptr[i+2], csc.colptr[sortedcolumns[J+1]]
+		@info i, J, colptr[a], colptr[b], csc.colptr[a], csc.colptr[b]
+		rowval[colptr[i+2]:colptr[sortedcolumns[J+1]]] = view(csc.rowval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]])
+		nzval[colptr[i+2]:colptr[sortedcolumns[J+1]]]  = view(csc.nzval, csc.colptr[i+2]:csc.colptr[sortedcolumns[J+1]])
+		#rowval[colptrsortedcolumns[J+1]]
+		=#
+	end
+	
+	#@info colptr
+	
+	resize!(rowval, length(csc.rowval)+numshifts)
+	resize!(nzval, length(csc.rowval)+numshifts)
+	
+	
+	SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval)
+
+		
+
+end
+
+function plus_loop(lnk::SuperSparseMatrixLNK{Tv, Ti}, csc::SparseArrays.SparseMatrixCSC) where {Tv, Ti <: Integer}
+	gi = collect(1:csc.n)
+	
+	supersparsecolumns = gi[lnk.collnk[1:lnk.colctr]]
+	sortedcolumnids    = sortperm(supersparsecolumns)
+	sortedcolumns      = supersparsecolumns[sortedcolumnids]
+	#sortedcolumns      = vcat([1], sortedcolumns)
+	sortedcolumns      = vcat(sortedcolumns, [csc.n+1])
+	
+	col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i=1:csc.m]
+	
+	#@info sortedcolumnids 
+	
+	nnz_sum   = length(csc.rowval) + lnk.nnz
+	colptr    = Vector{Ti}(undef, csc.n+1)
+	rowval    = Vector{Ti}(undef, nnz_sum)
+	nzval     = Vector{Tv}(undef, nnz_sum)
+	colptr[1] = one(Ti)
+	
+	#first part: columns between 1 and first column of lnk
+	
+	colptr[1:sortedcolumns[1]] = view(csc.colptr, 1:sortedcolumns[1])
+	rowval[1:csc.colptr[sortedcolumns[1]]-1] = view(csc.rowval, 1:csc.colptr[sortedcolumns[1]]-1)
+	nzval[1:csc.colptr[sortedcolumns[1]]-1]  = view(csc.nzval, 1:csc.colptr[sortedcolumns[1]]-1)
+	
+	numshifts = 0
+	
+	for J=1:length(sortedcolumns)-1
+		i = sortedcolumns[J]
+		coll = get_column!(col, lnk, i)
+		
+		nns       = merge_into!(rowval, nzval, csc, col, i, coll, colptr[i]-1)
+		numshifts += nns
+		
+		colptr[i+1:sortedcolumns[J+1]] = csc.colptr[i+1:sortedcolumns[J+1]].+(-csc.colptr[i]+colptr[i] + nns)
+		
+		
+		for k=csc.colptr[i+1]:csc.colptr[sortedcolumns[J+1]]-1
+			k2 = k+(-csc.colptr[i]+colptr[i] + nns)
+			rowval[k2] = csc.rowval[k]
+			nzval[k2]  = csc.nzval[k]
+		end
+		
+		
+	end
+	
+	#@info colptr
+	
+	resize!(rowval, length(csc.rowval)+numshifts)
+	resize!(nzval, length(csc.rowval)+numshifts)
+	
+	
+	SparseMatrixCSC(csc.m, csc.n, colptr, rowval, nzval)
+
+		
+
+end
+
+
+function twodisjointsets(n, k)
+	A = rand(1:n, k)
+	B = zeros(Int64, k)
+	done = false
+	ctr = 0
+	while ctr != k
+		v = rand(1:n)
+		if !(v in A)
+			ctr += 1
+			B[ctr] = v
+		end
+	end
+
+	A, B
+end
+
+function distinct(x, n)
+	y = zeros(typeof(x[1]), n)
+	ctr = 0
+	while ctr != n
+		v = rand(x)
+		if !(v in y[1:ctr])
+			ctr += 1
+			y[ctr] = v
+		end
+	end
+	y
+end 
+"""
+
+function mean(x)
+	sum(x)/length(x)
+end
+
+function form(x)
+	[minimum(x), mean(x), maximum(x)]
+end
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/factorizations/blockpreconditioner.jl b/src/factorizations/blockpreconditioner.jl
index 7c97eca..bf0d5de 100644
--- a/src/factorizations/blockpreconditioner.jl
+++ b/src/factorizations/blockpreconditioner.jl
@@ -49,7 +49,7 @@ function update!(precon::BlockPreconditioner)
 
     np=length(precon.partitioning)
     precon.facts=Vector{Any}(undef,np)
-    Threads.@threads for ipart=1:np
+    @tasks for ipart=1:np
         factorization=deepcopy(precon.factorization)
         AP=precon.A[precon.partitioning[ipart],precon.partitioning[ipart]]
         FP=factorization(AP)
@@ -66,11 +66,11 @@ function LinearAlgebra.ldiv!(p::BlockPreconditioner,v)
     np=length(partitioning)
 
     if allow_views(p.factorization)
-        Threads.@threads for ipart=1:np
+        @tasks for ipart=1:np
 	    ldiv!(facts[ipart],view(v,partitioning[ipart]))
         end
     else
-        Threads.@threads for ipart=1:np
+        @tasks for ipart=1:np
             vv=v[partitioning[ipart]]
 	    ldiv!(facts[ipart],vv)
             view(v,partitioning[ipart]).=vv
@@ -85,11 +85,11 @@ function LinearAlgebra.ldiv!(u,p::BlockPreconditioner,v)
     np=length(partitioning)
     
     if allow_views(p.factorization)
-        Threads.@threads for ipart=1:np
+        @tasks for ipart=1:np
 	    ldiv!(view(u,partitioning[ipart]),facts[ipart],view(v,partitioning[ipart]))
         end
     else
-        Threads.@threads for ipart=1:np
+        @tasks for ipart=1:np
             uu=u[partitioning[ipart]]
 	    ldiv!(uu,facts[ipart],v[partitioning[ipart]])
             view(u,partitioning[ipart]).=uu
diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl
index d278d8b..c9809d3 100644
--- a/src/factorizations/factorizations.jl
+++ b/src/factorizations/factorizations.jl
@@ -51,6 +51,48 @@ Determine if factorization is a solver or not
 issolver(::AbstractLUFactorization) = true
 issolver(::AbstractPreconditioner) = false
 
+
+
+""""
+    @makefrommatrix(fact)
+
+For an AbstractFactorization `MyFact`, provide methods
+```
+    MyFact(A::ExtendableSparseMatrix; kwargs...)
+    MyFact(A::SparseMatrixCSC; kwargs...)
+```
+"""
+macro makefrommatrix(fact)
+    return quote
+        function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...)
+            factorize!($(esc(fact))(;kwargs...), A)
+        end
+        function $(esc(fact))(A::SparseMatrixCSC; kwargs...)
+            $(esc(fact))(ExtendableSparseMatrix(A); kwargs...)
+        end
+    end
+end
+
+include("ilu0.jl")
+include("iluzero.jl")
+include("parallel_jacobi.jl")
+include("parallel_ilu0.jl")
+include("sparspak.jl")
+include("blockpreconditioner.jl")
+include("jacobi.jl")
+
+@eval begin
+    @makefrommatrix ILU0Preconditioner
+    @makefrommatrix ILUZeroPreconditioner
+    @makefrommatrix PointBlockILUZeroPreconditioner
+    @makefrommatrix JacobiPreconditioner
+    @makefrommatrix ParallelJacobiPreconditioner
+    @makefrommatrix ParallelILU0Preconditioner
+    @makefrommatrix SparspakLU
+    @makefrommatrix UpdateteableBlockpreconditioner
+    @makefrommatrix BlockPreconditioner
+end
+
 """
 ```
 factorize!(factorization, matrix)
@@ -64,9 +106,9 @@ function factorize!(p::AbstractFactorization, A::ExtendableSparseMatrix)
     update!(p)
     p
 end
+factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A))
 
 
-factorize!(p::AbstractFactorization, A::SparseMatrixCSC)=factorize!(p,ExtendableSparseMatrix(A))
 """
 ```
 lu!(factorization, matrix)
@@ -134,45 +176,6 @@ LinearAlgebra.ldiv!(fact::AbstractFactorization, v) = ldiv!(fact.factorization,
 
 
 
-""""
-    @makefrommatrix(fact)
-
-For an AbstractFactorization `MyFact`, provide methods
-```
-    MyFact(A::ExtendableSparseMatrix; kwargs...)
-    MyFact(A::SparseMatrixCSC; kwargs...)
-```
-"""
-macro makefrommatrix(fact)
-    return quote
-        function $(esc(fact))(A::ExtendableSparseMatrix; kwargs...)
-            factorize!($(esc(fact))(;kwargs...), A)
-        end
-        function $(esc(fact))(A::SparseMatrixCSC; kwargs...)
-            $(esc(fact))(ExtendableSparseMatrix(A); kwargs...)
-        end
-    end
-end
-
-include("jacobi.jl")
-include("ilu0.jl")
-include("iluzero.jl")
-include("parallel_jacobi.jl")
-include("parallel_ilu0.jl")
-include("sparspak.jl")
-include("blockpreconditioner.jl")
-
-@eval begin
-    @makefrommatrix ILU0Preconditioner
-    @makefrommatrix ILUZeroPreconditioner
-    @makefrommatrix PointBlockILUZeroPreconditioner
-    @makefrommatrix JacobiPreconditioner
-    @makefrommatrix ParallelJacobiPreconditioner
-    @makefrommatrix ParallelILU0Preconditioner
-    @makefrommatrix SparspakLU
-    @makefrommatrix UpdateteableBlockpreconditioner
-    @makefrommatrix BlockPreconditioner
-end
 
 if USE_GPL_LIBS
     #requires SuiteSparse which is not available in non-GPL builds
diff --git a/src/matrix/abstractextendablesparsematrixcsc.jl b/src/matrix/abstractextendablesparsematrixcsc.jl
new file mode 100644
index 0000000..491ebfb
--- /dev/null
+++ b/src/matrix/abstractextendablesparsematrixcsc.jl
@@ -0,0 +1,299 @@
+"""
+
+Subtypes must implement:
+- SparseArrays.sparse (may be should be sparse! ?) flush+return SparseMatrixCSC
+- Constructor from SparseMatrixCSC
+rawupdateindex!
+reset!: empty all internals, just keep size 
+"""
+
+abstract type AbstractExtendableSparseMatrixCSC{Tv,Ti} <: AbstractSparseMatrixCSC{Tv,Ti} end
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix.
+"""
+SparseArrays.nnz(ext::AbstractExtendableSparseMatrixCSC)=nnz(sparse(ext))
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and return nonzeros in ext.cscmatrix.
+"""
+SparseArrays.nonzeros(ext::AbstractExtendableSparseMatrixCSC)=nonzeros(sparse(ext))
+
+Base.size(ext::AbstractExtendableSparseMatrixCSC)=size(ext.cscmatrix)
+
+
+
+"""
+$(SIGNATURES)
+
+Return element type.
+"""
+Base.eltype(::AbstractExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = Tv
+
+
+
+"""
+$(SIGNATURES)
+
+ Create SparseMatrixCSC from ExtendableSparseMatrix
+"""
+SparseArrays.SparseMatrixCSC(A::AbstractExtendableSparseMatrixCSC)=sparse(A)
+
+
+
+
+function Base.show(io::IO, ::MIME"text/plain", ext::AbstractExtendableSparseMatrixCSC)
+    A=sparse(ext)
+    xnnz = nnz(A)
+    m, n = size(A)
+    print(io,
+          m,
+          "×",
+          n,
+          " ",
+          typeof(ext),
+          " with ",
+          xnnz,
+          " stored ",
+          xnnz == 1 ? "entry" : "entries")
+
+    if !haskey(io, :compact)
+        io = IOContext(io, :compact => true)
+    end
+
+    if !(m == 0 || n == 0 || xnnz == 0)
+        print(io, ":\n")
+        Base.print_array(IOContext(io), A)
+    end
+end
+
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and return rowvals in ext.cscmatrix.
+"""
+SparseArrays.rowvals(ext::AbstractExtendableSparseMatrixCSC)=rowvals(sparse(ext))
+
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and return colptr of  in ext.cscmatrix.
+"""
+SparseArrays.getcolptr(ext::AbstractExtendableSparseMatrixCSC)=getcolptr(sparse(ext))
+
+    
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and return findnz(ext.cscmatrix).
+"""
+SparseArrays.findnz(ext::AbstractExtendableSparseMatrixCSC)=findnz(sparse(ext))
+
+
+@static if VERSION >= v"1.7"
+    SparseArrays._checkbuffers(ext::AbstractExtendableSparseMatrixCSC)=  SparseArrays._checkbuffers(sparse(ext))
+end
+
+"""
+    A\b
+
+[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components
+are allowed  in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64.
+In that case, Julias standard `\` is called, which is realized via UMFPACK.
+"""
+function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{Tv, Ti},
+                          b::AbstractVector) where {Tv, Ti}
+    SparspakLU(sparse(ext)) \ b
+end
+
+
+"""
+$(SIGNATURES)
+
+[`\\`](@ref) for Symmetric{ExtendableSparse}
+"""
+function LinearAlgebra.:\(symm_ext::Symmetric{Tm, T},
+                          b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}}
+    Symmetric(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ...
+end
+
+"""
+$(SIGNATURES)
+
+[`\\`](@ref) for Hermitian{ExtendableSparse}
+"""
+function LinearAlgebra.:\(symm_ext::Hermitian{Tm, T},
+                          b::AbstractVector) where {Tm, Ti, T<:AbstractExtendableSparseMatrixCSC{Tm,Ti}}
+    Hermitian(sparse(symm_ext.data),Symbol(symm_ext.uplo)) \ b # no ldlt yet ...
+end
+
+if USE_GPL_LIBS
+    for (Tv) in (:Float64, :ComplexF64)
+        @eval begin function LinearAlgebra.:\(ext::AbstractExtendableSparseMatrixCSC{$Tv, Ti},
+                                              B::AbstractVector) where {Ti}
+            sparse(ext) \ B
+        end end
+
+        @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv,
+                                                                  AbstractExtendableSparseMatrixCSC{
+                                                                      $Tv,
+                                                                      Ti
+                                                                  }},
+            B::AbstractVector) where {Ti}
+            symm_csc = Symmetric(sparse(symm_ext.data), Symbol(symm_ext.uplo))
+            symm_csc \ B
+        end end
+
+        @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv,
+                                                                  AbstractExtendableSparseMatrixCSC{
+                                                                      $Tv,
+                                                                      Ti
+                                                                  }},
+                                              B::AbstractVector) where {Ti}
+            symm_csc = Hermitian(sparse(symm_ext.data), Symbol(symm_ext.uplo))
+            symm_csc \ B
+        end end
+    end
+end # USE_GPL_LIBS
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and ldiv with ext.cscmatrix
+"""
+function LinearAlgebra.ldiv!(r, ext::AbstractExtendableSparseMatrixCSC, x)
+    LinearAlgebra.ldiv!(r, sparse(ext), x)
+end
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and multiply with ext.cscmatrix
+"""
+function LinearAlgebra.mul!(r, ext::AbstractExtendableSparseMatrixCSC, x)
+    LinearAlgebra.mul!(r, sparse(ext), x)
+end
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and calculate norm from cscmatrix
+"""
+function LinearAlgebra.norm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2)
+    return LinearAlgebra.norm(sparse(A), p)
+end
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and calculate opnorm from cscmatrix
+"""
+function LinearAlgebra.opnorm(A::AbstractExtendableSparseMatrixCSC, p::Real = 2)
+    return LinearAlgebra.opnorm(sparse(A), p)
+end
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and calculate cond from cscmatrix
+"""
+function LinearAlgebra.cond(A::AbstractExtendableSparseMatrixCSC, p::Real = 2)
+    return LinearAlgebra.cond(sparse(A), p)
+end
+
+"""
+$(SIGNATURES)
+
+[`flush!`](@ref) and check for symmetry of cscmatrix
+"""
+function LinearAlgebra.issymmetric(A::AbstractExtendableSparseMatrixCSC)
+    return LinearAlgebra.issymmetric(sparse(A))
+end
+    
+    
+
+
+    
+
+function Base.:+(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC
+    T(sparse(A) + sparse(B))
+end
+
+function Base.:-(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC
+    T(sparse(A) - sparse(B))
+end
+
+function Base.:*(A::T, B::T) where T<:AbstractExtendableSparseMatrixCSC
+    T(sparse(A) * sparse(B))
+end
+
+"""
+$(SIGNATURES)
+"""
+function Base.:*(d::Diagonal, ext::T)where T<:AbstractExtendableSparseMatrixCSC
+    return T(d * sparse(ext))
+end
+
+"""
+$(SIGNATURES)
+"""
+function Base.:*(ext::T, d::Diagonal) where  T<:AbstractExtendableSparseMatrixCSC
+    return T(sparse(ext) * d)
+end
+
+
+"""
+$(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref)  ext.
+"""
+function Base.:+(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC)
+    return sparse(ext) + csc
+end
+
+
+"""
+$(SIGNATURES)
+
+Subtract  SparseMatrixCSC matrix from  [`ExtendableSparseMatrix`](@ref)  ext.
+"""
+function Base.:-(ext::AbstractExtendableSparseMatrixCSC, csc::SparseMatrixCSC)
+    return sparse(ext) - csc
+end
+
+"""
+$(SIGNATURES)
+
+Subtract  [`ExtendableSparseMatrix`](@ref)  ext from  SparseMatrixCSC.
+"""
+function Base.:-(csc::SparseMatrixCSC, ext::AbstractExtendableSparseMatrixCSC)
+    return csc - sparse(ext)
+end
+
+"""
+$(SIGNATURES)
+"""
+function SparseArrays.dropzeros!(ext::AbstractExtendableSparseMatrixCSC)
+    dropzeros!(sparse(ext))
+end
+
+
+
+function mark_dirichlet(A::AbstractExtendableSparseMatrixCSC;penalty=1.0e20)
+    mark_dirichlet(sparse(A);penalty)
+end
+
+function eliminate_dirichlet(A::T,dirichlet) where T<:AbstractExtendableSparseMatrixCSC
+   T(eliminate_dirichlet(sparse(A),dirichlet))
+end
+
+function eliminate_dirichlet!(A::AbstractExtendableSparseMatrixCSC,dirichlet)
+    eliminate_dirichlet!(sparse(A),dirichlet)
+    A
+end
diff --git a/src/matrix/abstractsparsematrixextension.jl b/src/matrix/abstractsparsematrixextension.jl
new file mode 100644
index 0000000..c206483
--- /dev/null
+++ b/src/matrix/abstractsparsematrixextension.jl
@@ -0,0 +1,19 @@
+"""
+    $(TYPEDEF)
+
+Abstract type for sparse matrix extension.
+
+Subtypes T_ext must implement:
+
+- Constructor `T_ext(m,n)`
+- `SparseArrays.nnz(ext::T_ext)`
+- `Base.size(ext::T_ext)`
+- `Base.sum(extmatrices::Vector{T_ext}, csx)`:  add csr or csc matrix and extension matrices (one per partition) and return csx matrix
+- `Base.+(ext::T_ext, csx)` (optional)  - Add extension matrix and csc/csr matrix, return csx matrix
+- `rawupdateindex!(ext::Text, op, v, i, j, tid) where {Tv, Ti}`: Set `ext[i,j]op=v`, possibly insert new entry into matrix. `tid` is a
+task or partition id
+
+"""
+abstract type AbstractSparseMatrixExtension{Tv, Ti} <: AbstractSparseMatrix{Tv,Ti} end
+
+Base.:+(ext::AbstractSparseMatrixExtension, csx) = sum([ext],csx) 
diff --git a/src/matrix/extendable.jl b/src/matrix/extendable.jl
index cba26b3..d4c35b5 100644
--- a/src/matrix/extendable.jl
+++ b/src/matrix/extendable.jl
@@ -7,7 +7,7 @@ either in cscmatrix, or in lnkmatrix, never in both.
 
 $(TYPEDFIELDS)
 """
-mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrixCSC{Tv, Ti}
+mutable struct ExtendableSparseMatrixCSC{Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti}
     """
     Final matrix data
     """
@@ -17,122 +17,118 @@ mutable struct ExtendableSparseMatrix{Tv, Ti <: Integer} <: AbstractSparseMatrix
     Linked list structure holding data of extension
     """
     lnkmatrix::Union{SparseMatrixLNK{Tv, Ti}, Nothing}
-
+    
     """
     Pattern hash
     """
     phash::UInt64
 end
 
+
 """
 ```
-ExtendableSparseMatrix(Tv,Ti,m,n)
-ExtendableSparseMatrix(Tv,m,n)
-ExtendableSparseMatrix(m,n)
+ExtendableSparseMatrixCSC(Tv,Ti,m,n)
+ExtendableSparseMatrixCSC(Tv,m,n)
+ExtendableSparseMatrixCSC(m,n)
 ```
-Create empty ExtendableSparseMatrix. This is equivalent to `spzeros(m,n)` for
+Create empty ExtendableSparseMatrixCSC. This is equivalent to `spzeros(m,n)` for
 `SparseMartrixCSC`.
 
 """
 
-function ExtendableSparseMatrix{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
-    ExtendableSparseMatrix{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0)
+function ExtendableSparseMatrixCSC{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    ExtendableSparseMatrixCSC{Tv, Ti}(spzeros(Tv, Ti, m, n), nothing, 0)
 end
 
-function ExtendableSparseMatrix(valuetype::Type{Tv},
+function ExtendableSparseMatrixCSC(valuetype::Type{Tv},
                                 indextype::Type{Ti},
                                 m,
                                 n) where {Tv, Ti <: Integer}
-    ExtendableSparseMatrix{Tv, Ti}(m, n)
+    ExtendableSparseMatrixCSC{Tv, Ti}(m, n)
 end
 
-function ExtendableSparseMatrix(valuetype::Type{Tv}, m, n) where {Tv}
-    ExtendableSparseMatrix{Tv, Int}(m, n)
+function ExtendableSparseMatrixCSC(valuetype::Type{Tv}, m, n) where {Tv}
+    ExtendableSparseMatrixCSC{Tv, Int}(m, n)
 end
 
-ExtendableSparseMatrix(m, n) = ExtendableSparseMatrix{Float64, Int}(m, n)
+ExtendableSparseMatrixCSC(m, n) = ExtendableSparseMatrixCSC{Float64, Int}(m, n)
 
 """
 $(SIGNATURES)
 
- Create ExtendableSparseMatrix from SparseMatrixCSC
+Create ExtendableSparseMatrixCSC from SparseMatrixCSC
 """
+function ExtendableSparseMatrixCSC(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer}
+    ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc))
+end
 
-function ExtendableSparseMatrix(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer}
-    return ExtendableSparseMatrix{Tv, Ti}(csc, nothing, phash(csc))
+function ExtendableSparseMatrixCSC{Tv,Ti}(csc::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <: Integer}
+    ExtendableSparseMatrixCSC{Tv, Ti}(csc, nothing, phash(csc))
 end
 
 """
 $(SIGNATURES)
 
- Create ExtendableSparseMatrix from Diagonal
+ Create ExtendableSparseMatrixCSC from Diagonal
 """
-ExtendableSparseMatrix(D::Diagonal) = ExtendableSparseMatrix(sparse(D))
+ExtendableSparseMatrixCSC(D::Diagonal) = ExtendableSparseMatrixCSC(sparse(D))
 
 """
 $(SIGNATURES)
 
- Create ExtendableSparseMatrix from AbstractMatrix, dropping all zero entries.
+ Create ExtendableSparseMatrixCSC from AbstractMatrix, dropping all zero entries.
  This is the equivalent to `sparse(A)`.
 """
-ExtendableSparseMatrix(A::AbstractMatrix) = ExtendableSparseMatrix(sparse(A))
+ExtendableSparseMatrixCSC(A::AbstractMatrix) = ExtendableSparseMatrixCSC(sparse(A))
 
 """
-    ExtendableSparseMatrix(I,J,V)
-    ExtendableSparseMatrix(I,J,V,m,n)
-    ExtendableSparseMatrix(I,J,V,combine::Function)
-    ExtendableSparseMatrix(I,J,V,m,n,combine::Function)
+    ExtendableSparseMatrixCSC(I,J,V)
+    ExtendableSparseMatrixCSC(I,J,V,m,n)
+    ExtendableSparseMatrixCSC(I,J,V,combine::Function)
+    ExtendableSparseMatrixCSC(I,J,V,m,n,combine::Function)
 
-Create ExtendableSparseMatrix from triplet (COO) data.
+Create ExtendableSparseMatrixCSC from triplet (COO) data.
 """
-ExtendableSparseMatrix(I, J, V::AbstractVector) = ExtendableSparseMatrix(sparse(I, J, V))
-function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n)
-    ExtendableSparseMatrix(sparse(I, J, V, m, n))
+ExtendableSparseMatrixCSC(I, J, V::AbstractVector) = ExtendableSparseMatrixCSC(sparse(I, J, V))
+
+function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n)
+    ExtendableSparseMatrixCSC(sparse(I, J, V, m, n))
 end
-function ExtendableSparseMatrix(I, J, V::AbstractVector, combine::Function)
-    ExtendableSparseMatrix(sparse(I, J, V, combine))
+
+function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, combine::Function)
+    ExtendableSparseMatrixCSC(sparse(I, J, V, combine))
 end
-function ExtendableSparseMatrix(I, J, V::AbstractVector, m, n, combine::Function)
-    ExtendableSparseMatrix(sparse(I, J, V, m, n, combine))
+
+function ExtendableSparseMatrixCSC(I, J, V::AbstractVector, m, n, combine::Function)
+    ExtendableSparseMatrixCSC(sparse(I, J, V, m, n, combine))
 end
 
 # THese are probably too much...
-# function Base.transpose(A::ExtendableSparseMatrix)
+# function Base.transpose(A::ExtendableSparseMatrixCSC)
 #     flush!(A)
-#     ExtendableSparseMatrix(Base.transpose(sparse(A)))
+#     ExtendableSparseMatrixCSC(Base.transpose(sparse(A)))
 # end
-# function Base.adjoint(A::ExtendableSparseMatrix)
+# function Base.adjoint(A::ExtendableSparseMatrixCSC)
 #     flush!(A)
-#     ExtendableSparseMatrix(Base.adjoint(sparse(A)))
+#     ExtendableSparseMatrixCSC(Base.adjoint(sparse(A)))
 # end
-# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrix{Tv,Ti}}) where {Tv,Ti}
+# function SparseArrays.sparse(text::LinearAlgebra.Transpose{Tv,ExtendableSparseMatrixCSC{Tv,Ti}}) where {Tv,Ti}
 #     transpose(sparse(parent(text)))
 # end
 
 
 
-"""
-$(SIGNATURES)
-
- Create SparseMatrixCSC from ExtendableSparseMatrix
-"""
-function SparseArrays.SparseMatrixCSC(A::ExtendableSparseMatrix)
-    flush!(A)
-    A.cscmatrix
-end
-
-
 """
 $(SIGNATURES)
 
 Create similar but emtpy extendableSparseMatrix
 """
-function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti}
-    ExtendableSparseMatrix{Tv, Ti}(size(m)...)
+function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}) where {Tv, Ti}
+    ExtendableSparseMatrixCSC{Tv, Ti}(size(m)...)
 end
 
-function Base.similar(m::ExtendableSparseMatrix{Tv, Ti}, ::Type{T}) where {Tv, Ti, T}
-    ExtendableSparseMatrix{T, Ti}(size(m)...)
+function Base.similar(m::ExtendableSparseMatrixCSC{Tv, Ti}, ::Type{T}) where {Tv, Ti, T}
+    ExtendableSparseMatrixCSC{T, Ti}(size(m)...)
 end
 
 """
@@ -144,7 +140,7 @@ search during acces:
 
 ```@example
 using ExtendableSparse # hide
-A=ExtendableSparseMatrix(3,3)
+A=ExtendableSparseMatrixCSC(3,3)
 A[1,2]+=0.1
 A
 ```
@@ -152,7 +148,7 @@ A
 ```@example
 using ExtendableSparse # hide
 
-A=ExtendableSparseMatrix(3,3)
+A=ExtendableSparseMatrixCSC(3,3)
 updateindex!(A,+,0.1,1,2)
 A
 ```
@@ -160,7 +156,7 @@ A
 If `v` is zero, no new entry is created.
 """
 
-function updateindex!(ext::ExtendableSparseMatrix{Tv, Ti},
+function updateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti},
                       op,
                       v,
                       i,
@@ -182,19 +178,20 @@ $(SIGNATURES)
 Like [`updateindex!`](@ref) but without 
 checking if v is zero.
 """
-function rawupdateindex!(ext::ExtendableSparseMatrix{Tv, Ti},
+function rawupdateindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti},
                          op,
                          v,
                          i,
-                         j) where {Tv, Ti <: Integer}
+                         j,
+                         part=1) where {Tv, Ti <: Integer}
     k = findindex(ext.cscmatrix, i, j)
     if k > 0
         ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
     else
-        if ext.lnkmatrix == nothing
-            ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n)
-        end
-        rawupdateindex!(ext.lnkmatrix, op, v, i, j)
+            if ext.lnkmatrix == nothing
+                ext.lnkmatrix = SparseMatrixLNK{Tv, Ti}(ext.cscmatrix.m, ext.cscmatrix.n)
+            end
+            rawupdateindex!(ext.lnkmatrix, op, v, i, j)
     end
     ext
 end
@@ -205,7 +202,7 @@ $(SIGNATURES)
 Find index in CSC matrix and set value if it exists. Otherwise,
 set index in extension if `v` is nonzero.
 """
-function Base.setindex!(ext::ExtendableSparseMatrix{Tv, Ti},
+function Base.setindex!(ext::ExtendableSparseMatrixCSC{Tv, Ti},
                         v::Union{Number,AbstractVecOrMat},
                         i::Integer,
                         j::Integer) where {Tv, Ti}
@@ -226,7 +223,7 @@ $(SIGNATURES)
 Find index in CSC matrix and return value, if it exists.
 Otherwise, return value from extension.
 """
-function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti},
+function Base.getindex(ext::ExtendableSparseMatrixCSC{Tv, Ti},
                        i::Integer,
                        j::Integer) where {Tv, Ti <: Integer}
     k = findindex(ext.cscmatrix, i, j)
@@ -235,46 +232,12 @@ function Base.getindex(ext::ExtendableSparseMatrix{Tv, Ti},
     elseif ext.lnkmatrix == nothing
         return zero(Tv)
     else
-        return ext.lnkmatrix[i, j]
+        v=zero(Tv)
+        v=ext.lnkmatrix[i, j]
     end
 end
 
-"""
-$(SIGNATURES)
 
-Size of ExtendableSparseMatrix.
-"""
-Base.size(ext::ExtendableSparseMatrix) = (ext.cscmatrix.m, ext.cscmatrix.n)
-
-"""
-$(SIGNATURES)
-
-Show matrix
-"""
-function Base.show(io::IO, ::MIME"text/plain", ext::ExtendableSparseMatrix)
-    flush!(ext)
-    xnnz = nnz(ext)
-    m, n = size(ext)
-    print(io,
-          m,
-          "×",
-          n,
-          " ",
-          typeof(ext),
-          " with ",
-          xnnz,
-          " stored ",
-          xnnz == 1 ? "entry" : "entries")
-
-    if !haskey(io, :compact)
-        io = IOContext(io, :compact => true)
-    end
-
-    if !(m == 0 || n == 0 || xnnz == 0)
-        print(io, ":\n")
-        Base.print_array(IOContext(io), ext.cscmatrix)
-    end
-end
 
 """
 $(SIGNATURES)
@@ -282,7 +245,7 @@ $(SIGNATURES)
 If there are new entries in extension, create new CSC matrix by adding the
 cscmatrix and linked list matrix and reset the linked list based extension.
 """
-function flush!(ext::ExtendableSparseMatrix)
+function flush!(ext::ExtendableSparseMatrixCSC)
     if ext.lnkmatrix != nothing && nnz(ext.lnkmatrix) > 0
         ext.cscmatrix = ext.lnkmatrix + ext.cscmatrix
         ext.lnkmatrix = nothing
@@ -291,275 +254,33 @@ function flush!(ext::ExtendableSparseMatrix)
     return ext
 end
 
-"""
-$(SIGNATURES)
 
-[`flush!`](@ref) and return number of nonzeros in ext.cscmatrix.
-"""
-function SparseArrays.nnz(ext::ExtendableSparseMatrix)
+function SparseArrays.sparse(ext::ExtendableSparseMatrixCSC)
     flush!(ext)
-    return nnz(ext.cscmatrix)
+    ext.cscmatrix
 end
 
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and return nonzeros in ext.cscmatrix.
-"""
-function SparseArrays.nonzeros(ext::ExtendableSparseMatrix)
-    flush!(ext)
-    return nonzeros(ext.cscmatrix)
-end
 
 """
 $(SIGNATURES)
 
-Return element type.
-"""
-Base.eltype(::ExtendableSparseMatrix{Tv, Ti}) where {Tv, Ti} = Tv
-
+Reset ExtenableSparseMatrix into state similar to that after creation.
 """
-$(SIGNATURES)
-
-[`flush!`](@ref) and return rowvals in ext.cscmatrix.
-"""
-function SparseArrays.rowvals(ext::ExtendableSparseMatrix)
-    flush!(ext)
-    rowvals(ext.cscmatrix)
-end
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and return colptr of  in ext.cscmatrix.
-"""
-function SparseArrays.getcolptr(ext::ExtendableSparseMatrix)
-    flush!(ext)
-    return getcolptr(ext.cscmatrix)
-end
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and return findnz(ext.cscmatrix).
-"""
-function SparseArrays.findnz(ext::ExtendableSparseMatrix)
-    flush!(ext)
-    return findnz(ext.cscmatrix)
-end
-
-@static if VERSION >= v"1.7"
-    function SparseArrays._checkbuffers(ext::ExtendableSparseMatrix)
-        flush!(ext)
-        SparseArrays._checkbuffers(ext.cscmatrix)
-    end
-end
-
-"""
-    A\b
-
-[`\\`](@ref) for ExtendableSparse. It calls the LU factorization form Sparspak.jl, unless GPL components
-are allowed  in the Julia sysimage and the floating point type of the matrix is Float64 or Complex64.
-In that case, Julias standard `\` is called, which is realized via UMFPACK.
-"""
-function LinearAlgebra.:\(ext::ExtendableSparseMatrix{Tv, Ti},
-                          b::AbstractVector) where {Tv, Ti}
-    flush!(ext)
-    SparspakLU(ext) \ b
+function reset!(A::ExtendableSparseMatrixCSC)
+    A.cscmatrix=spzeros(size(A)...)
+    A.lnkmatrix=nothing
 end
 
-"""
-$(SIGNATURES)
 
-[`\\`](@ref) for Symmetric{ExtendableSparse}
-"""
-function LinearAlgebra.:\(symm_ext::Symmetric{Tm, ExtendableSparseMatrix{Tm, Ti}},
-                          b::AbstractVector) where {Tm, Ti}
-    symm_ext.data \ b # no ldlt yet ...
-end
 
 """
 $(SIGNATURES)
-
-[`\\`](@ref) for Hermitian{ExtendableSparse}
-"""
-function LinearAlgebra.:\(symm_ext::Hermitian{Tm, ExtendableSparseMatrix{Tm, Ti}},
-                          b::AbstractVector) where {Tm, Ti}
-    symm_ext.data \ B # no ldlt yet ...
-end
-
-if USE_GPL_LIBS
-    for (Tv) in (:Float64, :ComplexF64)
-        @eval begin function LinearAlgebra.:\(ext::ExtendableSparseMatrix{$Tv, Ti},
-                                              B::AbstractVector) where {Ti}
-            flush!(ext)
-            ext.cscmatrix \ B
-        end end
-
-        @eval begin function LinearAlgebra.:\(symm_ext::Symmetric{$Tv,
-                                                                  ExtendableSparseMatrix{
-                                                                                         $Tv,
-                                                                                         Ti
-                                                                                         }},
-                                              B::AbstractVector) where {Ti}
-            flush!(symm_ext.data)
-            symm_csc = Symmetric(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo))
-            symm_csc \ B
-        end end
-
-        @eval begin function LinearAlgebra.:\(symm_ext::Hermitian{$Tv,
-                                                                  ExtendableSparseMatrix{
-                                                                                         $Tv,
-                                                                                         Ti
-                                                                                         }},
-                                              B::AbstractVector) where {Ti}
-            flush!(symm_ext.data)
-            symm_csc = Hermitian(symm_ext.data.cscmatrix, Symbol(symm_ext.uplo))
-            symm_csc \ B
-        end end
-    end
-end # USE_GPL_LIBS
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and ldiv with ext.cscmatrix
-"""
-function LinearAlgebra.ldiv!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x)
-    flush!(ext)
-    return LinearAlgebra.ldiv!(r, ext.cscmatrix, x)
-end
-
 """
-$(SIGNATURES)
-
-[`flush!`](@ref) and multiply with ext.cscmatrix
-"""
-function LinearAlgebra.mul!(r, ext::ExtendableSparse.ExtendableSparseMatrix, x)
-    flush!(ext)
-    return LinearAlgebra.mul!(r, ext.cscmatrix, x)
-end
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and calculate norm from cscmatrix
-"""
-function LinearAlgebra.norm(A::ExtendableSparseMatrix, p::Real = 2)
-    flush!(A)
-    return LinearAlgebra.norm(A.cscmatrix, p)
-end
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and calculate opnorm from cscmatrix
-"""
-function LinearAlgebra.opnorm(A::ExtendableSparseMatrix, p::Real = 2)
-    flush!(A)
-    return LinearAlgebra.opnorm(A.cscmatrix, p)
-end
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and calculate cond from cscmatrix
-"""
-function LinearAlgebra.cond(A::ExtendableSparseMatrix, p::Real = 2)
-    flush!(A)
-    return LinearAlgebra.cond(A.cscmatrix, p)
-end
-
-"""
-$(SIGNATURES)
-
-[`flush!`](@ref) and check for symmetry of cscmatrix
-"""
-function LinearAlgebra.issymmetric(A::ExtendableSparseMatrix)
-    flush!(A)
-    return LinearAlgebra.issymmetric(A.cscmatrix)
-end
-
-"""
-$(SIGNATURES)
-
-Add SparseMatrixCSC matrix and [`ExtendableSparseMatrix`](@ref)  ext.
-"""
-function Base.:+(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC)
-    flush!(ext)
-    return ext.cscmatrix + csc
-end
-
-function Base.:+(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix)
-    flush!(A)
-    flush!(B)
-    return ExtendableSparseMatrix(A.cscmatrix + B.cscmatrix)
-end
-
-function Base.:-(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix)
-    flush!(A)
-    flush!(B)
-    return ExtendableSparseMatrix(A.cscmatrix - B.cscmatrix)
-end
-
-function Base.:*(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix)
-    flush!(A)
-    flush!(B)
-    return ExtendableSparseMatrix(A.cscmatrix * B.cscmatrix)
-end
-
-"""
-$(SIGNATURES)
-"""
-function Base.:*(d::Diagonal, ext::ExtendableSparseMatrix)
-    flush!(ext)
-    return ExtendableSparseMatrix(d * ext.cscmatrix)
-end
-
-"""
-$(SIGNATURES)
-"""
-function Base.:*(ext::ExtendableSparseMatrix, d::Diagonal)
-    flush!(ext)
-    return ExtendableSparseMatrix(ext.cscmatrix * d)
-end
-
-"""
-$(SIGNATURES)
-
-Subtract  SparseMatrixCSC matrix from  [`ExtendableSparseMatrix`](@ref)  ext.
-"""
-function Base.:-(ext::ExtendableSparseMatrix, csc::SparseMatrixCSC)
-    flush!(ext)
-    return ext.cscmatrix - csc
-end
-
-"""
-$(SIGNATURES)
-
-Subtract  [`ExtendableSparseMatrix`](@ref)  ext from  SparseMatrixCSC.
-"""
-function Base.:-(csc::SparseMatrixCSC, ext::ExtendableSparseMatrix)
-    flush!(ext)
-    return csc - ext.cscmatrix
-end
-
-"""
-$(SIGNATURES)
-"""
-function SparseArrays.dropzeros!(ext::ExtendableSparseMatrix)
-    flush!(ext)
-    dropzeros!(ext.cscmatrix)
-end
-
-"""
-$(SIGNATURES)
-"""
-function Base.copy(S::ExtendableSparseMatrix)
+function Base.copy(S::ExtendableSparseMatrixCSC)
     if isnothing(S.lnkmatrix)
-        ExtendableSparseMatrix(copy(S.cscmatrix), nothing, S.phash)
+        ExtendableSparseMatrixCSC(copy(S.cscmatrix), nothing,S.phash)
     else
-        ExtendableSparseMatrix(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash)
+        ExtendableSparseMatrixCSC(copy(S.cscmatrix), copy(S.lnkmatrix), S.phash)
     end
 end
 
@@ -568,7 +289,7 @@ end
 
 Create a pointblock matrix.
 """
-function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti}
+function pointblock(A0::ExtendableSparseMatrixCSC{Tv,Ti},blocksize) where {Tv,Ti}
     A=SparseMatrixCSC(A0)
     colptr=A.colptr
     rowval=A.rowval
@@ -578,7 +299,7 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti}
     nblock=n÷blocksize
     b=SMatrix{blocksize,blocksize}(block)
     Tb=typeof(b)
-    Ab=ExtendableSparseMatrix{Tb,Ti}(nblock,nblock)
+    Ab=ExtendableSparseMatrixCSC{Tb,Ti}(nblock,nblock)
     
     
     for i=1:n
@@ -597,19 +318,3 @@ function pointblock(A0::ExtendableSparseMatrix{Tv,Ti},blocksize) where {Tv,Ti}
 end
 
 
-function mark_dirichlet(A::ExtendableSparseMatrix;penalty=1.0e20)
-    flush!(A)
-    mark_dirichlet(A.cscmatrix;penalty)
-end
-
-function eliminate_dirichlet(A::ExtendableSparseMatrix,dirichlet)
-    flush!(A)
-    ExtendableSparseMatrix(eliminate_dirichlet(A.cscmatrix,dirichlet))
-end
-
-function eliminate_dirichlet!(A::ExtendableSparseMatrix,dirichlet)
-    flush!(A)
-    eliminate_dirichlet!(A.cscmatrix,dirichlet)
-    A
-end
-
diff --git a/src/matrix/genericextendablesparsematrixcsc.jl b/src/matrix/genericextendablesparsematrixcsc.jl
new file mode 100644
index 0000000..457413e
--- /dev/null
+++ b/src/matrix/genericextendablesparsematrixcsc.jl
@@ -0,0 +1,93 @@
+mutable struct GenericExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti}
+    """
+    Final matrix data
+    """
+    cscmatrix::SparseMatrixCSC{Tv, Ti}
+    
+    """
+    Matrix for new entries
+    """
+    xmatrix::Tm
+end
+
+
+function GenericExtendableSparseMatrixCSC{Tm, Tv, Ti}(m::Integer,n::Integer) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti<:Integer}
+    GenericExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n),
+                                 Tm(m,n)
+                                 )
+end
+
+
+nnznew(ext::GenericExtendableSparseMatrixCSC)=nnz(ext.xmatrix)
+
+function reset!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where {Tm,Tv,Ti}
+    m,n=size(ext.cscmatrix)
+    ext.cscmatrix=spzeros(Tv, Ti, m, n)
+    ext.xmatrix=Tm(m,n)
+    ext
+end
+
+
+function flush!(ext::GenericExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti}
+    if nnz(ext.xmatrix)>0
+        ext.cscmatrix=ext.xmatrix+ext.cscmatrix
+        ext.xmatrix=Tm(size(ext.cscmatrix)...)
+    end
+    ext
+end
+    
+function SparseArrays.sparse(ext::GenericExtendableSparseMatrixCSC)
+    flush!(ext)
+    ext.cscmatrix
+end
+
+function Base.setindex!(ext::GenericExtendableSparseMatrixCSC,
+                        v::Union{Number,AbstractVecOrMat},
+                        i::Integer,
+                        j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = v
+    else
+        setindex!(ext.xmatrix,v,i,j)
+    end
+end
+
+
+function Base.getindex(ext::GenericExtendableSparseMatrixCSC,
+                       i::Integer,
+                       j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k]
+    else
+        getindex(ext.xmatrix,i,j)
+    end
+end
+
+function rawupdateindex!(ext::GenericExtendableSparseMatrixCSC,
+                         op,
+                         v,
+                         i,
+                         j)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.xmatrix,op,v,i,j)
+    end
+end
+
+function updateindex!(ext::GenericExtendableSparseMatrixCSC,
+                         op,
+                         v,
+                         i,
+                         j)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        updateindex!(ext.xmatrix,op,v,i,j)
+    end
+end
+
diff --git a/src/matrix/genericmtextendablesparsematrixcsc.jl b/src/matrix/genericmtextendablesparsematrixcsc.jl
new file mode 100644
index 0000000..88a4d68
--- /dev/null
+++ b/src/matrix/genericmtextendablesparsematrixcsc.jl
@@ -0,0 +1,143 @@
+mutable struct GenericMTExtendableSparseMatrixCSC{Tm<:AbstractSparseMatrixExtension, Tv, Ti <: Integer} <: AbstractExtendableSparseMatrixCSC{Tv, Ti}
+    """
+    Final matrix data
+    """
+    cscmatrix::SparseMatrixCSC{Tv, Ti}
+
+    """
+        Vector of dictionaries for new entries
+    """
+    xmatrices::Vector{Tm}
+
+    colparts::Vector{Ti}
+    partnodes::Vector{Ti}
+end
+
+function GenericMTExtendableSparseMatrixCSC{Tm, Tv, Ti}(n,m,p::Integer=1) where{Tm<:AbstractSparseMatrixExtension, Tv, Ti}
+    GenericMTExtendableSparseMatrixCSC(spzeros(Tv, Ti, m, n),
+                                [Tm(m,n) for i=1:p],
+                                Ti[1,2],
+                                Ti[1,n+1],
+                                )
+end
+
+function partitioning!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}, colparts, partnodes) where {Tm, Tv, Ti}
+    ext.partnodes=partnodes
+    ext.colparts=colparts
+    ext
+end
+
+
+function reset!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti},p::Integer) where {Tm,Tv,Ti}
+    m,n=size(ext.cscmatrix)
+    ext.cscmatrix=spzeros(Tv, Ti, m, n)
+    ext.xmatrices=[Tm(m,n) for i=1:p]
+    ext.colparts=Ti[1,2]
+    ext.partnodes=Ti[1,n+1]
+    ext
+end
+
+function reset!(ext::GenericMTExtendableSparseMatrixCSC)
+    reset!(ext,length(ext.xmatrices))
+end
+
+
+function flush!(ext::GenericMTExtendableSparseMatrixCSC{Tm,Tv,Ti}) where{Tm,Tv,Ti}
+    ext.cscmatrix=Base.sum(ext.xmatrices, ext.cscmatrix)
+    np=length(ext.xmatrices)
+    (m,n)=size(ext.cscmatrix)
+    ext.xmatrices=[Tm(m,n) for i=1:np]
+    ext
+end
+
+
+function SparseArrays.sparse(ext::GenericMTExtendableSparseMatrixCSC)
+    flush!(ext)
+    ext.cscmatrix
+end
+
+function Base.setindex!(ext::GenericMTExtendableSparseMatrixCSC,
+                        v::Union{Number,AbstractVecOrMat},
+                        i::Integer,
+                        j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = v
+    else
+        error("use rawupdateindex! for new entries into GenericMTExtendableSparseMatrixCSC")
+    end
+end
+
+function Base.getindex(ext::GenericMTExtendableSparseMatrixCSC,
+                       i::Integer,
+                       j::Integer)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        return ext.cscmatrix.nzval[k]
+    elseif sum(nnz,ext.xmatrices) == 0
+        return zero(eltype(ext.cscmatrix))
+    else
+        error("flush! GenericMTExtendableSparseMatrixCSC before using getindex")
+    end
+end
+
+nnznew(ext::GenericMTExtendableSparseMatrixCSC)=sum(nnz,ext.xmatrices)
+
+
+function rawupdateindex!(ext::GenericMTExtendableSparseMatrixCSC,
+                         op,
+                         v,
+                         i,
+                         j,
+                         tid=1)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        rawupdateindex!(ext.xmatrices[tid],op,v,i,j)
+    end
+end
+
+
+function updateindex!(ext::GenericMTExtendableSparseMatrixCSC,
+                      op,
+                      v,
+                      i,
+                      j,
+                      tid=1)
+    k = findindex(ext.cscmatrix, i, j)
+    if k > 0
+        ext.cscmatrix.nzval[k] = op(ext.cscmatrix.nzval[k], v)
+    else
+        updateindex!(ext.xmatrices[tid],op,v,i,j)
+    end
+end
+
+
+
+
+# Needed in 1.9
+function Base.:*(ext::GenericMTExtendableSparseMatrixCSC{Tm, TA} where Tm<:ExtendableSparse.AbstractSparseMatrixExtension, x::Union{StridedVector, BitVector}) where TA
+    mul!(similar(x),ext,x)
+end
+
+function LinearAlgebra.mul!(r, ext::GenericMTExtendableSparseMatrixCSC, x)
+    flush!(ext)
+    A=ext.cscmatrix
+    colparts=ext.colparts
+    partnodes=ext.partnodes
+    rows = SparseArrays.rowvals(A)
+    vals = nonzeros(A)
+    r.=zero(eltype(ext))
+    m,n=size(A)
+    for icol=1:length(colparts)-1
+        @tasks for ip in colparts[icol]:colparts[icol+1]-1
+            @inbounds for inode in  partnodes[ip]:partnodes[ip+1]-1
+                @inbounds for i in nzrange(A,inode)
+                    r[rows[i]]+=vals[i]*x[inode]
+                end
+            end
+        end
+    end
+    r
+end
diff --git a/src/matrix/sparsematrixdict.jl b/src/matrix/sparsematrixdict.jl
new file mode 100644
index 0000000..c5ee469
--- /dev/null
+++ b/src/matrix/sparsematrixdict.jl
@@ -0,0 +1,122 @@
+"""
+    $(TYPEDEF)    
+
+Sparse matrix where entries are organized as dictionary.
+"""
+mutable struct SparseMatrixDict{Tv,Ti} <: AbstractSparseMatrixExtension{Tv,Ti}
+    m::Ti
+    n::Ti
+    values::Dict{Pair{Ti,Ti}, Tv}
+    SparseMatrixDict{Tv,Ti}(m,n) where {Tv,Ti} = new(m,n,Dict{Pair{Ti,Ti}, Tv}())
+end
+
+function reset!(m::SparseMatrixDict{Tv,Ti}) where {Tv,Ti}
+    m.values=Dict{Pair{Ti,Ti}, Tv}()
+end
+
+function Base.setindex!(m::SparseMatrixDict,v,i,j)
+	m.values[Pair(i,j)]=v
+end
+
+function rawupdateindex!(m::SparseMatrixDict{Tv,Ti},op,v,i,j) where {Tv,Ti}
+    p=Pair(i,j)
+    m.values[p]=op(get(m.values, p, zero(Tv)),v)
+end
+
+function Base.getindex(m::SparseMatrixDict{Tv},i,j) where Tv
+    get(m.values,Pair(i,j),zero(Tv)) 
+end
+
+Base.size(m::SparseMatrixDict)=(m.m,m.n)
+
+SparseArrays.nnz(m::SparseMatrixDict)=length(m.values)
+
+function SparseArrays.sparse(mat::SparseMatrixDict{Tv,Ti}) where {Tv,Ti} 
+    l=length(mat.values)
+    I=Vector{Ti}(undef,l)
+    J=Vector{Ti}(undef,l)
+    V=Vector{Tv}(undef,l)
+    i=1
+    for (p,v) in mat.values
+	I[i]=first(p)
+	J[i]=last(p)
+	V[i]=v
+	i=i+1
+    end
+    @static if VERSION>=v"1.10"
+        return SparseArrays.sparse!(I,J,V,size(mat)...,+)
+    else
+        return SparseArrays.sparse(I,J,V,size(mat)...,+)
+    end
+end
+
+function Base.:+(dictmatrix::SparseMatrixDict{Tv,Ti}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} 
+    lnew=length(dictmatrix.values)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end            
+        end
+        
+        for (p,v) in dictmatrix.values
+	    I[i]=first(p)
+	    J[i]=last(p)
+	    V[i]=v
+	    i=i+1
+        end
+        @static if VERSION>=v"1.10"
+            return SparseArrays.sparse!(I,J,V,m,n,+)
+        else
+            return SparseArrays.sparse(I,J,V,m,n,+)
+        end
+    end
+    cscmatrix
+end
+
+function Base.sum(dictmatrices::Vector{SparseMatrixDict{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(m->length(m.values),dictmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+        
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end            
+        end
+        
+        ip=1
+        for m in dictmatrices
+            for (p,v) in m.values
+	        I[i]=first(p)
+	        J[i]=last(p)
+	        V[i]=v
+	        i=i+1
+            end
+            ip=ip+1
+        end
+        @static if VERSION>=v"1.10"
+            return SparseArrays.sparse!(I,J,V,m,n,+)
+        else
+            return SparseArrays.sparse(I,J,V,m,n,+)
+        end
+    end
+    return cscmatrix
+end
diff --git a/src/matrix/sparsematrixdilnkc.jl b/src/matrix/sparsematrixdilnkc.jl
new file mode 100644
index 0000000..ea58534
--- /dev/null
+++ b/src/matrix/sparsematrixdilnkc.jl
@@ -0,0 +1,472 @@
+"""
+        $(TYPEDEF)
+    
+    Modification of SparseMatrixLNK where the pointer to first index of
+column j is stored in a dictionary.
+    """
+mutable struct SparseMatrixDILNKC{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
+    """
+        Number of rows
+        """
+    m::Ti
+    
+    """
+    Number of columns
+    """
+    n::Ti
+
+    """
+    Number of nonzeros
+    """
+    nnz::Ti
+
+    """
+    Length of arrays
+    """
+    nentries::Ti
+
+    """
+    Linked list of column entries. Initial length is n,
+    it grows with each new entry.
+
+    colptr[index] contains the next
+    index in the list or zero, in the later case terminating the list which
+    starts at index 1<=j<=n for each column j.
+    """
+    colptr::Vector{Ti}
+
+    """
+    Dictionary to store start indices of columns
+    """
+    colstart::Dict{Ti,Ti}
+
+    """
+    Row numbers. For each index it contains the zero (initial state)
+    or the row numbers corresponding to the column entry list in colptr.
+    """
+    rowval::Vector{Ti}
+
+    """
+    Nonzero entry values correspondin to each pair
+    (colptr[index],rowval[index])
+    """
+    nzval::Vector{Tv}
+end
+
+"""
+$(SIGNATURES)
+    
+Constructor of empty matrix.
+"""
+function SparseMatrixDILNKC{Tv, Ti}(m, n) where {Tv, Ti <: Integer}
+    SparseMatrixDILNKC{Tv, Ti}(m, n, 0, 0,  zeros(Ti,10), Dict{Ti,Ti}(), zeros(Ti,10), zeros(Ti,10))
+end
+
+"""
+$(SIGNATURES)
+    
+Constructor of empty matrix.
+"""
+function SparseMatrixDILNKC(valuetype::Type{Tv}, indextype::Type{Ti}, m,
+                         n) where {Tv, Ti <: Integer}
+    SparseMatrixDILNKC{Tv, Ti}(m, n)
+end
+
+"""
+$(SIGNATURES)
+    
+Constructor of empty matrix.
+"""
+SparseMatrixDILNKC(valuetype::Type{Tv}, m, n) where {Tv} = SparseMatrixDILNKC(Tv, Int, m, n)
+
+"""
+$(SIGNATURES)
+    
+Constructor of empty matrix.
+"""
+SparseMatrixDILNKC(m, n) = SparseMatrixDILNKC(Float64, m, n)
+
+"""
+$(SIGNATURES)
+    
+Constructor from SparseMatrixCSC.
+
+"""
+function SparseMatrixDILNKC(csc::SparseArrays.SparseMatrixCSC{Tv, Ti}) where {Tv, Ti <:
+                                                                               Integer}
+    lnk = SparseMatrixDILNKC{Tv, Ti}(csc.m, csc.n)
+    for j = 1:(csc.n)
+        for k = csc.colptr[j]:(csc.colptr[j + 1] - 1)
+            lnk[csc.rowval[k], j] = csc.nzval[k]
+        end
+    end
+    lnk
+end
+
+"""
+$(SIGNATURES)
+    
+Find index in matrix.
+"""
+function findindex(lnk::SparseMatrixDILNKC, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k = get(lnk.colstart, j, 0)
+    if k==0
+        return 0,0
+    end
+    k0 = k
+    while k > 0
+        if lnk.rowval[k] == i
+            return k, 0
+        end
+        k0 = k
+        k = lnk.colptr[k]
+    end
+    return 0, k0
+end
+
+"""
+$(SIGNATURES)
+    
+Return value stored for entry or zero if not found
+"""
+function Base.getindex(lnk::SparseMatrixDILNKC{Tv, Ti}, i, j) where {Tv, Ti}
+    k, k0 = findindex(lnk, i, j)
+    if k == 0
+        return zero(Tv)
+    else
+        return lnk.nzval[k]
+    end
+end
+
+"""
+    $(SIGNATURES)
+
+Add entry.
+"""
+function addentry!(lnk::SparseMatrixDILNKC, i, j, k, k0)
+    # increase number of entries
+    lnk.nentries += 1
+    if length(lnk.nzval) < lnk.nentries
+        newsize = Int(ceil(5.0 * lnk.nentries / 4.0))
+        resize!(lnk.nzval, newsize)
+        resize!(lnk.rowval, newsize)
+        resize!(lnk.colptr, newsize)
+    end
+    
+    if k0==0
+        lnk.colstart[j]=lnk.nentries
+    end
+
+    # Append entry if not found
+    lnk.rowval[lnk.nentries] = i
+
+    # Shift the end of the list
+    lnk.colptr[lnk.nentries] = 0
+
+    if k0>0
+        lnk.colptr[k0] = lnk.nentries
+    end
+    
+    # Update number of nonzero entries
+    lnk.nnz += 1
+    return lnk.nentries
+end
+
+"""
+$(SIGNATURES)
+    
+Update value of existing entry, otherwise extend matrix if v is nonzero.
+"""
+function Base.setindex!(lnk::SparseMatrixDILNKC, v, i, j)
+    if !((1 <= i <= lnk.m) & (1 <= j <= lnk.n))
+        throw(BoundsError(lnk, (i, j)))
+    end
+
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = v
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = v
+    end
+    return lnk
+end
+
+"""
+$(SIGNATURES)
+
+Update element of the matrix  with operation `op`. 
+It assumes that `op(0,0)==0`. If `v` is zero, no new 
+entry is created.
+"""
+function updateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti}
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = op(lnk.nzval[k], v)
+        return lnk
+    end
+    if !iszero(v)
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = op(zero(Tv), v)
+    end
+    lnk
+end
+
+"""
+$(SIGNATURES)
+
+Update element of the matrix  with operation `op`. 
+It assumes that `op(0,0)==0`. If `v` is zero a new entry
+is created nevertheless.
+"""
+function rawupdateindex!(lnk::SparseMatrixDILNKC{Tv, Ti}, op, v, i, j) where {Tv, Ti}
+    k, k0 = findindex(lnk, i, j)
+    if k > 0
+        lnk.nzval[k] = op(lnk.nzval[k], v)
+    else
+        k = addentry!(lnk, i, j, k, k0)
+        lnk.nzval[k] = op(zero(Tv), v)
+    end
+    lnk
+end
+
+"""
+$(SIGNATURES)
+
+Return tuple containing size of the matrix.
+"""
+Base.size(lnk::SparseMatrixDILNKC) = (lnk.m, lnk.n)
+
+"""
+$(SIGNATURES)
+
+Return number of nonzero entries.
+"""
+SparseArrays.nnz(lnk::SparseMatrixDILNKC) = lnk.nnz
+
+
+"""
+    $(SIGNATURES)
+Add lnk and csc via interim COO (coordinate) format, i.e. arrays I,J,V.
+"""
+function add_via_COO(lnk::SparseMatrixDILNKC{Tv, Ti},
+                     csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer}
+    (;colptr,nzval,rowval,m,n)=csc
+    l=nnz(lnk)+nnz(csc)
+    I=Vector{Ti}(undef,l)
+    J=Vector{Ti}(undef,l)
+    V=Vector{Tv}(undef,l)
+    i=1
+    if nnz(csc)>0
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=icsc
+                J[i]=rowval[j]
+                V[i]=nzval[j]
+                i=i+1
+            end            
+        end
+    end
+    for (j,k) in lnk.colstart
+        while k>0
+            I[i]=lnk.rowval[k]
+            J[i]=j
+            V[i]=lnk.nzval[k]
+            k=lnk.colptr[k]
+            i=i+1
+        end
+    end
+    @static if VERSION>=v"1.10"
+        return SparseArrays.sparse!(I,J,V,m,n,+)
+    else
+        return SparseArrays.sparse(I,J,V,m,n,+)
+    end
+end
+
+
+"""
+    $(SIGNATURES)
+Add lnk and csc without creation of intermediate data.
+(to be fixed)
+"""
+function add_directly(lnk::SparseMatrixDILNKC{Tv, Ti},
+                      csc::SparseMatrixCSC)::SparseMatrixCSC where {Tv, Ti <: Integer}
+    @assert(csc.m==lnk.m)
+    @assert(csc.n==lnk.n)
+
+    # overallocate arrays in order to avoid
+    # presumably slower push!
+    xnnz = nnz(csc) + nnz(lnk)
+    colptr = Vector{Ti}(undef, csc.n + 1)
+    rowval = Vector{Ti}(undef, xnnz)
+    nzval = Vector{Tv}(undef, xnnz)
+
+    # Detect the maximum column length of lnk
+    lnk_maxcol = 0
+    for (j,k) in lnk.colstart
+        lcol = zero(Ti)
+        while k > 0
+            lcol += 1
+            k = lnk.colptr[k]
+        end
+        lnk_maxcol = max(lcol, lnk_maxcol)
+    end
+
+    # pre-allocate column  data
+    col = [ColEntry{Tv, Ti}(0, zero(Tv)) for i = 1:lnk_maxcol]
+
+    inz = 1 # counts the nonzero entries in the new matrix
+
+    in_csc_col(jcsc, j) = (nnz(csc) > zero(Ti)) && (jcsc < csc.colptr[j + 1])
+
+    in_lnk_col(jlnk, l_lnk_col) = (jlnk <= l_lnk_col)
+
+    # loop over all columns
+    for j = 1:(csc.n)
+        # Copy extension entries into col and sort them
+        k = get(lnk.colstart, j, 0)
+        l_lnk_col = 0
+        while k > 0
+            if lnk.rowval[k] > 0
+                l_lnk_col += 1
+                col[l_lnk_col] = ColEntry(lnk.rowval[k], lnk.nzval[k])
+            end
+            k = lnk.colptr[k]
+        end
+        sort!(col, 1, l_lnk_col, Base.QuickSort, Base.Forward)
+
+        # jointly sort lnk and csc entries  into new matrix data
+        # this could be replaced in a more transparent manner by joint sorting:
+        # make a joint array for csc and lnk col, sort them.
+        # Will this be faster? 
+
+        colptr[j] = inz
+        jlnk = one(Ti) # counts the entries in col
+        jcsc = csc.colptr[j]  # counts entries in csc
+
+        while true
+            if in_csc_col(jcsc, j) &&
+               (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] < col[jlnk].rowval ||
+                !in_lnk_col(jlnk, l_lnk_col))
+                # Insert entries from csc into new structure
+                rowval[inz] = csc.rowval[jcsc]
+                nzval[inz] = csc.nzval[jcsc]
+                jcsc += 1
+                inz += 1
+            elseif in_csc_col(jcsc, j) &&
+                   (in_lnk_col(jlnk, l_lnk_col) && csc.rowval[jcsc] == col[jlnk].rowval)
+                # Add up entries from csc and lnk
+                rowval[inz] = csc.rowval[jcsc]
+                nzval[inz] = csc.nzval[jcsc] + col[jlnk].nzval
+                jcsc += 1
+                inz += 1
+                jlnk += 1
+            elseif in_lnk_col(jlnk, l_lnk_col)
+                # Insert entries from lnk res. col into new structure
+                rowval[inz] = col[jlnk].rowval
+                nzval[inz] = col[jlnk].nzval
+                jlnk += 1
+                inz += 1
+            else
+                break
+            end
+        end
+    end
+    colptr[csc.n + 1] = inz
+    resize!(rowval, inz - 1)
+    resize!(nzval, inz - 1)
+    SparseMatrixCSC{Tv, Ti}(csc.m, csc.n, colptr, rowval, nzval)
+end
+
+
+
+"""
+    $(SIGNATURES)
+
+Add SparseMatrixCSC matrix and [`SparseMatrixDILNKC`](@ref)  lnk, returning a SparseMatrixCSC
+"""
+#Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = add_directly(lnk, csc)
+Base.:+(lnk::SparseMatrixDILNKC, csc::SparseMatrixCSC) = sum([lnk],csc)
+
+function Base.sum(lnkdictmatrices::Vector{SparseMatrixDILNKC{Tv,Ti}}, cscmatrix::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    lnew=sum(nnz,lnkdictmatrices)
+    if lnew>0
+        (;colptr,nzval,rowval,m,n)=cscmatrix
+        l=lnew+nnz(cscmatrix)
+        I=Vector{Ti}(undef,l)
+        J=Vector{Ti}(undef,l)
+        V=Vector{Tv}(undef,l)
+        i=1
+        
+        for icsc=1:length(colptr)-1
+            for j=colptr[icsc]:colptr[icsc+1]-1
+                I[i]=rowval[j]
+                J[i]=icsc
+                V[i]=nzval[j]
+                i=i+1
+            end            
+        end
+
+        for lnk in lnkdictmatrices
+            for (j,k) in lnk.colstart
+                while k>0
+                    I[i]=lnk.rowval[k]
+                    J[i]=j
+                    V[i]=lnk.nzval[k]
+                    k=lnk.colptr[k]
+                    i=i+1
+                end
+            end
+        end
+        @assert l==i-1
+        @static if VERSION>=v"1.10"
+            return SparseArrays.sparse!(I,J,V,m,n,+)
+        else
+            return SparseArrays.sparse(I,J,V,m,n,+)
+        end
+    end
+    return cscmatrix
+end
+        
+function reset!(m::SparseMatrixDILNKC{Tv,Ti}) where {Tv,Ti}
+    m.nnz=0
+    m.nentries=0
+    m.colptr=zeros(Ti,10)
+    m.colstart::Dict{Ti,Ti}
+    m.rowval=zeros(Ti,10)
+    m.nzval=zeros(Ti,10)
+    m
+end
+
+
+"""
+$(SIGNATURES)
+    
+Constructor from SparseMatrixDILNKC.
+
+"""
+function SparseArrays.SparseMatrixCSC(lnk::SparseMatrixDILNKC)::SparseMatrixCSC
+    csc = spzeros(lnk.m, lnk.n)
+    lnk + csc
+end
+
+function SparseArrays.sparse(lnk::SparseMatrixDILNKC)
+    lnk + spzeros(lnk.m, lnk.n)
+end
+
+function Base.copy(S::SparseMatrixDILNKC)
+    SparseMatrixDILNKC(size(S, 1),
+                        size(S, 2),
+                        S.nnz,
+                        S.nentries,
+                        copy(S.colptr),
+                        copy(S.colstart),
+                        copy(S.rowvals),
+                        copy(S.nzval))
+end
diff --git a/src/matrix/sparsematrixlnk.jl b/src/matrix/sparsematrixlnk.jl
index bc45abc..2ff3f05 100644
--- a/src/matrix/sparsematrixlnk.jl
+++ b/src/matrix/sparsematrixlnk.jl
@@ -18,7 +18,7 @@ can be conveniently updated via `push!`.  No copying of existing data is necessa
 
 $(TYPEDFIELDS)
 """
-mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrix{Tv, Ti}
+mutable struct SparseMatrixLNK{Tv, Ti <: Integer} <: AbstractSparseMatrixExtension{Tv, Ti}
     """
     Number of rows
     """
@@ -278,7 +278,7 @@ end
 
 # Struct holding pair of value and row
 # number, for sorting
-struct ColEntry{Tv, Ti <: Integer}
+mutable struct ColEntry{Tv, Ti <: Integer}
     rowval::Ti
     nzval::Tv
 end
diff --git a/test/ExperimentalParallel.jl b/test/ExperimentalParallel.jl
new file mode 100644
index 0000000..45b05a9
--- /dev/null
+++ b/test/ExperimentalParallel.jl
@@ -0,0 +1,486 @@
+module ExperimentalParallel
+
+using ExtendableSparse,SparseArrays
+using ExtendableSparse.Experimental
+using BenchmarkTools
+using OhMyThreads: @tasks
+using Test
+
+import ChunkSplitters
+# Methods to test parallel assembly
+# Will eventually become part of the package.
+
+"""
+
+Return colored partitioing of grid made up by `X` and `Y`  for work with `max(nt,4)` threads
+as a vector `p` of a vector pairs of index ranges such that `p[i]` containes partions
+of color i which can be assembled independently.
+
+The current algorithm  creates `nt^2` partitions with `nt` colors.
+"""
+function part2d(X,Y, nt)
+    nt=max(4,nt)
+    XP=collect(ChunkSplitters.chunks(1:length(X)-1,n=nt))
+    YP=collect(ChunkSplitters.chunks(1:length(Y)-1,n=nt))
+    partitions = [Tuple{StepRange{Int64}, StepRange{Int64}}[] for i = 1:nt]
+    ipart=1
+    col=1
+    for jp=1:nt
+        for ip=1:nt
+            push!(partitions[col], (XP[ip], YP[jp]))
+            col=(col -1 +1 )%nt+1
+        end
+        col=(col -1 +2)%nt+1
+    end
+    partitions
+end
+
+function colpart2d(X,Y,nt)
+    Nx=length(X)
+    Ny=length(Y)
+    p=part2d(X,Y,nt)
+    pc=zeros(Int,sum(length,p))
+    jp=1
+    for icol=1:length(p)
+        for ip=1:length(p[icol])
+            pc[jp]=icol
+            jp+=1
+        end
+    end
+    p,pc
+end
+
+
+"""
+    showgrid(Makie, ColorSchemes, X,Y,nt)
+
+Show grid partitioned according to [`part2d`](@ref). Needs a makie variant and ColorSchemes
+to be passed as modules.
+"""
+function showgrid(Makie, ColorSchemes, X,Y,nt)
+    f = Makie.Figure()
+    ax = Makie.Axis(f[1, 1]; aspect = 1)
+    p=part2d(X,Y,nt)
+    ncol=length(p)
+    @show sum(length,p), ncol
+    colors=get(ColorSchemes.rainbow,collect(1:ncol)/ncol)
+    poly=Vector{Makie.Point2f}(undef,4)
+    for icol = 1:ncol
+        for (xp, yp) in p[icol]
+            for j in yp
+                for i in xp
+                    poly[1]=Makie.Point2f(X[i], Y[j])
+                    poly[2]=Makie.Point2f(X[i + 1], Y[j])
+                    poly[3]=Makie.Point2f(X[i + 1], Y[j + 1])
+                    poly[4]=Makie.Point2f(X[i], Y[j + 1])
+                    Makie.poly!(copy(poly),color = colors[icol])
+                end
+            end
+        end
+    end
+    f
+end
+
+
+"""
+
+Assemble edge for finite volume laplacian.
+Used by [`partassemble!`](@ref).
+"""
+function assembleedge!(A,v,k,l)
+    rawupdateindex!(A,+,v,k,k)
+    rawupdateindex!(A,+,-v,k,l)
+    rawupdateindex!(A,+,-v,l,k)
+    rawupdateindex!(A,+,v,l,l)
+end
+
+function assembleedge!(A,v,k,l,tid)
+    rawupdateindex!(A,+,v,k,k,tid)
+    rawupdateindex!(A,+,-v,k,l,tid)
+    rawupdateindex!(A,+,-v,l,k,tid)
+    rawupdateindex!(A,+,v,l,l,tid)
+end
+
+"""
+Assemble finite volume Laplacian + diagnonal term
+on grid cell `i,j`.
+Used by [`partassemble!`](@ref).
+"""
+function assemblecell!(A,lindexes,X,Y,i,j,d)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+    
+    assembleedge!(A,0.5*hx/hy,ij00,ij01)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11)
+    v=0.25*hx*hy
+    rawupdateindex!(A,+,v*d,ij00,ij00)
+    rawupdateindex!(A,+,v*d,ij01,ij01)
+    rawupdateindex!(A,+,v*d,ij10,ij10)
+    rawupdateindex!(A,+,v*d,ij11,ij11)
+end
+
+function assemblecell!(A,lindexes,X,Y,i,j,d,tid)
+    hx=X[i+1]-X[i]
+    hy=Y[j+1]-Y[j]
+    ij00=lindexes[i,j]
+    ij10=lindexes[i+1,j]
+    ij11=lindexes[i+1,j+1]
+    ij01=lindexes[i,j+1]
+    
+    assembleedge!(A,0.5*hx/hy,ij00,ij01,tid)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11,tid)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10,tid)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11,tid)
+    v=0.25*hx*hy
+    rawupdateindex!(A,+,v*d,ij00,ij00,tid)
+    rawupdateindex!(A,+,v*d,ij01,ij01,tid)
+    rawupdateindex!(A,+,v*d,ij10,ij10,tid)
+    rawupdateindex!(A,+,v*d,ij11,ij11,tid)
+end
+
+"""
+
+Assemble finite volume Laplacian + diagnonal term
+on grid cells in partition described by ranges xp,yp.
+Used by [`partassemble!`](@ref).
+"""
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+    for j in yp
+	for i in xp
+	    assemblecell!(A,lindexes,X,Y,i,j,d)
+	end
+    end
+end
+
+function assemblepartition!(A,lindexes,X,Y,xp,yp,d,tid)
+    for j in yp
+	for i in xp
+	    assemblecell!(A,lindexes,X,Y,i,j,d,tid)
+	end
+    end
+end
+
+"""
+    partassemble!(A,N,nt=1;xrange=(0,1),yrange=(0,1), d=0.1)
+
+Partitioned, cellwise, multithreaded assembly of finite difference matrix for
+` -Δu + d*u=f` with homogeneous Neumann bc on grid  set up by coordinate vectors
+`X` and `Y` partitioned for work with `nt` threads
+Does not work during structure setup.
+"""
+function partassemble!(A,X,Y,nt=1;d=0.1)
+    Nx=length(X)
+    Ny=length(Y)
+    size(A,1)==Nx*Ny || error("incompatible size of A")
+    size(A,2)==Nx*Ny || error("incompatible size of A")
+
+    lindexes=LinearIndices((1:Nx,1:Ny))
+    if nt==1
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d)
+    else
+        p=part2d(X,Y,nt)
+        for icol=1:length(p)
+	    @tasks for (xp, yp) in p[icol]
+	        assemblepartition!(A,lindexes,X,Y,xp,yp,d)
+	    end
+        end
+    end
+    flush!(A)
+end
+
+
+function partassemble!(A::Union{MTExtendableSparseMatrixCSC},X,Y,nt=1;d=0.1, reset=true)
+    Nx=length(X)
+    Ny=length(Y)
+
+    size(A,1)==Nx*Ny || error("incompatible size of A")
+    size(A,2)==Nx*Ny || error("incompatible size of A")
+
+    lindexes=LinearIndices((1:Nx,1:Ny))
+    if nt==1
+        reset!(A,1)
+        assemblepartition!(A,lindexes,X,Y,1:Nx-1,1:Nx-1,d,1)
+    else
+        p,pc=colpart2d(X,Y,nt)
+        if reset
+            reset!(A,pc)
+        end
+        jp0=0
+        for icol=1:length(p)
+            npc=length(p[icol])
+	    @tasks for ip=1:npc
+                (xp, yp)=p[icol][ip]
+	        assemblepartition!(A,lindexes,X,Y,xp,yp,d,jp0+ip)
+	    end
+            jp0+=npc
+        end
+    end
+    flush!(A)
+end
+
+
+
+
+"""
+`test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)`
+
+Measure and output times for build and update for a rectangle grid with `n * n` cells.
+Calculations are done on `nt` threads (`nt` >= 1).
+Returns the assembled matrix.
+"""
+function test_ESMP(n, nt; depth=1, Tv=Float64, Ti=Int64, k=10)
+    m = n
+    lindexes = LinearIndices((1:n,1:m))
+    mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti)
+    if nt > 1
+        A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false)
+    else
+        A = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m)
+    end
+
+    X = collect(1:n) #LinRange(0,1,n)
+    Y = collect(1:n) #LinRange(0,1,m)
+
+    #Build
+    times_build = zeros(k)
+    for i=1:k
+        ExtendableSparse.reset!(A)
+        times_build[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false)
+    end
+
+
+
+    #update
+    times_update = zeros(k)
+    for i=1:k
+        times_update[i] = @elapsed assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=true)
+    end
+
+    @info "TIMES:  MIN,  AVG,  MAX"
+    info_minmax(times_build, "build ")
+    info_minmax(times_update, "update")
+    
+    A
+end
+
+
+function test_correctness_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10])
+    m = n
+    lindexes = LinearIndices((1:n,1:m))
+    X = collect(1:n) #LinRange(0,1,n)
+    Y = collect(1:n) #LinRange(0,1,m)
+    
+    mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti)
+    
+    A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m)
+    assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false)
+
+    for nt in allnp
+        A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false)
+        assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false)
+        @assert A.cscmatrix≈A0.cscmatrix
+    end
+end
+
+
+function speedup_build(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10])
+    m = n
+    lindexes = LinearIndices((1:n,1:m))
+    X = collect(1:n) #LinRange(0,1,n)
+    Y = collect(1:n) #LinRange(0,1,m)
+
+    mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti)
+
+    A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m)
+    t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(reset!($A0))
+    
+    result=[]
+
+    for nt in allnp
+        A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false)
+        t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(ExtendableSparse.reset!($A)) seconds=1
+        @assert A.cscmatrix≈A0.cscmatrix
+        push!(result,(nt,round(t0/t,digits=2)))
+    end
+
+    result
+    
+end
+
+
+function speedup_update(n, depth=1, Tv=Float64, Ti=Int64, allnp=[4,5,6,7,8,9,10])
+    m = n
+    lindexes = LinearIndices((1:n,1:m))
+    X = collect(1:n) #LinRange(0,1,n)
+    Y = collect(1:n) #LinRange(0,1,m)
+
+    mat_cell_node, nc, nn = generate_rectangle_grid(lindexes, Ti)
+
+    A0 = ExtendableSparseMatrix{Tv, Ti}(n*m, n*m)
+    assemble_ESMP(A0, n-1, m-1, mat_cell_node, X, Y)
+    t0=@belapsed assemble_ESMP($A0, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) seconds=1 setup=(nonzeros($A0.cscmatrix).=0)
+    
+
+
+    result=[]
+
+    for nt in allnp
+        A = ExtendableSparseMatrixParallel{Tv, Ti}(mat_cell_node, nc, nn, nt, depth; block_struct=false)
+        assemble_ESMP(A, n-1, m-1, mat_cell_node, X, Y; set_CSC_zero=false)
+        t=@belapsed assemble_ESMP($A, $n-1, $m-1, $mat_cell_node, $X, $Y; set_CSC_zero=false) setup=(nonzeros($A.cscmatrix).=0) seconds=1
+        @assert A.cscmatrix≈A0.cscmatrix
+        push!(result,(nt,round(t0/t,digits=2)))
+    end
+
+    result
+    
+end
+
+
+"""
+`generate_rectangle_grid(lindexes, Ti)`
+
+Generate a rectangle grid (i.e. a CellNodes matrix) based on LinerIndices
+"""
+function generate_rectangle_grid(lindexes, Ti)
+    n,m = size(lindexes)
+    nn = n*m # num nodes
+    nc = (n-1)*(m-1)
+    #lindexes=LinearIndices((1:n,1:m))
+
+    mat_cell_node = zeros(Ti, 4, nc)
+
+    # links oben, rechts oben, rechts unten, links unten
+    cell_id = 1
+    for ir in 1:n-1
+        for jr in 1:m-1
+            mat_cell_node[1,cell_id] = lindexes[ir,jr]
+            mat_cell_node[2,cell_id] = lindexes[ir,jr+1]
+            mat_cell_node[3,cell_id] = lindexes[ir+1,jr+1]
+            mat_cell_node[4,cell_id] = lindexes[ir+1,jr]
+            cell_id += 1
+        end
+    end
+
+
+    mat_cell_node, nc, nn
+
+end
+
+function info_minmax(x, name; digits=3)
+    n = length(x)
+    @info name*" $(round(minimum(x),digits=digits)), $(round(sum(x)/n,digits=digits)), $(round(maximum(x),digits=digits))"
+end
+
+"""
+Assembly functions for ExtendableSparseMatrixParallel
+"""
+function assemble_ESMP(A::ExtendableSparseMatrixParallel{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer}
+    if set_CSC_zero
+        A.cscmatrix.nzval .= 0
+    end
+
+    for level=1:A.depth
+        @tasks for tid=1:A.nt
+            for cell in A.cellsforpart[(level-1)*A.nt+tid]
+                assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, tid)
+            end
+        end
+    end
+
+    for cell in A.cellsforpart[A.depth*A.nt+1]
+        assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell, 1)
+    end
+    nnzCSC, nnzLNK = nnz_noflush(A)
+    if nnzCSC > 0 && nnzLNK > 0	
+        flush!(A; do_dense=false)
+        #sparse flush
+    elseif nnzCSC == 0 && nnzLNK > 0
+        flush!(A; do_dense=true)
+        #dense flush
+    end
+end
+
+
+function assembleedge!(A::ExtendableSparseMatrixParallel{Tv, Ti},v,k,l,tid) where {Tv, Ti <: Integer}
+    addtoentry!(A, k, k, tid, +v)
+    addtoentry!(A, k, l, tid, -v)
+    addtoentry!(A, l, k, tid, -v)
+    addtoentry!(A, l, l, tid, +v)
+end
+
+function assemblecell!(A::ExtendableSparseMatrixParallel{Tv, Ti},n,m,mcn,X,Y,d,cell,tid) where {Tv, Ti <: Integer}
+    ij00=mcn[1,cell]
+    ij10=mcn[2,cell]
+    ij11=mcn[3,cell]
+    ij01=mcn[4,cell]
+    
+    ix = (cell-1)%n+1
+    iy = Int64(ceil(cell/n))
+
+    hx=X[ix+1]-X[ix]
+    hy=Y[iy+1]-Y[iy]
+    
+    assembleedge!(A,0.5*hx/hy,ij00,ij01,tid)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11,tid)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10,tid)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11,tid)
+    v=0.25*hx*hy
+    addtoentry!(A, ij00, ij00, tid, v*d)
+    addtoentry!(A, ij01, ij01, tid, v*d)
+    addtoentry!(A, ij10, ij10, tid, v*d)
+    addtoentry!(A, ij11, ij11, tid, v*d)
+end
+
+
+
+"""
+Assembly functions for ExtendableSparseMatrix
+"""
+function assemble_ESMP(A::ExtendableSparseMatrix{Tv, Ti}, n, m, mat_cell_node, X, Y; d=0.1, set_CSC_zero=true) where {Tv, Ti <: Integer}
+    if set_CSC_zero
+        A.cscmatrix.nzval .= 0
+    end
+    nc = size(mat_cell_node,2)
+    for cell=1:nc
+        assemblecell!(A, n, m, mat_cell_node, X, Y, d, cell)
+    end
+    ExtendableSparse.flush!(A) 
+end
+
+function assembleedge!(A::ExtendableSparseMatrix{Tv, Ti},v,k,l) where {Tv, Ti <: Integer}
+    A[k,k]+=v
+    A[k,l]-=v
+    A[l,k]-=v
+    A[l,l]+=v
+end
+
+function assemblecell!(A::ExtendableSparseMatrix{Tv, Ti},n,m,mcn,X,Y,d,cell) where {Tv, Ti <: Integer}
+    ij00=mcn[1,cell]
+    ij10=mcn[2,cell]
+    ij11=mcn[3,cell]
+    ij01=mcn[4,cell]
+    
+    ix = (cell-1)%n+1
+    iy = Int64(ceil(cell/n))
+
+    hx=X[ix+1]-X[ix]
+    hy=Y[iy+1]-Y[iy]
+    
+    assembleedge!(A,0.5*hx/hy,ij00,ij01)
+    assembleedge!(A,0.5*hx/hy,ij10,ij11)
+    assembleedge!(A,0.5*hy/hx,ij00,ij10)
+    assembleedge!(A,0.5*hy/hx,ij01,ij11)
+    v=0.25*hx*hy
+    A[ij00,ij00]+=v*d
+    A[ij01,ij01]+=v*d
+    A[ij10,ij10]+=v*d
+    A[ij11,ij11]+=v*d
+end
+end
diff --git a/test/Project.toml b/test/Project.toml
index e195dd2..76e3ae8 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,19 +1,27 @@
 [deps]
 AMGCLWrap = "4f76b812-4ba5-496d-b042-d70715554288"
 AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c"
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
+ExtendableGrids = "cfc395e8-590f-11e8-1f13-43a2532b2fa8"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f"
 IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b"
 MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5"
+OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
+ExtendableGrids = "1.9"
 IterativeSolvers = "0.9"
diff --git a/test/femtools.jl b/test/femtools.jl
new file mode 100644
index 0000000..8c7e652
--- /dev/null
+++ b/test/femtools.jl
@@ -0,0 +1,110 @@
+using OhMyThreads: @tasks
+using RecursiveFactorization
+
+function testgrid(N; dim=3)
+    X = range(0, 1; length=N^(1.0 / dim) |> ceil |> Int)
+    simplexgrid((X for i = 1:dim)...)
+end
+
+function coordmatrix!(C, coord, cellnodes, k)
+    spacedim=size(coord,1)
+    celldim=size(cellnodes,1)
+    @inbounds for jj = 1:celldim
+        C[1, jj] = 1
+        @inbounds for ii = 1:spacedim
+            C[ii + 1, jj] = coord[ii, cellnodes[jj, k]]
+        end
+    end
+end
+
+function gradient!(G, C, factdim, I, ipiv)
+    clu = RecursiveFactorization.lu!(C, ipiv, Val(true), Val(false))
+    ldiv!(G, clu, I)
+    abs(det(clu)) / factdim
+end
+
+function scalpro(G, dim, jl, il)
+    s = 0.0
+    @inbounds @simd for k = 1:dim
+        s += G[jl, k + 1] * G[il, k + 1]
+    end
+    return s
+end
+
+function stiffness!(S, dim, G)
+    @inbounds for il = 1:(dim + 1)
+        S[il, il] = scalpro(G, dim, il, il)
+        @inbounds for jl = (il + 1):(dim + 1)
+            S[il, jl] = scalpro(G, dim, jl, il)
+            S[jl, il] = S[il, jl]
+        end
+    end
+    return S
+end
+
+function testassemble!(A_h, grid)
+    coord = grid[Coordinates]
+    cellnodes = grid[CellNodes]
+    ncells = num_cells(grid)
+    dim = size(coord, 1)
+    lnodes = dim + 1
+    factdim::Float64 = factorial(dim)
+    S = zeros(lnodes, lnodes) # local stiffness matrix
+    C = zeros(lnodes, lnodes)  # local coordinate matrix
+    G = zeros(lnodes, lnodes) # shape function gradients
+    ipiv = zeros(Int, lnodes)
+    I = Matrix(Diagonal(ones(lnodes)))
+    ncells = size(cellnodes, 2)
+    for icell = 1:ncells
+        coordmatrix!(C, coord, cellnodes, icell)
+        vol = gradient!(G, C, factdim, I, ipiv)
+        stiffness!(S, dim, G)
+        for il = 1:lnodes
+            i = cellnodes[il, icell]
+            rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i)
+            for jl = 1:lnodes
+                j = cellnodes[jl, icell]
+                rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j)
+            end
+        end
+    end
+    flush!(A_h)
+end
+
+function testassemble_parallel!(A_h, grid)
+    coord = grid[Coordinates]
+    cellnodes = grid[CellNodes]
+    ncells = num_cells(grid)
+    dim = size(coord, 1)
+    lnodes = dim + 1
+    npart = num_partitions(grid)
+    factdim::Float64 = factorial(dim)
+    SS = [zeros(lnodes, lnodes) for i = 1:npart] # local stiffness matrix
+    CC = [zeros(lnodes, lnodes) for i = 1:npart] # local coordinate matrix
+    GG = [zeros(lnodes, lnodes) for i = 1:npart] # shape function gradients
+    IP = [zeros(Int, lnodes) for i = 1:npart] # shape function gradients
+    I = Matrix(Diagonal(ones(lnodes)))
+    ncells = size(cellnodes, 2)
+    for color in pcolors(grid)
+        @tasks for part in pcolor_partitions(grid, color)
+            C = CC[part]
+            S = SS[part]
+            G = GG[part]
+            ipiv = IP[part]
+            for icell in partition_cells(grid, part)
+                coordmatrix!(C, coord, cellnodes, icell)
+                vol = gradient!(G, C, factdim, I, ipiv)
+                stiffness!(S, dim, G)
+                for il = 1:lnodes
+                    i = cellnodes[il, icell]
+                    rawupdateindex!(A_h, +, 0.1 * vol / (dim + 1), i, i, part)
+                    for jl = 1:lnodes
+                        j = cellnodes[jl, icell]
+                        rawupdateindex!(A_h, +, vol * (S[il, jl]), i, j, part)
+                    end
+                end
+            end
+        end
+    end
+    flush!(A_h)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 4d7ae07..b154d1d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,11 +2,46 @@ using Test
 using LinearAlgebra
 using SparseArrays
 using ExtendableSparse
+using ExtendableSparse.Experimental
 using Printf
 using BenchmarkTools
 
 using MultiFloats
 using ForwardDiff
+using ExplicitImports
+
+@testset "ExplicitImports" begin
+    @test ExplicitImports.check_no_implicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing
+    @test ExplicitImports.check_no_stale_explicit_imports(ExtendableSparse, allow_unanalyzable=(ExtendableSparse.Experimental,)) === nothing
+end
+
+@testset "Parallel" begin
+    include("test_parallel.jl")
+
+    for Tm in [STExtendableSparseMatrixCSC, MTExtendableSparseMatrixCSC, ExtendableSparseMatrix]
+        for N in [10000,20000]
+            test_parallel.test_correctness_build_seq(N,Tm, dim=2)
+        end
+    end
+
+    for Tm in [MTExtendableSparseMatrixCSC]
+        for N in [10000,20000]
+            test_parallel.test_correctness_update(N,Tm, dim=2)
+            test_parallel.test_correctness_build(N,Tm, dim=2)
+            test_parallel.test_correctness_mul(N,Tm,dim=2)
+        end
+    end
+end
+
+@testset "ExperimentalParallel" begin
+    include("ExperimentalParallel.jl")
+    for d=[1,2,3]
+        for N in [10,rand(30:40),50]
+            ExperimentalParallel.test_correctness_build(N,d)
+        end
+    end
+end
+
 
 @testset "Constructors" begin include("test_constructors.jl") end
 
diff --git a/test/test_parallel.jl b/test/test_parallel.jl
new file mode 100644
index 0000000..de26229
--- /dev/null
+++ b/test/test_parallel.jl
@@ -0,0 +1,263 @@
+module test_parallel
+
+using ExtendableSparse, SparseArrays, ExtendableSparse.Experimental
+using BenchmarkTools
+using ExtendableGrids
+#using MKLSparse
+#using SparseMatricesCSR
+using Test
+
+using ExtendableSparse, ExtendableGrids, Metis
+using LinearAlgebra
+using BenchmarkTools
+using Test
+
+include("femtools.jl")
+
+function test_correctness_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    A = Tm{Float64,Int}(nnodes, nnodes)
+    testassemble!(A0, grid)
+    testassemble!(A, grid)
+    @test sparse(A0) ≈ sparse(A)
+end
+
+function speedup_build_seq(N, Tm::Type{<:AbstractSparseMatrix}; dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    A = Tm{Float64,Int}(nnodes, nnodes)
+    tbase = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0))
+    tx = @belapsed testassemble!($A, $grid) seconds = 1 setup = (reset!($A))
+    tbase / tx
+end
+
+function test_correctness_update(N,
+                                 Tm::Type{<:AbstractSparseMatrix};
+                                 Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning,
+                                 allnp=[10, 15, 20],
+                                 dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    A = Tm{Float64,Int}(nnodes, nnodes, 1)
+
+    # Assembele without partitioning
+    # this gives the "base truth" to compare with
+    testassemble_parallel!(A, grid)
+
+    # Save the nonzeros 
+    nz = sort(copy(nonzeros(A)))
+    for np in allnp
+        # Reset the nonzeros, keeping the structure intact
+        nonzeros(A) .= 0
+        # Parallel assembly whith np threads
+        pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true)
+        reset!(A, np)
+        @show num_partitions_per_color(pgrid)
+        testassemble_parallel!(A, pgrid)
+        @test sort(nonzeros(A)) ≈ nz
+    end
+end
+
+"""
+    test_correctness_build(N)
+
+Test correctness of parallel assembly on NxN grid  during 
+build phase, assuming that no structure has been assembled.
+"""
+function test_correctness_build(N,
+                                Tm::Type{<:AbstractSparseMatrix};
+                                Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning,
+                                allnp=[10, 15, 20],
+                                dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    # Get the "ground truth"
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    testassemble!(A0, grid)
+    nz = sort(copy(nonzeros(A0)))
+    for np in allnp
+        # Make a new matrix and assemble parallel.
+        # this should result in the same nonzeros
+        pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true)
+        A = Tm(nnodes, nnodes, num_partitions(pgrid))
+        @show num_partitions_per_color(pgrid)
+        @test check_partitioning(pgrid)
+        testassemble_parallel!(A, pgrid)
+        @test sort(nonzeros(A)) ≈ nz
+    end
+end
+
+function test_correctness_mul(N,
+                              Tm::Type{<:AbstractSparseMatrix};
+                              Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning,
+                              allnp=[10, 15, 20],
+                              dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    # Get the "ground truth"
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    testassemble!(A0, grid)
+    b = rand(nnodes)
+    A0b = A0 * b
+    for np in allnp
+        pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true)
+        @test check_partitioning(pgrid)
+        A = Tm(nnodes, nnodes, num_partitions(pgrid))
+        partitioning!(A, pgrid[PColorPartitions],
+                                                    pgrid[PartitionNodes])
+        testassemble_parallel!(A, pgrid)
+        invp = invperm(pgrid[NodePermutation])
+        diff = norm(A0b[invp] - A * b[invp], Inf)
+        @show diff
+        @test diff < sqrt(eps())
+    end
+end
+
+function speedup_update(N,
+                        Tm::Type{<:AbstractSparseMatrix};
+                        Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning,
+                        allnp=[10, 15, 20],
+                        dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    # Get the "ground truth"
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    testassemble!(A0, grid)
+    nz = copy(nonzeros(A0)) |> sort
+    # Get the base timing
+    # During setup, set matrix entries to zero while keeping  the structure 
+    t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (nonzeros($A0) .= 0)
+    result = []
+    A = Tm(nnodes, nnodes, 1)
+    for np in allnp
+        # Get the parallel timing
+        # During setup, set matrix entries to zero while keeping  the structure
+        pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true)
+        @show num_partitions_per_color(pgrid)
+        reset!(A, num_partitions(pgrid))
+        testassemble_parallel!(A, pgrid)
+        t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (nonzeros($A) .= 0)
+        @assert sort(nonzeros(A)) ≈ nz
+        push!(result, (np, round(t0 / t; digits=2)))
+    end
+    result
+end
+
+function speedup_build(N,
+                       Tm::Type{<:AbstractSparseMatrix};
+                       Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning,
+                       allnp=[10, 15, 20],
+                       dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    # Get the "ground truth"
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    testassemble!(A0, grid)
+    nz = nonzeros(A0)
+    reset!(A0)
+    testassemble!(A0, grid)
+    @assert nonzeros(A0) ≈ (nz)
+    nz = sort(nz)
+
+    # Get the base timing
+    # During setup, reset matrix to empty state.
+    t0 = @belapsed testassemble!($A0, $grid) seconds = 1 setup = (reset!($A0))
+
+    result = []
+    A = Tm(nnodes, nnodes, 1)
+    for np in allnp
+        # Get the parallel timing
+        # During setup, reset matrix to empty state.
+        pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true)
+        reset!(A, num_partitions(pgrid))
+        @show num_partitions_per_color(pgrid)
+        t = @belapsed testassemble_parallel!($A, $pgrid) seconds = 1 setup = (reset!($A,
+                                                                                     num_partitions($pgrid)))
+        @assert sort(nonzeros(A)) ≈ nz
+        push!(result, (np, round(t0 / t; digits=2)))
+    end
+    result
+end
+
+function speedup_mul(N,
+                     Tm::Type{<:AbstractSparseMatrix};
+                     Tp::Type{<:AbstractPartitioningAlgorithm}=PlainMetisPartitioning,
+                     allnp=[10, 15, 20],
+                     dim=3)
+    grid = testgrid(N; dim)
+    nnodes = num_nodes(grid)
+    # Get the "ground truth"
+    A0 = ExtendableSparseMatrix{Float64,Int}(nnodes, nnodes)
+    testassemble!(A0, grid)
+    b = rand(nnodes)
+    t0 = @belapsed $A0 * $b seconds = 1
+    A0b = A0 * b
+    result = []
+    A = Tm(nnodes, nnodes, 1)
+    for np in allnp
+        pgrid = partition(grid, Tp(; npart=np), nodes=true, keep_nodepermutation=true)
+        @show num_partitions_per_color(pgrid)
+        reset!(A, num_partitions(pgrid))
+        testassemble_parallel!(A, pgrid)
+        flush!(A)
+        partitioning!(A, pgrid[PColorPartitions],
+                      pgrid[PartitionNodes])
+        t = @belapsed $A * $b seconds = 1
+        invp = invperm(pgrid[NodePermutation])
+        @assert A0b[invp] ≈ A * b[invp]
+        push!(result, (np, round(t0 / t; digits=2)))
+    end
+    result
+end
+
+#=
+function mymul(A::SparseMatrixCSR,v::AbstractVector)
+    y=copy(v)
+    A.n == size(v, 1) || throw(DimensionMismatch())
+    A.m == size(y, 1) || throw(DimensionMismatch())
+    @tasks for row = 1:size(y, 1)
+        y[row]=0.0
+        @inbounds for nz in nzrange(A,row)
+            col = A.colval[nz]
+            y[row] += A.nzval[nz]*v[col]
+        end
+    end
+    return y
+end
+
+function speedup_csrmul(N; dim=3)
+    grid=testgrid(N;dim)
+    nnodes=num_nodes(grid)
+    # Get the "ground truth"
+    A0=ExtendableSparseMatrix{Float64,Int}(nnodes,nnodes)
+    t00=@belapsed testassemble!($A0,$grid) seconds=1 setup=(reset!($A0))
+
+    reset!(A0)
+    testassemble!(A0,grid)
+    b=rand(nnodes)
+    t0=@belapsed $A0*$b seconds=1
+    A0b=A0*b
+
+    t0x=@belapsed  A0x=sparse(transpose(sparse($A0)))
+
+    A0x=sparse(transpose(sparse(A0)))
+
+    tx=@belapsed A=SparseMatrixCSR{1}(transpose($A0x))
+
+    A=SparseMatrixCSR{1}(transpose(sparse(A0x)))
+    t1=@belapsed $A*$b seconds=1
+
+    t2=@belapsed mymul($A, $b) seconds=1
+
+    @info t00,t0,t0x, tx,t1, t2
+
+    @assert A0b≈A*b
+    t0/t1
+end
+
+=#
+
+end