# Matrix Assembly

In [1]:
using LinearAlgebra, SparseArrays
using StructArrays

using BenchmarkTools

In [2]:
function generate_mesh(Nel)
    x = 0:1/Nel:1;

    # Collect points (left and right nodes for each element) and edges (connectivity: indices of left and right nodes)
    N = length(x);
    points = collect( [x[i], x[i+1]] for i in 1:N-1) 
    edges  = collect( [i, i+1] for i in 1:N-1); 

    elem_ids = 1:length(edges)
    e_group = ones(size(edges));
    
    #..Set the source function 
    fsource(group_id) = 1;
    fsource_elem = map(fsource, e_group);


    return N, points, edges, x, elem_ids, e_group, fsource_elem
end

generate_mesh (generic function with 1 method)

## Triple Loop

In [3]:
function assemble_matrices(edges, points, N, fsource_elem)
    #..Initialize global matrix and right-hand side value 
    A = zeros(Float64, N, N);
    f = zeros(Float64, N); 

    #..Perform loop over elements and assemble global matrix and vector 
    @inbounds for (i, nodes) = enumerate(edges)
        xl, xr = points[i,:][1]
        h = xr - xl;

        floc = fsource_elem[i] * h / 2 * [1; 1];
        Aloc = 1 / h * [1 -1; -1 1];

        #....perform loop over nodes of the current element
        #....and add local contribution Aloc to global matrix entity A
        for j = 1:2
            I = nodes[j];
            f[I] += floc[j]
            for k = 1:2
                J = nodes[k];
                A[I,J] += Aloc[j,k]
            end
        end

    end

    #..handle the boundary conditions in the matrix and right-hand side vector 
    A[1,:]   = zeros(N);  A[1,1]     = 1;      f[1]   = 0;
    A[end,:] = zeros(N);  A[end,end] = 1;      f[end] = 0;
    
    return A, f
end

assemble_matrices (generic function with 1 method)

In [4]:
N, points, edges, _, _, _, fsource_elem = generate_mesh(10000);

@benchmark A, f = assemble_matrices(edges, points, N, fsource_elem)

BenchmarkTools.Trial: 17 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m182.952 ms[22m[39m … [35m421.822 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 39.53%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m306.971 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m30.79%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m313.496 ms[22m[39m ± [32m 93.300 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m25.75% ± 20.05%

  [39m▁[39m█[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m▁[39m [39m [39m [39m [39m [39m [39m█[34m [39m[39m [39m [39m [39m [39m [39m▁[39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▁[39m [39m█[39m█[39m [39m▁[39m [39m▁[39m [39m 
  [39m█[39m█[39m█

## Single Loop

In [5]:
function assemble_matrices(edges, points, N, fsource_elem)    
    #..Initialize global matrix and right-hand side value 
    A = zeros(Float64, N, N);
    f = zeros(Float64, N); 

    #..Perform loop over elements and assemble global matrix and vector 
    @inbounds for (i, nodes) = enumerate(edges)
        xl, xr = points[i,:][1]
        h = xr - xl;

        floc = fsource_elem[i] * h / 2 * [1; 1];
        Aloc = 1 / h * [1 -1; -1 1];

        f[nodes] += floc;
        A[nodes, nodes] += Aloc;
    end

    #..handle the boundary conditions in the matrix and right-hand side vector 
    A[1,:]   = zeros(N);  A[1,1]     = 1;      f[1]   = 0;
    A[end,:] = zeros(N);  A[end,end] = 1;      f[end] = 0;
    
    return A, f
end

assemble_matrices (generic function with 1 method)

In [6]:
N, points, edges, _, _, _, fsource_elem = generate_mesh(10000);

@benchmark A = assemble_matrices(edges, points, N, fsource_elem)

BenchmarkTools.Trial: 18 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m145.762 ms[22m[39m … [35m302.383 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 1.27% … 47.33%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m237.718 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m24.53%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m232.572 ms[22m[39m ± [32m 46.201 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m24.78% ± 15.49%

  [39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m▁[39m [39m [39m [39m▁[39m [39m [39m [39m [39m▁[39m [39m [39m [39m▁[39m [39m [39m [39m [32m▁[39m[39m█[34m [39m[39m [39m▁[39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m█[39m▁[39m [39m [39m [39m [39m [39m▁[39m [39m 
  [39m█[39m█[39m▁

## Single Loop with Pre-allocation

## Single Loop with Sparse

In [7]:
struct Element
    p1::Float64
    p2::Float64
    e1::Int64
    e2::Int64
end 

function assemble_matrices(mesh, N, fsource_elem)
    # Initialize vectors for sparse matrix construction
    I = zeros(Int64, 4 * (N - 1))
    J = similar(I);
    V = zeros(Float64, length(I));
    
    # Initialize right-hand side vector
    f = zeros(Float64, N); 
    
    #..Perform loop over elements and assemble global matrix and vector 
    @inbounds for (i, el) = enumerate(mesh)
        idx = 4*(i-1) + 1 : 4*i;
        xl = el.p1; xr = el.p2;
        h  = xr - xl;
        
        j  = el.e1;
        k  = el.e2;
        
        #
        f[[j, k]] += fsource_elem[i] * h / 2 * [1; 1];
        
        # Matrix contribution
        I[idx] = [j, k, j, k];
        J[idx] = [j, j, k, k];
        V[idx] = 1/h * [1 -1 -1 1];
    end
    
    A = sparse(I, J, V)
    
    #..handle the boundary conditions in the matrix and right-hand side vector 
    A[1,:]   = zeros(N);  A[1,1]     = 1;      f[1]   = 0;
    A[end,:] = zeros(N);  A[end,end] = 1;      f[end] = 0;
    
    return A, f
end

assemble_matrices (generic function with 2 methods)

In [8]:
N, _, _, x, _, _, fsource_elem = generate_mesh(10000);
mesh = StructArray{Element}((x[1:end-1], x[2:end], Vector(1:N-1), Vector(2:N)))

@benchmark A, f = assemble_matrices(mesh, N, fsource_elem)

BenchmarkTools.Trial: 1014 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m3.677 ms[22m[39m … [35m10.053 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 48.56%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m4.367 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m4.928 ms[22m[39m ± [32m 1.459 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m15.84% ± 18.49%

  [39m█[39m█[39m▆[39m▃[39m▂[39m [39m [34m▃[39m[39m▂[39m▁[39m▅[39m▅[39m▄[32m▂[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▄[39m▄[39m▃[39m▂[39m▂[39m▁[39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[39m█[39m█[39m█[39m█[39m▇[

## No Loop
This does not seem to be a very efficient use of ``mapreduce``, next step: study the use of ``map()`` and ``mapreduce()`` and maybe write about this in a notebook about general Julia usage?

Note: still missing assembly of $f$ vector

In [9]:
function compute_element_area(elem_id, e, p)
    area_id = p[e[elem_id][2]] - p[e[elem_id][1]]
    return area_id
end

compute_element_area (generic function with 1 method)

In [10]:
function assemble_matrices(elem_ids, edges, x, N)
    Atempl = [1, -1, -1, 1];
    
    # Calculate element length h
    elem_area = map(elem_id -> compute_element_area(elem_id, edges, x), elem_ids);
    
    # Generate index vectors
    I = mapreduce(e -> e[[1, 2, 1, 2]], vcat, edges)
    J = mapreduce(e -> e[[1, 1, 2, 2]], vcat, edges)
    
    # Generate matrix contributions
    V = mapreduce(h -> Atempl / h, vcat, elem_area);
    
    return sparse(I, J, V, N, N)
end

assemble_matrices (generic function with 2 methods)

In [11]:
N, points, edges, x, elem_ids = generate_mesh(10000);

@benchmark A = assemble_matrices(elem_ids, edges, x, length(x))

BenchmarkTools.Trial: 92 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m42.685 ms[22m[39m … [35m220.885 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m18.40% … 71.70%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m51.008 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m18.25%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m54.739 ms[22m[39m ± [32m 19.227 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m20.89% ±  5.69%

  [39m▁[39m [39m▆[39m▃[39m [39m▆[39m [39m█[39m [39m▆[39m▃[39m [39m▆[34m▃[39m[39m▁[39m [39m [39m▃[39m▃[39m [32m [39m[39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▄[39m█[39m█[39