# Matrix Assembly

In [1]:
using LinearAlgebra, SparseArrays
using BenchmarkTools

In [2]:
"""
    generate_mesh(Nel)

Build a uniform one-dimensional mesh of the interval from 0 to 1 with `Nel` elements.

Returns the tuple `(N, points, edges, x, elem_ids, e_group, fsource_elem)`:
- `N`: number of nodes, i.e. `length(x)`.
- `points`: one vector `[x_left, x_right]` of end-point coordinates per element.
- `edges`: one vector `[i, i + 1]` of node indices (connectivity) per element.
- `x`: the range of node coordinates `0:1/Nel:1`.
- `elem_ids`: the range `1:length(edges)`.
- `e_group`: one group marker per element, all equal to `1.0`.
- `fsource_elem`: one source value per element; the source function is constant `1`.
"""
function generate_mesh(Nel)
    x = 0:1/Nel:1
    N = length(x)

    # Element k spans nodes k and k + 1: store its coordinates and its connectivity
    points = [[x[k], x[k + 1]] for k in 1:(N - 1)]
    edges = [[k, k + 1] for k in 1:(N - 1)]

    elem_ids = 1:length(edges)
    e_group = ones(size(edges))

    # Source function: every group id maps to the constant value 1
    fsource(group_id) = 1
    fsource_elem = [fsource(g) for g in e_group]

    return N, points, edges, x, elem_ids, e_group, fsource_elem
end

generate_mesh (generic function with 1 method)

## Triple Loop

In [3]:
"""
    assemble_matrices(edges, points, N, fsource_elem)

Assemble the dense global matrix `A` (size `N x N`) and right-hand side vector `f`
(length `N`) with explicit loops over the elements and over the local node pairs.

# Arguments
- `edges`: per element, the two global node indices of the element.
- `points`: per element, the left and right end-point coordinate of the element.
- `N`: number of nodes.
- `fsource_elem`: per element, the source value used for the load contribution.

# Returns
The tuple `(A, f)`. The first and last rows of `A` are replaced by identity rows and
the first and last entries of `f` are set to zero.
"""
function assemble_matrices(edges, points, N, fsource_elem)
    #..Initialize global matrix and right-hand side value
    A = zeros(Float64, N, N)
    f = zeros(Float64, N)

    #..Perform loop over elements and assemble global matrix and vector
    for (i, nodes) in enumerate(edges)
        # points[i] already holds (left, right); no slicing of `points` is needed
        xl, xr = points[i]
        h = xr - xl

        floc = fsource_elem[i] * h / 2 * [1; 1]
        Aloc = 1 / h * [1 -1; -1 1]

        #....perform loop over nodes of the current element
        #....and add local contribution Aloc to global matrix entity A
        for j in 1:2
            row = nodes[j]  # named `row`/`col` to avoid shadowing LinearAlgebra.I
            f[row] += floc[j]
            for k in 1:2
                col = nodes[k]
                A[row, col] += Aloc[j, k]
            end
        end
    end

    #..handle the boundary conditions in the matrix and right-hand side vector
    # Rows are cleared in place instead of assigning a freshly allocated zeros(N)
    A[1, :] .= 0
    A[1, 1] = 1
    f[1] = 0
    A[end, :] .= 0
    A[end, end] = 1
    f[end] = 0

    return A, f
end

assemble_matrices (generic function with 1 method)

In [4]:
# Mesh with 10000 elements; only the values needed by the assembly routine are kept
N, points, edges, _, _, _, fsource_elem = generate_mesh(10000);

# Interpolate the globals with `$` so the benchmark times the assembly itself and not
# the access to untyped global variables
@benchmark assemble_matrices($edges, $points, $N, $fsource_elem)

BenchmarkTools.Trial: 15 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m138.615 ms[22m[39m … [35m   1.099 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.11% …  9.37%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m274.242 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m22.36%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m299.739 ms[22m[39m ± [32m228.828 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m19.96% ± 15.94%

  [39m▃[39m [39m [39m [39m [39m▃[34m [39m[39m [39m▃[39m█[32m [39m[39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▇[39m▁

## Single Loop

In [5]:
"""
    assemble_matrices(edges, points, N, fsource_elem)

Assemble the dense global matrix `A` (size `N x N`) and right-hand side vector `f`
(length `N`) with a single loop over the elements; the local contributions are added
block-wise through index-vector slicing.

# Arguments
- `edges`: per element, a vector with the two global node indices of the element.
- `points`: per element, the left and right end-point coordinate of the element.
- `N`: number of nodes.
- `fsource_elem`: per element, the source value used for the load contribution.

# Returns
The tuple `(A, f)`. The first and last rows of `A` are replaced by identity rows and
the first and last entries of `f` are set to zero.
"""
function assemble_matrices(edges, points, N, fsource_elem)
    #..Initialize global matrix and right-hand side value
    A = zeros(Float64, N, N)
    f = zeros(Float64, N)

    #..Perform loop over elements and assemble global matrix and vector
    for (i, nodes) in enumerate(edges)
        # points[i] already holds (left, right); no slicing of `points` is needed
        xl, xr = points[i]
        h = xr - xl

        floc = fsource_elem[i] * h / 2 * [1; 1]
        Aloc = 1 / h * [1 -1; -1 1]

        # Add the 2-vector / 2x2 block at the element's global node indices
        f[nodes] += floc
        A[nodes, nodes] += Aloc
    end

    #..handle the boundary conditions in the matrix and right-hand side vector
    # Rows are cleared in place instead of assigning a freshly allocated zeros(N)
    A[1, :] .= 0
    A[1, 1] = 1
    f[1] = 0
    A[end, :] .= 0
    A[end, end] = 1
    f[end] = 0

    return A, f
end

assemble_matrices (generic function with 1 method)

In [6]:
# Mesh with 10000 elements; only the values needed by the assembly routine are kept
N, points, edges, _, _, _, fsource_elem = generate_mesh(10000);

# Interpolate the globals with `$` so the benchmark times the assembly itself and not
# the access to untyped global variables
@benchmark assemble_matrices($edges, $points, $N, $fsource_elem)

BenchmarkTools.Trial: 17 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m160.367 ms[22m[39m … [35m342.028 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.32% … 33.44%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m270.010 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m21.31%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m269.316 ms[22m[39m ± [32m 44.885 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m23.41% ± 10.07%

  [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m▁[39m▁[39m [39m [39m▁[39m▁[34m▁[39m[39m [39m█[32m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m [39m [39m [39m▁[39m▁[39m [39m [39m [39m▁[39m [39m [39m▁[39m [39m 
  [39m█[39m▁[39m▁

## Single Loop with Pre-allocation

## Single Loop with Sparse

In [7]:
"""
    assemble_matrices(edges, points, N, fsource_elem)

Assemble the global matrix `A` as an `N x N` sparse matrix and the right-hand side
vector `f` (length `N`) with a single loop over the elements. The matrix entries are
collected as (row, column, value) triplets and combined by `sparse`, which sums the
values of repeated index pairs.

# Arguments
- `edges`: per element, a vector with the two global node indices of the element.
- `points`: per element, the left and right end-point coordinate of the element.
- `N`: number of nodes.
- `fsource_elem`: per element, the source value used for the load contribution.

# Returns
The tuple `(A, f)`. The first and last rows of `A` are replaced by identity rows and
the first and last entries of `f` are set to zero.
"""
function assemble_matrices(edges, points, N, fsource_elem)
    # Triplet storage: every element contributes 4 matrix entries
    nentries = 4 * length(edges)
    rows = zeros(Int64, nentries)   # not named `I`, to avoid shadowing LinearAlgebra.I
    cols = similar(rows)            # fully overwritten inside the loop
    vals = zeros(Float64, nentries)

    # Initialize right-hand side vector
    f = zeros(Float64, N)

    #..Perform loop over elements and assemble global matrix and vector
    for (i, nodes) in enumerate(edges)
        idx = (4 * (i - 1) + 1):(4 * i)
        h = points[i][2] - points[i][1]

        # Right-hand side contribution
        f[nodes] += fsource_elem[i] * h / 2 * [1; 1]

        # Matrix contribution: entries (1,1), (2,1), (1,2), (2,2) of the local matrix
        rows[idx] = nodes[[1, 2, 1, 2]]
        cols[idx] = nodes[[1, 1, 2, 2]]
        vals[idx] = 1 / h * [1, -1, -1, 1]
    end

    # Pass the size explicitly: without it `sparse` sizes the matrix by the largest
    # index it sees, which is not guaranteed to be N
    A = sparse(rows, cols, vals, N, N)

    #..handle the boundary conditions in the matrix and right-hand side vector
    A[1, :] = zeros(N)
    A[1, 1] = 1
    f[1] = 0
    A[end, :] = zeros(N)
    A[end, end] = 1
    f[end] = 0

    return A, f
end

assemble_matrices (generic function with 1 method)

In [8]:
# Mesh with 10000 elements; only the values needed by the assembly routine are kept
N, points, edges, _, _, _, fsource_elem = generate_mesh(10000);

# Interpolate the globals with `$` so the benchmark times the assembly itself and not
# the access to untyped global variables
@benchmark assemble_matrices($edges, $points, $N, $fsource_elem)

BenchmarkTools.Trial: 865 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m4.058 ms[22m[39m … [35m24.755 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 78.61%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m5.272 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m5.774 ms[22m[39m ± [32m 1.858 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m14.77% ± 17.11%

  [39m█[39m█[39m▄[39m [39m [39m [39m [39m [39m [34m [39m[39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[39m█[39m█[39m▅[39m▃[39m▄[3

## No Loop
This does not seem to be a very efficient use of ``mapreduce``: every ``vcat`` in the reduction allocates a new vector. Next step: study the use of ``map()`` and ``mapreduce()``, and maybe write about this in a notebook on general Julia usage.

Note: the assembly of the right-hand side vector $f$ is still missing.

In [9]:
"""
    compute_element_area(elem_id, e, p)

Return the difference between the entries of `p` at the second and at the first node
of element `elem_id`, where `e[elem_id]` holds the node indices of that element.
"""
function compute_element_area(elem_id, e, p)
    nodes = e[elem_id]
    return p[nodes[2]] - p[nodes[1]]
end

compute_element_area (generic function with 1 method)

In [10]:
"""
    assemble_matrices(elem_ids, edges, x, N)

Assemble the `N x N` sparse global matrix without an explicit loop, using `map` and
`mapreduce` to build the (row, column, value) triplets passed to `sparse`.

# Arguments
- `elem_ids`: the element indices to process.
- `edges`: per element, a vector with the two global node indices of the element.
- `x`: node coordinates, indexed by global node index.
- `N`: number of rows and columns of the resulting matrix.

# Returns
The sparse matrix only; no right-hand side vector is assembled and no boundary
conditions are applied.
"""
function assemble_matrices(elem_ids, edges, x, N)
    # Local matrix pattern in the order (1,1), (2,1), (1,2), (2,2)
    stencil = [1, -1, -1, 1]

    # Element lengths h
    lengths = map(elem_ids) do id
        compute_element_area(id, edges, x)
    end

    # Row and column indices, four per element, concatenated over all elements
    rows = mapreduce(vcat, edges) do nodes
        nodes[[1, 2, 1, 2]]
    end
    cols = mapreduce(vcat, edges) do nodes
        nodes[[1, 1, 2, 2]]
    end

    # Matrix values: the pattern scaled by 1/h for every element
    vals = mapreduce(vcat, lengths) do h
        stencil / h
    end

    return sparse(rows, cols, vals, N, N)
end

assemble_matrices (generic function with 1 method)

In [11]:
# Mesh with 10000 elements; the trailing return values of generate_mesh are not needed
N, points, edges, x, elem_ids = generate_mesh(10000);
# Interpolate the globals with `$` so the benchmark times the assembly itself and not
# the access to untyped global variables
@benchmark assemble_matrices($elem_ids, $edges, $x, length($x))

BenchmarkTools.Trial: 92 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m42.413 ms[22m[39m … [35m212.333 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m17.89% … 74.96%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m50.816 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m18.37%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m54.790 ms[22m[39m ± [32m 18.693 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m20.93% ±  6.12%

  [39m [39m▃[39m█[39m [39m▁[39m▃[39m [39m█[39m [39m [39m [39m▁[34m▄[39m[39m [39m▁[39m [39m [39m [32m [39m[39m [39m [39m [39m▁[39m [39m [39m [39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▆[39m█[39m█[39m▄[39