In [24]:

using BenchmarkTools # included only for comparison
using Distributed

# Launch 10 worker processes, but only when none have been added yet
# (nprocs() == 1 means that only the main process exists).
if nprocs() == 1
    addprocs(10)
end

# load the packages with the @everywhere macro so that every worker process knows their types and functions
@everywhere using Graphs
@everywhere using GraphDistanceAlgorithms


In [58]:
# Build the test graph: a random 5-regular graph on 5000 vertices
graph = Graphs.random_regular_graph(5000, 5)

{5000, 12500} undirected simple Int64 graph

In [59]:
# Allocate the work arrays for the :dijkstra_kary algorithm. No extra options are
# passed, so the default array type (distributed arrays) is used.
dict_arrays = spawn_arrays(graph, :dijkstra_kary)

Dict{Symbol, DistributedArrays.DArray} with 6 entries:
  :heap_data         => [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]
  :parents           => [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]
  :dists             => [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]
  :heap_index        => [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]
  :size              => [0 0 … 0 0]
  :heap_index_lookup => [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0]

# Compare serial and distributed approach
- Simple functions that iterate over each source vertex and calculate a metric


In [60]:
"""
First of two approaches to running Dijkstra: serial (from Graphs.jl)
"""
function dijkstra_serial(
    graph::AbstractGraph,
)
    # Runs `dijkstra_shortest_paths` once from every vertex of `graph` and returns
    # the sum of `1 / d` over all strictly positive entries `d` of the resulting
    # distance vectors. Zero entries (e.g. the source itself) are skipped.
    #
    # Returns a floating-point number; `0.0` for a graph without vertices.

    # Float64 accumulator: starting from the integer `0` would make the variable
    # change type on the first addition.
    total = 0.0

    for source in 1:nv(graph)
        dists = dijkstra_shortest_paths(graph, source).dists

        # Same expression as before, so the summation order (and therefore the
        # floating-point result) is unchanged.
        total += sum(1.0 ./ dists[dists .> 0])
    end

    return total
end


"""
Second of two approaches to running Dijkstra: distribute
"""
function dijkstra_distributed(
    graph::AbstractGraph,
    dict_arrays::Dict,
)
    # Distributed counterpart of the serial loop: every vertex is used once as the
    # source for `dijkstra_kary!`, and the per-source sums of `1 / d` over the
    # strictly positive distances `d` are combined with `+`.
    #
    # `dict_arrays` must provide the keys `:dists`, `:parents`, `:heap_data`,
    # `:heap_index` and `:heap_index_lookup`. Each value is indexed with `[:L]`,
    # which for a `DistributedArrays.DArray` is shorthand for the part stored on
    # the executing process.
    # NOTE(review): presumably this is the dictionary returned by `spawn_arrays`
    # for `:dijkstra_kary` — confirm against its documentation.
    n = nv(graph)

    # `@distributed (+)` reduces the per-worker results and raises an error when
    # there is nothing to reduce, so an empty graph is answered directly.
    n == 0 && return 0.0

    out = @distributed (+) for i in 1:n
        # Look up this process's distance buffer once and reuse it for the solver
        # call and for the reduction below.
        dists = dict_arrays[:dists][:L]

        dijkstra_kary!(
            dists,
            graph,
            i;
            parents = dict_arrays[:parents][:L],
            heap_data = dict_arrays[:heap_data][:L],
            heap_index = dict_arrays[:heap_index][:L],
            heap_index_lookup = dict_arrays[:heap_index_lookup][:L],
        )

        # The value of the last expression is what the `(+)` reduction collects.
        sum(1.0 ./ dists[dists .> 0])
    end

    return out
end

dijkstra_distributed

##  Precompile functions

- Since it's Julia, run it once first to allow for compilation

In [61]:
# Warm-up run: the first call of each function includes compilation time,
# so these timings are not representative.
@time out_serial = dijkstra_serial(graph)
@time out_parallel = dijkstra_distributed(graph, dict_arrays)

  8.956889 seconds (434.18 k allocations: 3.614 GiB, 4.32% gc time, 2.66% compilation time)
  2.633227 seconds (75.59 k allocations: 8.741 MiB, 0.91% compilation time)


4.430528995238097e6

##  Now run benchmarks

- Note that the relative performance of the distributed algorithm will improve on larger graphs and with more processors

In [62]:
# Benchmark the serial version on the graph generated above. `$` interpolates the
# global so that access to an untyped global variable is not part of the timing.
@benchmark dijkstra_serial($graph)

BenchmarkTools.Trial: 58 samples with 1 evaluation.
 Range (min … max):  74.853 ms … 102.722 ms  ┊ GC (min … max):  8.30% … 19.18%
 Time  (median):     85.978 ms               ┊ GC (median):    14.67%
 Time  (mean ± σ):   86.573 ms ±   6.072 ms  ┊ GC (mean ± σ):  15.35% ±  4.67%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▃[39m [39m▃[39m [39m [39m [39m█[39m█[39m [39m▃[39m [39m [39m▃[39m▃[39m█[39m [39m [34m [39m[39m [39m [32m▃[39m[39m [39m [39m▃[39m▃[39m [39m [39m [39m [39m▃[39m [39m [39m [39m [39m▃[39m [39m▃[39m [39m [39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▇[39m▁[39m▁[39m▁[39

In [63]:
# Benchmark the distributed version on the graph that `dict_arrays` was allocated
# for. `$` interpolates the globals so that access to untyped global variables is
# not part of the timing.
@benchmark dijkstra_distributed($graph, $dict_arrays)

BenchmarkTools.Trial: 142 samples with 1 evaluation.
 Range (min … max):  27.208 ms … 57.793 ms  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     32.345 ms              ┊ GC (median):    0.00%
 Time  (mean ± σ):   35.274 ms ±  6.658 ms  ┊ GC (mean ± σ):  1.41% ± 5.66%

  [39m [39m [39m [39m▂[39m [39m█[39m [39m▁[39m▁[39m▄[39m▁[34m [39m[39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m▆[39m▅[39m█[39m▆[39m█[39m

In [64]:
# Relative deviation of the distributed result from the serial one; minor
# numerical differences between the two are possible.
error_frac = abs(out_parallel - out_serial) / abs(out_serial)

print("out_serial = $(out_serial)\nout_parallel = $(out_parallel)\nerror fraction = $(error_frac)\n")

out_serial = 4.430528995238097e6
out_parallel = 4.430528995238097e6
error fraction = 0.0
