# Parallelize code using native Julia methods


This notebook focuses on Julia's native parallel tools, such as `pmap`, `@spawn` and `fetch`.

- https://github.com/JuliaLang/julia/blob/master/examples/wordcount.jl
- https://blog.ajdecon.org/parallel-word-count-with-julia-an-interesting/

In [283]:
# 10^8 random integers drawn from 1:10; the trailing `;` suppresses the cell output.
big_array = rand(1:10, 10^8);

In [284]:
"""
    count_elements(array)

Return a `Dict{Int64,Int64}` that maps every distinct value in `array` to the number
of times it occurs. An empty `array` yields an empty dictionary.
"""
function count_elements(array::AbstractArray{Int64})
    counts = Dict{Int64,Int64}()
    for i in array
        # Explicit membership test followed by a separate update. `count_elements2`
        # expresses the same update with a single `get` call; this variant is kept
        # as-is so the two can still be compared.
        if i in keys(counts)
            counts[i] += 1
        else
            counts[i] = 1
        end
    end
    return counts
end



count_elements (generic function with 1 method)

In [285]:
# Time one call on the full array; the trailing `;` hides the returned Dict.
@time count_elements(big_array);

  4.991269 seconds (3.19 k allocations: 137.589 KB)


In [53]:
"""
    count_elements2(array)

Return a `Dict{Int64,Int64}` that maps every distinct value in `array` to the number
of times it occurs. An empty `array` yields an empty dictionary.
"""
function count_elements2(array::AbstractArray{Int64})
    counts = Dict{Int64,Int64}()
    for i in array
        # `get` supplies 0 for values not seen yet, so no separate membership
        # test is needed before the update.
        counts[i] = get(counts, i, 0) + 1
    end
    return counts
end

count_elements2 (generic function with 1 method)

In [220]:
# Time the `get`-based variant on the same array; the trailing `;` hides the result.
@time count_elements2(big_array);

  0.424855 seconds (14 allocations: 1.125 KB)


## Parallelizing dict counts

In [286]:
# reducer
"""
    count_reduce(array_of_count_dicts)

Merge a collection of count dictionaries into a single `Dict{Int64,Int64}`.
Counts stored under the same key are added together. An empty collection yields
an empty dictionary.
"""
function count_reduce(array_of_count_dicts)
    counts_combined = Dict{Int64,Int64}()

    for d in array_of_count_dicts
        # Iterating the pairs avoids looking every key up a second time in `d`.
        for (k, v) in d
            counts_combined[k] = get(counts_combined, k, 0) + v
        end
    end
    return counts_combined
end



count_reduce (generic function with 2 methods)

In [287]:
"""
    parallel_wordcount(big_array, n_processors)

Count how often each value occurs in `big_array`, splitting the work into chunks.

1. Split `big_array` into at most `n_processors` contiguous chunks whose lengths
   differ by at most one element.
2. `@spawn` a `count_elements` call for each chunk.
3. `fetch` the partial results and combine them with `count_reduce`.

Throws an `ArgumentError` if `n_processors` is smaller than 1.
"""
function parallel_wordcount(big_array, n_processors)
    n_processors >= 1 ||
        throw(ArgumentError("n_processors must be at least 1, got $n_processors"))

    n = length(big_array)
    # Never create more chunks than elements, but always keep one (possibly empty)
    # chunk so that an empty input goes through the same code path.
    n_chunks = max(1, min(Int(n_processors), n))

    # Integer chunk boundaries: chunk i covers splits_ind[i] : splits_ind[i+1] - 1.
    # A floating-point range with step n / n_processors only produces valid indices
    # when n_processors divides n; integer division works for every size.
    splits_ind = [div(i * n, n_chunks) + 1 for i in 0:n_chunks]
    big_array_splits = [big_array[splits_ind[i]:splits_ind[i+1]-1] for i in 1:n_chunks]

    partial_res = []
    for subarray in big_array_splits
        push!(partial_res, @spawn count_elements(subarray))
    end
    results = [fetch(p) for p in partial_res]
    return count_reduce(results)
end




parallel_wordcount (generic function with 1 method)

In [289]:
# Time the chunked count with n_processors = 2 and keep the combined counts in `r`.
@time r = parallel_wordcount(big_array, 2)

  5.607081 seconds (170 allocations: 762.950 MB, 3.02% gc time)


Dict{Int64,Int64} with 10 entries:
  7  => 10001575
  9  => 9998597
  4  => 10000766
  10 => 9995546
  2  => 10002138
  3  => 10004073
  5  => 9999662
  8  => 10002225
  6  => 9997214
  1  => 9998204

## Let us look at the code piece by piece

In [271]:
# Split big_array into n_processors contiguous chunks. The chunk boundaries are
# computed with integer division, so this also works when n_processors does not
# divide n (the chunk lengths then differ by at most one element).
n = length(big_array)
n_processors = 4
splits_ind = [div(i * n, n_processors) + 1 for i in 0:n_processors]
big_array_splits = [big_array[x:y-1] for (x,y) in zip(splits_ind[1:end-1], splits_ind[2:end])]

4-element Array{Array{Int64,1},1}:
 [7,5,2,6,1,1,4,1,6,3  …  9,4,3,10,7,4,8,5,5,3] 
 [3,10,2,8,8,3,6,7,2,9  …  9,8,2,6,10,3,3,4,9,7]
 [6,3,1,5,5,9,5,8,5,8  …  5,1,7,8,5,4,6,1,1,7]  
 [2,5,3,9,3,9,9,4,5,7  …  2,10,5,3,1,8,1,2,1,9] 

In [272]:
# Launch one `count_elements` call per chunk with `@spawn` and collect the returned
# handles; the results themselves are not retrieved in this cell.
partial_res = []
for subarray in big_array_splits
    push!(partial_res, @spawn count_elements(subarray) )
end

In [273]:
# Retrieve the result of every spawned call, in the order the calls were launched.
results = [fetch(partial_res[i]) for i in 1:length(partial_res)]

4-element Array{Dict{Int64,Int64},1}:
 Dict(7=>250508,4=>249515,9=>249632,10=>250443,2=>250866,3=>249778,5=>249493,8=>249654,6=>249803,1=>250308…)
 Dict(7=>249996,9=>249945,4=>250435,10=>249986,2=>250057,3=>249377,8=>249414,5=>250152,6=>250384,1=>250254…)
 Dict(7=>250889,9=>250171,4=>249754,10=>249813,2=>249146,3=>250076,5=>250093,8=>250303,6=>249935,1=>249820…)
 Dict(7=>250696,9=>250296,4=>249746,10=>249503,2=>250417,3=>250455,5=>250033,8=>249077,6=>249843,1=>249934…)

In [274]:
# Merge the per-chunk dictionaries into a single Dict of total counts.
x = count_reduce(results)

Dict{Int64,Int64} with 10 entries:
  7  => 1002089
  4  => 999450
  9  => 1000044
  10 => 999745
  2  => 1000486
  3  => 999686
  5  => 999771
  8  => 998448
  6  => 999965
  1  => 1000316