In [1]:
using BenchmarkTools
using LinearAlgebra

### Solution

In [2]:
V(x) = 1.0/x^5 # no need for inline here.

struct mine{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end

W     = mine(21.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_from_glob(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    ri  = zeros(T, ncols)
    rij = zeros(T, ncols)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        for id in 1:3
            ri[id] = r[i,id]
        end
        for j in i+1:N
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - r[j,jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [3]:
@btime Epot_from_glob(r,W)

  72.052 μs (3 allocations: 240 bytes)


7.5259324525120165e6

Notice that the version from below uses less memory (it does not allocate ri or rij) but it is actually a bit slower than the previous version

In [9]:
function Epot_opt(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        for j in i+1:N
            aux = zero(T)
            for jd in 1:3
                aux += (r[i,jd] - r[j,jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [10]:
@btime Epot_opt(r,W)

  75.634 μs (1 allocation: 16 bytes)


7.5259324525120165e6

In [13]:
function Epot_opt(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = view(r,i,:)
        for j in i+1:N
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - r[j,jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [14]:
@btime Epot_opt(r,W)

  77.201 μs (1 allocation: 16 bytes)


7.5259324525120165e6

In [21]:
function Epot_opt(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = view(r,i,:)
        for j in i+1:N
            rj = view(r,j,:)
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - rj[jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [22]:
@btime Epot_opt(r,W)

  75.426 μs (1 allocation: 16 bytes)


7.5259324525120165e6

In [25]:
function Epot_opt(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = view(r,i,:)
        for j in i+1:N
            rj = view(r,j,:)
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - rj[jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [26]:
@btime Epot_opt(r,W)

  75.445 μs (1 allocation: 16 bytes)


7.5259324525120165e6

In [20]:
?reduce

search: [0m[1mr[22m[0m[1me[22m[0m[1md[22m[0m[1mu[22m[0m[1mc[22m[0m[1me[22m map[0m[1mr[22m[0m[1me[22m[0m[1md[22m[0m[1mu[22m[0m[1mc[22m[0m[1me[22m



```
reduce(op, itr; [init])
```

Reduce the given collection `itr` with the given binary operator `op`. If provided, the initial value `init` must be a neutral element for `op` that will be returned for empty collections. It is unspecified whether `init` is used for non-empty collections.

For empty collections, providing `init` will be necessary, except for some special cases (e.g. when `op` is one of `+`, `*`, `max`, `min`, `&`, `|`) when Julia can determine the neutral element of `op`.

Reductions for certain commonly-used operators may have special implementations, and should be used instead: `maximum(itr)`, `minimum(itr)`, `sum(itr)`, `prod(itr)`,  `any(itr)`, `all(itr)`.

The associativity of the reduction is implementation dependent. This means that you can't use non-associative operations like `-` because it is undefined whether `reduce(-,[1,2,3])` should be evaluated as `(1-2)-3` or `1-(2-3)`. Use [`foldl`](@ref) or [`foldr`](@ref) instead for guaranteed left or right associativity.

Some operations accumulate error. Parallelism will be easier if the reduction can be executed in groups. Future versions of Julia might change the algorithm. Note that the elements are not reordered if you use an ordered collection.

# Examples

```jldoctest
julia> reduce(*, [2; 3; 4])
24

julia> reduce(*, [2; 3; 4]; init=-1)
-24
```

---

```
reduce(f, A; dims=:, [init])
```

Reduce 2-argument function `f` along dimensions of `A`. `dims` is a vector specifying the dimensions to reduce, and the keyword argument `init` is the initial value to use in the reductions. For `+`, `*`, `max` and `min` the `init` argument is optional.

The associativity of the reduction is implementation-dependent; if you need a particular associativity, e.g. left-to-right, you should write your own loop or consider using [`foldl`](@ref) or [`foldr`](@ref). See documentation for [`reduce`](@ref).

# Examples

```jldoctest
julia> a = reshape(Vector(1:16), (4,4))
4×4 Array{Int64,2}:
 1  5   9  13
 2  6  10  14
 3  7  11  15
 4  8  12  16

julia> reduce(max, a, dims=2)
4×1 Array{Int64,2}:
 13
 14
 15
 16

julia> reduce(max, a, dims=1)
1×4 Array{Int64,2}:
 4  8  12  16
```


In [10]:
function Epot_from_glob_view(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    #ri  = zeros(T, ncols) # don't need this with views
    rij = zeros(T, ncols) 
    
    N   = size(r,1)
    
    @inbounds for i in 1:N-1 # disable bounds checks
        #for id in 1:3
        #    ri[id] = r[i,id]
        #end
        ri = view(r,i,:)
        for j in i+1:N
            aux = zero(T)
            rij .= ri .- r[j,:]
            rr  = norm(rij)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [11]:
@btime Epot_from_glob_view(r,W)

  216.851 μs (2081 allocations: 223.58 KiB)


1.4750469531604666e7

In [12]:
function Epot_from_glob_copy(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    #ri  = zeros(T, ncols) # don't need this with views
    rij = zeros(T, ncols) 
    
    N   = size(r,1)
    
    @inbounds for i in 1:N-1 # disable bounds checks
        #for id in 1:3
        #    ri[id] = r[i,id]
        #end
        ri = r[i,:]
        for j in i+1:N
            aux = zero(T)
            rij .= ri .- r[j,:]
            rr  = norm(rij)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 

In [14]:
@btime Epot_from_glob_copy(r,W)

  215.008 μs (2081 allocations: 227.52 KiB)


1.4750469531604666e7

In [None]:
V(x) = 1.0/x^5 # no need for inline here.

struct mine{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine(21.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_from_glob(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    ri  = zeros(T, ncols)
    rij = zeros(T, ncols)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        for id in 1:3
            ri[id] = r[i,id]
        end
        for j in i+1:N
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - r[j,jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 




### Getting to the solution

In [None]:
@inline function V(x)
    1.0/x^5
end;

mutable struct mine
    x   :: Float64
    Pot :: Function
    mine() = new()
end

In [None]:
W     = mine();
W.x   = 1.0;
W.Pot = V;      # here I assign the potential function
Np    = 64;
r     = rand(Np,3);

In [None]:
function Epot(r,glob::mine)
    Ep  = 0.0
    ri  = zero(r[1,:])
    rij = zero(r[1,:])
    N   = size(r,1)
    for i in 1:N-1
        for id in 1:3
            ri[id] = r[i,id]
        end
        for j in i+1:N
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            #Ep += glob.Pot(rr)
            Ep += V(rr)
        end
    end
    Ep
end;      

In [None]:
@btime Epot(r,W)

In [None]:
function Epot2view_refilling(r,glob::mine)
    Ep  = 0.0
    ri  = zero(r[1,:])
    rij = zero(r[1,:])
    N   = size(r,1)
    
    for i in 1:N-1
        ri .= view(r,i,:)
        for j in i+1:N
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            #Ep += glob.Pot(rr)
            Ep += V(rr)
        end
    end
    Ep
end;      

In [None]:
@btime Epot2view_refilling(r,W)

In [None]:
function Epot2view(r,glob::mine)
    Ep  = 0.0
    rij = zero(r[1,:])
    N   = size(r,1)
    
    for i in 1:N-1
        ri = view(r,i,:)
        for j in i+1:N
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            #Ep += glob.Pot(rr)
            Ep += V(rr)
        end
    end
    Ep
end;      

In [None]:
@btime Epot2view(r,W)

In [None]:
function Epot2copy(r,glob::mine)
    Ep  = 0.0
    ri  = zero(r[1,:])
    rij = zero(r[1,:])
    N   = size(r,1)
    
    for i in 1:N-1
        ri .= r[i,:]
        for j in i+1:N
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            #Ep += glob.Pot(rr)
            Ep += V(rr)
        end
    end
    Ep
end;      

In [None]:
@btime Epot2copy(r,W)

In [None]:
function Epot2noview(r,glob::mine)
    Ep  = 0.0
    ri  = zero(r[1,:])
    rij = zero(r[1,:])
    N   = size(r,1)
    
    for i in 1:N-1
        ri .=r[i,:]
        for j in i+1:N
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            #Ep += glob.Pot(rr)
            Ep += V(rr)
        end
    end
    Ep
end;      

In [None]:
function fill_vec_from_rowA!(vec, A, i)
    for j in 1:length(vec)
        vec[j] = A[i,j]
    end
end

In [None]:
function Epot3(r,glob::mine)
    Ep  = 0.0
    n_rows = size(r,1)
    n_cols = size(r,2)
    ri  = zeros(n_cols)
    rij = zeros(n_cols)
    
    for i in 1:n_rows-1
        fill_vec_from_rowA!(ri,r,i)
        for j in i+1:n_rows
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            #Ep += glob.Pot(rr)
            Ep += V(rr)
        end
    end
    Ep
end

In [None]:
@btime Epot3(r,W)

### Making function form a struct fast

In [None]:
using BenchmarkTools
using LinearAlgebra

In [None]:
V(x) = 1.0/x^5 # no need for inline here.

struct mine2{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine2(21.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_from_glob(r,glob::mine2)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    ri  = zeros(T, ncols)
    rij = zeros(T, ncols)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        for id in 1:3
            ri[id] = r[i,id]
        end
        for j in i+1:N
            for jd in 1:3
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            Ep += glob.Pot(rr)
            # Ep += V(rr)
        end
    end
    Ep
end; 


In [None]:
@btime Epot_from_glob($r,$W)

In [None]:
@time Epot_from_glob(r,W)

Compute the norm inplace

In [None]:
V(x) = 1.0/x^5 # no need for inline here.

struct mine{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine(21.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_from_glob(r,glob::mine2)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    ri  = zeros(T, ncols)
    rij = zeros(T, ncols)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        for id in 1:3
            ri[id] = r[i,id]
        end
        for j in i+1:N
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - r[j,jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    Ep
end; 


In [None]:
@btime Epot_from_glob(r,W)

Change rows by cols

In [None]:
V(x) = 1.0/x^5 # no need for inline here.

struct mine2{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine2(21.0, V);
Np    = 64;
#r     = rand(3,Np);


function Epot_from_glob(r,glob::mine2)
    Ep  = 0.0
    T = eltype(r)
    aux_vec_size = size(r, 1)
    ri  = zeros(T, aux_vec_size)
    N   = size(r,2)
    @inbounds for i in 1:N-1 # disable bounds checks
        for id in 1:3
            ri[id] = r[i,id]
        end
        for j in i+1:N
            aux = zero(T)
            for jd in 1:3
                aux += (ri[jd] - r[jd,j])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
            # Ep += V(rr)
        end
    end
    Ep
end; 



In [None]:
@time Epot_from_glob(r,W)

In [None]:
@btime Epot_from_glob($r,$W)

In [None]:

W     = mine2(1.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_from_glob(r,glob::mine2)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    ri  = zeros(T, ncols)
    rij = zeros(T, ncols)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = view(r,i,:)
        for j in i+1:N
            #for jd in 1:3
            #    rij[jd] = ri[jd] - r[j,jd]
            #end
            rij = ri - view(r,j,:)
            rr  = norm(rij)
            Ep += glob.Pot(rr)
            # Ep += V(rr)
        end
    end
    Ep
end; 



In [None]:
@btime Epot_from_glob($r,$W)

In [None]:
using StaticArrays

In [None]:

W     = mine2(1.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_from_glob(r,glob::mine2)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    ri  = zeros(T, ncols)
    rij = zeros(T, ncols)
    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = SVector(r[i,:]...)
        for j in i+1:N
            rj = SVector(r[j,:]...)
            rij = ri - rj
            rr  = norm(rij)
            Ep += glob.Pot(rr)
            # Ep += V(rr)
        end
    end
    Ep
end; 



In [None]:
@btime Epot_from_glob($r,$W)

In [None]:
ri = SVector(zeros(Float64, 3)...)
rij = SVector(zeros(Float64, 3)...)

In [None]:
rij[1] = ri[1]

In [None]:
aux = rij + ri

In [None]:
function f(x,y,z)
    x .+ y .+ z
end
function g(x,y,z)
    x + y + z
end
using StaticArrays, BenchmarkTools
x = @SVector [2.0,3.0,4.0]
y = @SVector [2.0,3.0,4.0]
z = @SVector [2.0,3.0,4.0]

@btime f(x,y,z) # 33.372 μs (21 allocations: 768 bytes)
@btime g(x,y,z) # 19.085 ns (1 allocation: 32 bytes)

@btime f($x,$y,$z) # 3.220 ns (0 allocations: 0 bytes)
@btime g($x,$y,$z) # 3.220 ns (0 allocations: 0 bytes)

In [None]:
using StaticArrays

V(x) = 1.0/x^5 # no need for inline here.

struct mine{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine(1.0, V);
Np    = 64;
r     = rand(Np,3);


function Epot_static(r,glob::mine)
    Ep  = 0.0
    T = eltype(r)
    ncols = size(r, 2)
    #ri  = zeros(T, ncols)
#    ri = SVector(zeros(T, ncols)...)    
    rij = zeros(T, ncols)
    #rij = SVector(zeros(T, ncols)...)

    N   = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks

        ri = SVector(r[i,:]...)
        
        for j in i+1:N
            #for jd in 1:3
            #    rij[jd] = ri[jd] - r[j,jd]
            #end
            
            rr  = norm(ri - view(r,j,:))
            Ep += glob.Pot(rr)
            # Ep += V(rr)
        end
    end
    Ep
end; 

In [None]:
@btime Epot_static($r,$W)