In [1]:
using BenchmarkTools
using LinearAlgebra

### Getting to the solution

In [2]:
# Pair potential: returns 1.0 / x^5 for the given separation x.
@inline V(x) = 1.0 / x^5;

# Mutable parameter container holding a scalar `x` and a potential `Pot`.
# The only constructor takes no arguments and calls `new()` without values,
# so both fields have to be assigned by the caller afterwards.
# NOTE(review): `Pot` is declared with the abstract type `Function`; calls made
# through this field presumably cannot be specialised — confirm if it matters.
mutable struct mine
    x::Float64
    Pot::Function
    function mine()
        return new()
    end
end

In [3]:
# Create the parameter object empty, then fill in its two fields.
W = mine();
W.x = 1.0;
W.Pot = V;          # the potential function is assigned here
# Np x 3 matrix of uniform random numbers, one row per point.
Np = 64;
r = rand(Np, 3);

In [4]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is copied element by element into a scratch vector, and each pair
# difference is written into a second scratch vector before taking its norm.
# `glob` is accepted but not read: the call through `glob.Pot` is disabled
# and the global `V` is used instead.
function Epot(r,glob::mine)
    nrows  = size(r,1)
    anchor = zero(r[1,:])      # scratch copy of row i
    delta  = zero(r[1,:])      # scratch for row i minus row j
    total  = 0.0
    for i in 1:(nrows - 1)
        for k in 1:3
            anchor[k] = r[i,k]
        end
        for j in (i + 1):nrows
            for k in 1:3
                delta[k] = anchor[k] - r[j,k]
            end
            total += V(norm(delta))
        end
    end
    return total
end;

In [5]:
# Benchmark Epot on the global `r` and `W`; they are passed without `$` interpolation.
@btime Epot(r,W)

  100.639 μs (5 allocations: 464 bytes)


1.5565368692429937e7

In [12]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is referenced through a view created once per outer iteration; the
# pair difference is written element by element into a preallocated buffer.
# `glob` is not read (the call through `glob.Pot` is disabled).
function Epot(r,glob::mine)
    total = 0.0
    delta = zero(r[1,:])          # scratch for row i minus row j
    nrows = size(r,1)
    for i in 1:(nrows - 1)
        rowi = @view r[i,:]       # N-1 views are created in total
        for j in (i + 1):nrows
            for k in 1:3
                delta[k] = rowi[k] - r[j,k]
            end
            total += V(norm(delta))
        end
    end
    return total
end;

In [13]:
# Benchmark the view-based Epot on the global `r` and `W` (no `$` interpolation).
@btime Epot(r,W)

  101.354 μs (3 allocations: 240 bytes)


1.5565368692429937e7

In [26]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Both rows are taken as views and subtracted with `-`, which produces a new
# vector for every pair. `glob` is not read (the `glob.Pot` call is disabled).
function Epot(r,glob::mine)
    total = 0.0
    delta = zero(r[1,:])              # allocated here but not used below
    nrows = size(r,1)
    for i in 1:(nrows - 1)
        rowi = @view r[i,:]           # N-1 views are created in total
        for j in (i + 1):nrows
            rowj = @view r[j,:]       # one additional view per (i, j) pair
            total += V(norm(rowi - rowj))
        end
    end
    return total
end;

In [27]:
# Benchmark the view-minus-view Epot on the global `r` and `W` (no `$` interpolation).
@btime Epot(r,W)

  231.000 μs (4098 allocations: 318.19 KiB)


1.5565368692429937e7

In [48]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# The pair difference is computed with a broadcast whose result is assigned
# with `=`. `glob` is not read (the `glob.Pot` call is disabled).
function Epot(r,glob::mine)
    total = 0.0
    delta = zero(r[1,:])
    nrows = size(r,1)
    for i in 1:(nrows - 1)
        rowi = @view r[i,:]
        for j in (i + 1):nrows
            rowj = @view r[j,:]
            # Memory for `delta` was preallocated above, but the result is NOT
            # stored in place: `=` rebinds the name to the broadcast's output.
            delta = rowi .- rowj
            total += V(norm(delta))
        end
    end
    return total
end;

In [49]:
# Benchmark the out-of-place broadcast Epot on the global `r` and `W` (no `$` interpolation).
@btime Epot(r,W)

  209.429 μs (4098 allocations: 318.19 KiB)


1.5565368692429937e7

In [50]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# The pair difference is broadcast into a preallocated buffer with `.=`.
# `glob` is not read (the `glob.Pot` call is disabled).
function Epot(r,glob::mine)
    total = 0.0
    delta = zero(r[1,:])
    nrows = size(r,1)
    for i in 1:(nrows - 1)
        rowi = @view r[i,:]
        for j in (i + 1):nrows
            rowj = @view r[j,:]
            # Memory for `delta` is preallocated and the result IS stored in place.
            delta .= rowi .- rowj
            total += V(norm(delta))
        end
    end
    return total
end;

In [51]:
# Benchmark the in-place broadcast Epot on the global `r` and `W` (no `$` interpolation).
@btime Epot(r,W)

  130.087 μs (2082 allocations: 97.69 KiB)


1.5565368692429937e7

In [None]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`, writing each
# pair difference into a preallocated buffer through an in-place broadcast.
# `glob` is not read (the `glob.Pot` call is disabled).
function Epot(r, glob::mine)
    nrows = size(r,1)
    delta = zero(r[1,:])
    total = 0.0

    for i in 1:(nrows - 1)
        rowi = view(r, i, :)
        for j in (i + 1):nrows
            rowj = view(r, j, :)
            # Memory for `delta` is preallocated and the result IS stored in place.
            @. delta = rowi - rowj
            total += V(norm(delta))
        end
    end
    return total
end;

In [6]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# A scratch vector is refilled in place from a view of row i on each outer
# iteration; pair differences go element by element into a second buffer.
# `glob` is not read (the `glob.Pot` call is disabled).
function Epot2view_refilling(r,glob::mine)
    nrows  = size(r,1)
    anchor = zero(r[1,:])
    delta  = zero(r[1,:])
    total  = 0.0

    for i in 1:(nrows - 1)
        anchor .= view(r, i, :)     # refill the scratch vector from row i
        for j in (i + 1):nrows
            for k in 1:3
                delta[k] = anchor[k] - r[j,k]
            end
            total += V(norm(delta))
        end
    end
    return total
end;

In [7]:
# Benchmark Epot2view_refilling on the global `r` and `W` (no `$` interpolation).
@btime Epot2view_refilling(r,W)

  104.001 μs (68 allocations: 3.41 KiB)


8.705694478528883e6

In [8]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is read through a view (no copy); pair differences are written
# element by element into a preallocated buffer.
# `glob` is not read (the `glob.Pot` call is disabled).
function Epot2view(r,glob::mine)
    nrows = size(r,1)
    delta = zero(r[1,:])
    total = 0.0

    for i in 1:(nrows - 1)
        rowi = @view r[i,:]
        for j in (i + 1):nrows
            for k in 1:3
                delta[k] = rowi[k] - r[j,k]
            end
            total += V(norm(delta))
        end
    end
    return total
end;

In [9]:
# Benchmark Epot2view on the global `r` and `W` (no `$` interpolation).
@btime Epot2view(r,W)

  101.331 μs (3 allocations: 240 bytes)


8.705694478528883e6

In [18]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is obtained with a slice, i.e. a fresh copy per outer iteration; pair
# differences are written element by element into a preallocated buffer.
# `glob` is not read (the `glob.Pot` call is disabled).
function Epot2copy(r,glob::mine)
    nrows = size(r,1)
    delta = zero(r[1,:])
    total = 0.0

    for i in 1:(nrows - 1)
        rowi = r[i,:]               # slicing copies row i
        for j in (i + 1):nrows
            for k in 1:3
                delta[k] = rowi[k] - r[j,k]
            end
            total += V(norm(delta))
        end
    end
    return total
end;

In [19]:
# Benchmark Epot2copy on the global `r` and `W` (no `$` interpolation).
@btime Epot2copy(r,W)

  104.669 μs (66 allocations: 7.13 KiB)


8.705694478528883e6

In [None]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is sliced (copied) and then broadcast into a preallocated scratch
# vector; pair differences are written element by element into a second one.
# `glob` is not read (the `glob.Pot` call is disabled).
function Epot2noview(r,glob::mine)
    nrows  = size(r,1)
    anchor = zero(r[1,:])
    delta  = zero(r[1,:])
    total  = 0.0

    for i in 1:(nrows - 1)
        anchor .= r[i,:]            # slice first, then store in place
        for j in (i + 1):nrows
            for k in 1:3
                delta[k] = anchor[k] - r[j,k]
            end
            total += V(norm(delta))
        end
    end
    return total
end;

In [None]:
# Copy entries of row `i` of `A` into `vec`, in place.
# Exactly one entry is written per index of `vec`: vec[j] = A[i, j].
# Returns `nothing`; the result is delivered by mutating `vec`.
function fill_vec_from_rowA!(vec, A, i)
    # eachindex follows vec's own index range instead of assuming 1:length(vec)
    for j in eachindex(vec)
        vec[j] = A[i,j]
    end
    return nothing
end

In [None]:
# Sum V(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is copied into a scratch vector by fill_vec_from_rowA!, and each pair
# difference is written into a second scratch vector before taking its norm.
# `glob` is not read: the call through `glob.Pot` is disabled and the global
# `V` is used instead.
function Epot3(r,glob::mine)
    Ep  = 0.0
    n_rows = size(r,1)
    n_cols = size(r,2)
    ri  = zeros(n_cols)
    rij = zeros(n_cols)

    for i in 1:n_rows-1
        fill_vec_from_rowA!(ri,r,i)
        for j in i+1:n_rows
            # Loop over all n_cols components so the difference uses the same
            # width as the buffers and the row copy (it was hard-coded to 3).
            for jd in 1:n_cols
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            Ep += V(rr)
        end
    end
    return Ep
end

In [None]:
# Benchmark Epot3; `r` and `W` are globals, so they are interpolated with `$`
# in the same way as the later Epot_from_glob benchmarks.
@btime Epot3($r,$W)

### Making a function stored in a struct fast

In [None]:
using BenchmarkTools
using LinearAlgebra

In [None]:
# Pair potential 1/x^5.
V(x) = 1.0/x^5 # no need for inline here.

# Immutable holder for a scalar `x` and a potential `Pot`; the type parameter F
# records the concrete type of the stored function.
struct mine2{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine2(21.0, V);
Np    = 64;
r     = rand(Np,3);


# Sum glob.Pot(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is copied into the scratch vector `ri`, each pair difference is written
# into `rij`, and its Euclidean norm is passed to the potential stored in `glob`.
# Returns 0.0 when `r` has fewer than two rows.
function Epot_from_glob(r,glob::mine2)
    Ep    = 0.0
    T     = eltype(r)
    N     = size(r,1)
    ncols = size(r,2)
    ri    = zeros(T, ncols)
    rij   = zeros(T, ncols)
    # Bounds checks are disabled, so every loop is driven by the actual sizes:
    # the component loops run over 1:ncols (previously hard-coded to 1:3, which
    # went past the buffers whenever r had fewer than three columns).
    # Assumes `r` uses ordinary 1-based indexing.
    @inbounds for i in 1:N-1
        for id in 1:ncols
            ri[id] = r[i,id]
        end
        for j in i+1:N
            for jd in 1:ncols
                rij[jd] = ri[jd] - r[j,jd]
            end
            rr  = norm(rij)
            Ep += glob.Pot(rr)
        end
    end
    return Ep
end;


In [None]:
# Benchmark Epot_from_glob with the globals `r` and `W` interpolated via `$`.
@btime Epot_from_glob($r,$W)

In [None]:
# Single timed call with @time on the global `r` and `W`.
# NOTE(review): if this is the first call after (re)defining Epot_from_glob, the
# figure presumably includes compilation — confirm before comparing with @btime.
@time Epot_from_glob(r,W)

Compute the norm in place (accumulate the squared differences in a scalar instead of filling a difference vector)

In [None]:
# Pair potential 1/x^5.
V(x) = 1.0/x^5 # no need for inline here.

# Immutable holder for a scalar `x` and a potential `Pot`; the type parameter F
# records the concrete type of the stored function.
struct mine{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine(21.0, V);
Np    = 64;
r     = rand(Np,3);


# Sum glob.Pot(|row_i - row_j|) over every pair of rows i < j of `r`.
# The distance is computed without a difference vector: squared component
# differences are accumulated in a scalar and the square root is taken once.
# The method is declared for `mine`, the struct defined in this cell and the
# type of `W`; it was previously declared for `mine2`, which this cell does not
# define, so calling it with `W` found no matching method.
function Epot_from_glob(r,glob::mine)
    Ep    = 0.0
    T     = eltype(r)
    N     = size(r,1)
    ncols = size(r,2)
    ri    = zeros(T, ncols)
    # Bounds checks are disabled, so the component loops run over 1:ncols
    # rather than a hard-coded 1:3. Assumes `r` uses 1-based indexing.
    @inbounds for i in 1:N-1
        for id in 1:ncols
            ri[id] = r[i,id]
        end
        for j in i+1:N
            aux = zero(T)
            for jd in 1:ncols
                aux += (ri[jd] - r[j,jd])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    return Ep
end;


In [None]:
# Benchmark the scalar-accumulator version; the globals `r` and `W` are
# interpolated with `$`, as in the other Epot_from_glob benchmarks.
@btime Epot_from_glob($r,$W)

Swap rows and columns (store the coordinates of each point along the first dimension)

In [None]:
# Pair potential 1/x^5.
V(x) = 1.0/x^5 # no need for inline here.

# Immutable holder for a scalar `x` and a potential `Pot`; the type parameter F
# records the concrete type of the stored function.
struct mine2{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine2(21.0, V);
Np    = 64;
# Coordinates are stored column-wise here (3 x Np) to match the r[jd,j]
# indexing used by the kernel below.
r     = rand(3,Np);


# Sum glob.Pot(|col_i - col_j|) over every pair of columns i < j of `r`.
# Column i is copied into the scratch vector `ri`; the squared distance to
# column j is accumulated in a scalar and the square root is taken once.
function Epot_from_glob(r,glob::mine2)
    Ep   = 0.0
    T    = eltype(r)
    ndim = size(r, 1)
    ri   = zeros(T, ndim)
    N    = size(r,2)
    # Bounds checks are disabled; all loops are driven by the actual sizes.
    # Assumes `r` uses 1-based indexing.
    @inbounds for i in 1:N-1
        for id in 1:ndim
            # Component first, point second — the same order as r[jd,j] below
            # (this used to read r[i,id], i.e. the transposed element).
            ri[id] = r[id,i]
        end
        for j in i+1:N
            aux = zero(T)
            for jd in 1:ndim
                aux += (ri[jd] - r[jd,j])^2
            end
            rr  = sqrt(aux)
            Ep += glob.Pot(rr)
        end
    end
    return Ep
end;



In [None]:
# Single timed call with @time on the global `r` and `W`.
# NOTE(review): as the first call after redefining Epot_from_glob, this figure
# presumably includes compilation — confirm before comparing with @btime.
@time Epot_from_glob(r,W)

In [None]:
# Benchmark the column-wise version with the globals `r` and `W` interpolated via `$`.
@btime Epot_from_glob($r,$W)

In [None]:

# Parameter object with x = 1.0 and V as the potential, plus a fresh Np x 3
# matrix of uniform random numbers.
W = mine2(1.0, V);
Np = 64;
r = rand(Np, 3);


# Sum glob.Pot(|row_i - row_j|) over every pair of rows i < j of `r`.
# Rows are taken as views and subtracted with `-`, so a new difference vector
# is produced for every pair before its norm is passed to the potential.
# The `ri`/`rij` buffers that used to be preallocated here were never read:
# both names were rebound inside the loop (and `ri` changed type from a
# vector to a view), so they have been removed together with the unused
# `T` and `ncols`.
function Epot_from_glob(r,glob::mine2)
    Ep = 0.0
    N  = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = view(r,i,:)
        for j in i+1:N
            rij = ri - view(r,j,:)
            rr  = norm(rij)
            Ep += glob.Pot(rr)
        end
    end
    return Ep
end;



In [None]:
# Benchmark the view-based version with the globals `r` and `W` interpolated via `$`.
@btime Epot_from_glob($r,$W)

In [None]:
using StaticArrays

In [None]:

# Parameter object with x = 1.0 and V as the potential, plus a fresh Np x 3
# matrix of uniform random numbers.
W = mine2(1.0, V);
Np = 64;
r = rand(Np, 3);


# Sum glob.Pot(|row_i - row_j|) over every pair of rows i < j of `r`.
# Each row is sliced and splatted into an SVector; the difference of the two
# static vectors is normed and passed to the potential stored in `glob`.
# The `ri`/`rij` vectors that used to be preallocated here were never read:
# both names were rebound to SVectors inside the loop (changing their type),
# so they have been removed together with the unused `T` and `ncols`.
# NOTE(review): the length of `r[i,:]` is only known at run time, so the
# splatted SVector constructor presumably cannot be resolved statically —
# confirm whether a fixed-size construction is wanted here.
function Epot_from_glob(r,glob::mine2)
    Ep = 0.0
    N  = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = SVector(r[i,:]...)
        for j in i+1:N
            rj  = SVector(r[j,:]...)
            rij = ri - rj
            rr  = norm(rij)
            Ep += glob.Pot(rr)
        end
    end
    return Ep
end;



In [None]:
# Benchmark the SVector-based version with the globals `r` and `W` interpolated via `$`.
@btime Epot_from_glob($r,$W)

In [None]:
# Two static vectors holding three Float64 zeros each.
ri  = SVector(0.0, 0.0, 0.0)
rij = SVector(0.0, 0.0, 0.0)

In [None]:
# Element assignment into the static vector `rij`.
# NOTE(review): SVector values are presumably immutable, in which case this
# line raises an error — confirm that demonstrating this is the intent.
rij[1] = ri[1]

In [None]:
# Out-of-place sum of the two static vectors; the result is bound to a new name.
aux = rij + ri

In [None]:
# Element-wise (broadcast) sum of the three arguments.
function f(x,y,z)
    return @. x + y + z
end
# Plain (non-broadcast) sum of the three arguments via `+`.
g(x,y,z) = +(x, y, z)
using StaticArrays, BenchmarkTools
# Three identical 3-element static vectors used as benchmark inputs.
x = @SVector [2.0,3.0,4.0]
y = @SVector [2.0,3.0,4.0]
z = @SVector [2.0,3.0,4.0]

# Globals passed directly (no `$`): the recorded timings are larger and report allocations.
@btime f(x,y,z) # 33.372 μs (21 allocations: 768 bytes)
@btime g(x,y,z) # 19.085 ns (1 allocation: 32 bytes)

# Globals interpolated with `$`: both versions record the same time and zero allocations.
@btime f($x,$y,$z) # 3.220 ns (0 allocations: 0 bytes)
@btime g($x,$y,$z) # 3.220 ns (0 allocations: 0 bytes)

In [None]:
using StaticArrays

# Pair potential 1/x^5.
V(x) = 1.0/x^5 # no need for inline here.

# Immutable holder for a scalar `x` and a potential `Pot`; the type parameter F
# records the concrete type of the stored function.
struct mine{F<:Function} # doesn't need to be mutable, also parametrize on function type
    x   :: Float64
    Pot :: F
end


W     = mine(1.0, V);
Np    = 64;
r     = rand(Np,3);


# Sum glob.Pot(|row_i - row_j|) over every pair of rows i < j of `r`.
# Row i is sliced and splatted into an SVector once per outer iteration; row j
# is taken as a view and subtracted from it before the norm is evaluated.
# The unused `rij` buffer, the unused `T`/`ncols` locals and the commented-out
# alternatives have been removed; the computation itself is unchanged.
# NOTE(review): the length of `r[i,:]` is only known at run time, so the
# splatted SVector constructor presumably cannot be resolved statically —
# confirm whether a fixed-size construction is wanted here.
function Epot_static(r,glob::mine)
    Ep = 0.0
    N  = size(r,1)
    @inbounds for i in 1:N-1 # disable bounds checks
        ri = SVector(r[i,:]...)
        for j in i+1:N
            rr  = norm(ri - view(r,j,:))
            Ep += glob.Pot(rr)
        end
    end
    return Ep
end;

In [None]:
# Benchmark Epot_static with the globals `r` and `W` interpolated via `$`.
@btime Epot_static($r,$W)