# Vectorization in Julia


In [1]:
# Quadratic 3x^2 + 5x + 2. The square is written with the dotted `.^` operator,
# which is what lets `vec!` hand this function a whole array as well as a scalar.
function f(x)
    return 3 * x.^2 + 5 * x + 2
end

# traditional-style vectorization:
function vec!(X)
    # Only `.^` and `sqrt.` are dotted here; `*`, `+`, `-` and the call to `f` are
    # ordinary whole-array operations, so nothing is fused into a single loop.
    arg = 2 * X.^2 + 6 * X.^3 - sqrt.(X)
    X .= f(arg)  # copy the result back into X in place
    return X
end

# new-style vectorization (dot operations = syntactic loop fusion):
function newvec!(X)
    # `@.` puts a dot on every call, operator and the assignment in this expression,
    # so the whole right-hand side is one fused broadcast written directly into X.
    @. X = f(2 * X^2 + 6 * X^3 - sqrt(X))
    return X
end

# devectorized (explicit loops):
# Overwrite every element x of X with f(2x^2 + 6x^3 - sqrt(x)) using an explicit
# scalar loop, then return X.
function devec!(X)
    for idx in eachindex(X)
        xi = X[idx]
        arg = 2 * xi^2 + 6 * xi^3 - sqrt(xi)
        X[idx] = f(arg)
    end
    return X
end

devec! (generic function with 1 method)

In [2]:
using BenchmarkTools

[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /Users/david/.julia/lib/v0.6/JLD.ji for module JLD.
[39m

In [3]:
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 10 # use 10s benchmarks to reduce timing noise


10

In [4]:
# Benchmark input: a vector of 10^6 zeros. `$X` interpolates the global into the
# benchmarked expression instead of referring to it by name.
X = zeros(10^6)
t_vec = @benchmark vec!($X)

BenchmarkTools.Trial: 
  memory estimate:  91.55 mb
  allocs estimate:  24
  --------------
  minimum time:     48.337 ms (33.89% GC)
  median time:      100.046 ms (63.22% GC)
  mean time:        102.237 ms (64.16% GC)
  maximum time:     171.490 ms (77.82% GC)
  --------------
  samples:          98
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [5]:
t_devec = @benchmark devec!($X)

BenchmarkTools.Trial: 
  memory estimate:  0.00 bytes
  allocs estimate:  0
  --------------
  minimum time:     3.787 ms (0.00% GC)
  median time:      4.060 ms (0.00% GC)
  mean time:        4.398 ms (0.00% GC)
  maximum time:     10.183 ms (0.00% GC)
  --------------
  samples:          2271
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [6]:
t_newvec = @benchmark newvec!($X)

BenchmarkTools.Trial: 
  memory estimate:  0.00 bytes
  allocs estimate:  0
  --------------
  minimum time:     4.261 ms (0.00% GC)
  median time:      4.525 ms (0.00% GC)
  mean time:        4.930 ms (0.00% GC)
  maximum time:     10.504 ms (0.00% GC)
  --------------
  samples:          2026
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [8]:
# Rebind X to a 100-element vector of zeros and benchmark devec! again on this
# much smaller input; this also overwrites the earlier `t_devec` result.
X = zeros(10^2)
t_devec = @benchmark devec!($X)

BenchmarkTools.Trial: 
  memory estimate:  0.00 bytes
  allocs estimate:  0
  --------------
  minimum time:     385.862 ns (0.00% GC)
  median time:      387.049 ns (0.00% GC)
  mean time:        422.251 ns (0.00% GC)
  maximum time:     1.632 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     203
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [9]:
t_newvec = @benchmark newvec!($X)

BenchmarkTools.Trial: 
  memory estimate:  0.00 bytes
  allocs estimate:  0
  --------------
  minimum time:     445.081 ns (0.00% GC)
  median time:      458.665 ns (0.00% GC)
  mean time:        505.428 ns (0.00% GC)
  maximum time:     1.656 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     197
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [19]:
X= rand(10,5)

10×5 Array{Float64,2}:
 0.983754  0.440438   0.114031  0.684705   0.496662
 0.338432  0.998433   0.660499  0.157753   0.335764
 0.691727  0.543788   0.503528  0.205895   0.587706
 0.287199  0.652762   0.149192  0.648155   0.564834
 0.531764  0.806885   0.638482  0.538228   0.651558
 0.889328  0.721033   0.692548  0.799072   0.288586
 0.93185   0.0193211  0.557542  0.802727   0.286672
 0.387499  0.665174   0.238567  0.0782044  0.75774 
 0.843337  0.909      0.360855  0.320794   0.819854
 0.159578  0.217724   0.405082  0.952411   0.912674

### Using broadcasting with non-numeric types

In [21]:
# NOTE(review): the recorded result (6, 8) is 2x, whereas `f(x) = 3x.^2 + 5x + 2`
# from cell In [1] would give 44 and 70. f was presumably redefined in a cell that
# is not shown here — confirm before relying on this output.
f.([3,4])

2-element Array{Int64,1}:
 6
 8

In [24]:
# Broadcasting over a scalar returns a scalar rather than an array.
# NOTE(review): the recorded result 4 does not match `f(x) = 3x.^2 + 5x + 2` from
# cell In [1], which gives 24 for x = 2; f was presumably redefined in a cell not shown.
f.(2)

4

In [25]:
x = ["Casa","Pepe","maria","Elena"]

4-element Array{String,1}:
 "Casa" 
 "Pepe" 
 "maria"
 "Elena"

In [26]:
# NOTE(review): the recorded MethodError shows that `methodswith` accepts a type, not a
# value. `methodswith(String)` (or `methodswith(typeof("Pepe"))`) is presumably what was
# intended here.
methodswith("Pepe")

LoadError: [91mMethodError: no method matching methodswith(::String)[0m
Closest candidates are:
  methodswith([91m::Type{T}[39m) at interactiveutil.jl:545
  methodswith([91m::Type{T}[39m, [91m::Function[39m) at interactiveutil.jl:519
  methodswith([91m::Type{T}[39m, [91m::Function[39m, [91m::Bool[39m) at interactiveutil.jl:519
  ...[39m

In [27]:
# NOTE(review): this global is not read by any cell shown here; the `aux` assigned
# inside `suma` and `suma_simd` is a separate local variable of those functions.
aux = 0

0

In [95]:
# Return the sum of k/N for k = 1, ..., N, accumulated in floating point one term
# at a time. Returns 0.0 when the range 1:N is empty.
function suma(N)
    denom = float(N)  # convert once so the division in the loop is float / float
    total = 0.0
    for k in 1:N
        total = total + k / denom
    end
    return total
end

suma (generic function with 1 method)

In [96]:
@benchmark suma(10^6)

BenchmarkTools.Trial: 
  memory estimate:  0.00 bytes
  allocs estimate:  0
  --------------
  minimum time:     4.264 ms (0.00% GC)
  median time:      4.473 ms (0.00% GC)
  mean time:        4.848 ms (0.00% GC)
  maximum time:     8.498 ms (0.00% GC)
  --------------
  samples:          2062
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [97]:
# Same sum as `suma` (k/N for k = 1, ..., N), but the loop is annotated with `@simd`,
# which permits the compiler to reorder the floating-point accumulation.
function suma_simd(N)
    denom = float(N)  # convert once so the division in the loop is float / float
    total = 0.0
    @simd for k in 1:N
        total = total + k / denom
    end
    return total
end

suma_simd (generic function with 1 method)

In [98]:
@benchmark suma_simd(10^6)

BenchmarkTools.Trial: 
  memory estimate:  0.00 bytes
  allocs estimate:  0
  --------------
  minimum time:     4.258 ms (0.00% GC)
  median time:      4.276 ms (0.00% GC)
  mean time:        4.661 ms (0.00% GC)
  maximum time:     8.321 ms (0.00% GC)
  --------------
  samples:          2145
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [182]:
X = rand(10^4,3);

In [157]:
# Multiply the rows of X selected by `positions` by 3, storing the result back in X.
# The right-hand-side indexing `X[positions, :]` makes a copy of those rows, so a
# temporary array is allocated even though X itself is updated in place.
# Returns the freshly computed tripled rows (the value of the assignment), not X.
function triple!(X, positions)
    scaled = X[positions, :] * 3
    X[positions, :] = scaled
    return scaled
end

triple! (generic function with 1 method)

In [158]:
# Every 10th row index of X, starting at row 1, materialized as a vector.
positions = collect(1:10:size(X, 1));

In [177]:
# NOTE(review): unlike the earlier `vec!($X)`-style benchmarks, `X` and `positions` are
# referenced here as plain globals (no `$` interpolation), which may add overhead to the
# measured time. Every evaluation also triples the selected rows of X again, so X keeps
# growing across samples.
@benchmark triple!(X, positions)

BenchmarkTools.Trial: 
  memory estimate:  47.13 kb
  allocs estimate:  8
  --------------
  minimum time:     16.357 μs (0.00% GC)
  median time:      19.391 μs (0.00% GC)
  mean time:        28.756 μs (18.50% GC)
  maximum time:     2.994 ms (98.09% GC)
  --------------
  samples:          10000
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%

In [179]:
# Return a new array holding 3 times the rows of X selected by `positions`.
# X itself is left untouched; the caller decides where to store the result.
triple(X, positions) = 3 * X[positions, :]

triple (generic function with 1 method)

In [180]:
# Times both the call to `triple` and the assignment of its result back into the
# selected rows of X.
# NOTE(review): `X` and `positions` are referenced as plain globals (no `$` interpolation,
# unlike the `vec!($X)`-style benchmarks earlier), which may add overhead to the timing.
@benchmark X[positions,:] = triple(X, positions)

BenchmarkTools.Trial: 
  memory estimate:  47.30 kb
  allocs estimate:  14
  --------------
  minimum time:     17.399 μs (0.00% GC)
  median time:      23.043 μs (0.00% GC)
  mean time:        33.345 μs (14.35% GC)
  maximum time:     4.563 ms (96.67% GC)
  --------------
  samples:          10000
  evals/sample:     1
  time tolerance:   5.00%
  memory tolerance: 1.00%