In [180]:
using BenchmarkTools
using LoopVectorization
function noAvxtIn()
n = 100
bar = randn(n,n)
foo = zeros(n,n)

@btime for i = 2:n-1
  for j = 2:n-1
    foo[i, j] = (1 / 4) * ( bar[i,j-1]+bar[i, j+1]+bar[i-1, j]+bar[i+1, j])
  end
end
    
end

function withAvxtIn()
n = 100
bar = randn(n,n)
foo = zeros(n,n)
@btime for i = 2:n-1
    @avxt for j = 2:n-1
    foo[i, j] = (1 / 4) * ( bar[i,j-1]+bar[i, j+1]+bar[i-1, j]+bar[i+1, j])
    end
end
end

noAvxtIn()
withAvxtIn()

# The for loop with @avxt seems to consume less time than otherwise.


  6.780 ms (67425 allocations: 1.18 MiB)
  543.900 μs (6175 allocations: 1.09 MiB)


In [178]:
using BenchmarkTools
using LoopVectorization

function noAvxt()
n = 100
bar = randn(n,n)
foo = zeros(n,n)

  for i = 2:n-1
  for j = 2:n-1
     foo[i, j] = (1 / 4) * ( bar[i,j-1]+bar[i, j+1]+bar[i-1, j]+bar[i+1, j])
  end
end
    return foo;
end 
    


function withAvxt()
    n = 100
bar = randn(n,n)
foo = zeros(n,n)
   
    for i = 2:n-1
    @avxt for j = 2:n-1
        foo[i, j] = (1 / 4) * ( bar[i,j-1]+bar[i, j+1]+bar[i-1, j]+bar[i+1, j])
    end
end
   return foo;
end

function test_AvxtContribution()
    println("No avxt and With avxt one time:");
    no = @btime noAvxt()
    with = @btime withAvxt()
    return no == with ? "No bugs." : "Something bad is happening";
end

function test_AvxtManyTimes()
    println("No avxt and With avxt many times:");
   no = @btime for i in 1:100 noAvxt() end
   with = @btime for i in 1:100 withAvxt() end
   
end

test_AvxtContribution()
test_AvxtManyTimes()

# No @avxt is better??? Why?



No avxt and With avxt one time:
  111.200 μs (4 allocations: 156.41 KiB)
  205.700 μs (3238 allocations: 1.14 MiB)
No avxt and With avxt many times:
  17.746 ms (400 allocations: 15.27 MiB)
  53.002 ms (323800 allocations: 114.42 MiB)
