In [7]:
function calc_pi(n)
    w = 1.0/n
    psum = 0.0

    for i in 1:n
        x = w * (i - 0.5)
        psum += 4.0 / (1.0 + x * x)
    end

    pi = w * psum
    return pi
end

calc_pi (generic function with 1 method)

In [8]:
function calc_pi_fastmath(n)
    w = 1.0/n
    psum = 0.0

    @fastmath for i in 1:n
        x = w * (i - 0.5)
        psum += 4.0 / (1.0 + x * x)
    end

    pi = w * psum
    return pi
end

calc_pi_fastmath (generic function with 1 method)

In [9]:
using LoopVectorization

function calc_pi_vectorisation(n)
    w = 1.0/n
    psum = 0.0

    @turbo for i in 1:n
        x = w * (i - 0.5)
        psum += 4.0 / (1.0 + x * x)
    end

    pi = w * psum
    return pi
end

calc_pi_vectorisation (generic function with 1 method)

In [10]:
using BenchmarkTools

n = 1_000_000_000

1000000000

## Naive

In [11]:
@btime calc_pi(n)

  870.024 ms (1 allocation: 16 bytes)


3.1415926535899708

## Vectorisation

In [12]:
@btime calc_pi_vectorisation(n)

  342.580 ms (1 allocation: 16 bytes)


3.141592653589845

## Fastmath

In [13]:
@btime calc_pi_fastmath(n)

  303.934 ms (1 allocation: 16 bytes)


3.1415926535897682