**Most of a program's run time is spent inside loops**

Mostly function calls inside loops.

**The act of calling these functions itself better be fast!**

### But it doesn't look like it should be fast!

To execute `foo(x,y)`, Julia chooses the **method for foo that is _most_ specific to types of x, and y**.

**Looking up this method can be expensive!** There are hundreds of methods for `+(x,y)` picking the most specific one can involve a lot of head-scratching.

But Julia can do it when **compiling** the code, once and forever!

In [7]:
function sum1(xs)
    s = zero(xs[1])

    for i=1:length(xs)
        s = +(s, xs[i])
    end
    s
end

sum1 (generic function with 1 method)

In [8]:
@code_typed optimize=false sum1([1+2im, 7+9im])

CodeInfo(
[90m[77G│[1G[39m[90m2 [39m1 ─ %1  = (Base.getindex)(xs, 1)[36m::Complex{Int64}[39m
[90m[77G│[1G[39m[90m  [39m│         (s = (Main.zero)(%1))[90m::Complex{Int64}[39m
[90m[77G│[1G[39m[90m4 [39m│   %3  = (Main.length)(xs)[36m::Int64[39m
[90m[77G│[1G[39m[90m  [39m│   %4  = (1:%3)[36m::UnitRange{Int64}[39m
[90m[77G│[1G[39m[90m  [39m│         (#temp# = (Base.iterate)(%4))[90m::Union{Nothing, Tuple{Int64,Int64}}[39m
[90m[77G│[1G[39m[90m  [39m│   %6  = (#temp# === nothing)[36m::Bool[39m
[90m[77G│[1G[39m[90m  [39m│   %7  = (Base.not_int)(%6)[36m::Bool[39m
[90m[77G│[1G[39m[90m  [39m└──       goto #4 if not %7
[90m[77G│[1G[39m[90m  [39m2 ┄ %9  = #temp#::Tuple{Int64,Int64}[36m::Tuple{Int64,Int64}[39m
[90m[77G│[1G[39m[90m  [39m│         (i = (Core.getfield)(%9, 1))[90m::Int64[39m
[90m[77G│[1G[39m[90m  [39m│   %11 = (Core.getfield)(%9, 2)[36m::Int64[39m
[90m[77G│[1G[39m[90m5 [39m│   %12 = s[36m::Co

In [9]:
@code_typed sum1([1+2im, 7+9im]) # Also gets inlined -- no call to + on Complex anymore!

CodeInfo(
[90m[55G│╻     getindex[1G[39m[90m2 [39m1 ──       (Base.arrayref)(true, xs, 1)[90m::Complex{Int64}[39m
[90m[55G││╻╷╷╷  oftype[1G[39m[90m  [39m│    %2  = %new(Complex{Int64}, 0, 0)[36m::Complex{Int64}[39m
[90m[55G│╻     length[1G[39m[90m4 [39m│    %3  = (Base.arraylen)(xs)[36m::Int64[39m
[90m[55G││╻╷╷╷  Type[1G[39m[90m  [39m│    %4  = (Base.sle_int)(1, %3)[36m::Bool[39m
[90m[55G│││╻     unitrange_last[1G[39m[90m  [39m│          (Base.sub_int)(%3, 1)[90m::Int64[39m
[90m[55G││││  [1G[39m[90m  [39m│    %6  = (Base.ifelse)(%4, %3, 0)[36m::Int64[39m
[90m[55G││╻╷╷   isempty[1G[39m[90m  [39m│    %7  = (Base.slt_int)(%6, 1)[36m::Bool[39m
[90m[55G││    [1G[39m[90m  [39m└───       goto #3 if not %7
[90m[55G││    [1G[39m[90m  [39m2 ──       goto #4
[90m[55G││    [1G[39m[90m  [39m3 ──       goto #4
[90m[55G│     [1G[39m[90m  [39m4 ┄─ %11 = φ (#2 => true, #3 => false)[36m::Bool[39m
[90m[55G│     [1G[39m

In [15]:
@noinline add(x,y) = x+y # forcefully avoid inlining

add (generic function with 1 method)

In [18]:
function sum2(xs)
    s = zero(xs[1])

    for i=1:length(xs)
        s = (s + xs[i])
    end
    s
end

sum2 (generic function with 1 method)

In [20]:
@code_native sum2([1+1im, 2+3im])

	.text
; Function sum2 {
; Location: In[18]:2
	pushq	%rbp
	movq	%rsp, %rbp
; Function getindex; {
; Location: array.jl:731
	movq	8(%rsi), %rax
	testq	%rax, %rax
	je	L107
;}
; Location: In[18]:4
	jle	L91
	movq	(%rsi), %rdx
	vpxor	%xmm0, %xmm0, %xmm0
	movl	$1, %ecx
	nopl	(%rax,%rax)
; Location: In[18]:5
; Function +; {
; Location: complex.jl:266
; Function +; {
; Location: int.jl:53
L32:
	vpaddq	(%rdx), %xmm0, %xmm0
;}}
; Function iterate; {
; Location: range.jl:575
; Function ==; {
; Location: promotion.jl:425
	cmpq	%rcx, %rax
;}}
	je	L95
; Function getindex; {
; Location: array.jl:731
	addq	$16, %rdx
	cmpq	%rax, %rcx
;}
; Function iterate; {
; Location: range.jl:576
; Function +; {
; Location: int.jl:53
	leaq	1(%rcx), %rcx
;}}
; Function getindex; {
; Location: array.jl:731
	jb	L32
	movq	%rsp, %rdx
	leaq	-16(%rdx), %rax
	movq	%rax, %rsp
	movq	%rcx, -16(%rdx)
	movabsq	$jl_bounds_error_ints, %rcx
	movl	$1, %edx
	movq	%rsi, %rdi
	movq	%rax, %rsi
	callq	*%rcx
;}}
; Function sum2 {
; Locati

Let's try to create a function that doesn't allow you to infer its result type

In [22]:
vec1 = [1,2]
@show typeof(vec1)

vec2 = Union{Int, Float64}[1,2.3]
@show typeof(vec2)

vec3 = [1,"hi"]
@show typeof(vec3)
vec3[1] # what is the output type of vec[i] for all i?


function bad_add(x,y)
    d=[0, "yes"] # intentionally create a Vector{Any}
    x+y+d[1]
end

typeof(vec1) = Array{Int64,1}
typeof(vec2) = Array{Union{Float64, Int64},1}
typeof(vec3) = Array{Any,1}


bad_add (generic function with 1 method)

In [23]:
@code_typed bad_add(1,2)

CodeInfo(
[90m[66G│  [1G[39m[90m13 [39m1 ─ %1 = invoke Base.vect(0::Int64, "yes"::Vararg{Any,N} where N)[36m::Array{Any,1}[39m
[90m[66G│╻  getindex[1G[39m[90m14 [39m│   %2 = (Base.arrayref)(true, %1, 1)[36m::Any[39m
[90m[66G││╻  +[1G[39m[90m   [39m│   %3 = (Base.add_int)(x, y)[36m::Int64[39m
[90m[66G││ [1G[39m[90m   [39m│   %4 = (%3 + %2)[36m::Any[39m
[90m[66G│  [1G[39m[90m   [39m└──      return %4
) => Any

In [24]:


function sum3(xs)
    s = zero(xs[1])

    for i=1:length(xs)
        s = bad_add(s, xs[i])
    end
    s
end

sum3 (generic function with 1 method)

In [25]:
@code_typed bad_add(1,2)

CodeInfo(
[90m[66G│  [1G[39m[90m13 [39m1 ─ %1 = invoke Base.vect(0::Int64, "yes"::Vararg{Any,N} where N)[36m::Array{Any,1}[39m
[90m[66G│╻  getindex[1G[39m[90m14 [39m│   %2 = (Base.arrayref)(true, %1, 1)[36m::Any[39m
[90m[66G││╻  +[1G[39m[90m   [39m│   %3 = (Base.add_int)(x, y)[36m::Int64[39m
[90m[66G││ [1G[39m[90m   [39m│   %4 = (%3 + %2)[36m::Any[39m
[90m[66G│  [1G[39m[90m   [39m└──      return %4
) => Any

In [26]:
@code_warntype sum3([1,2])

Body[91m[1m::Any[22m[39m
[90m[55G│╻     getindex[1G[39m[90m4 [39m1 ──       (Base.arrayref)(true, xs, 1)
[90m[55G│╻     length[1G[39m[90m6 [39m│    %2  = (Base.arraylen)(xs)[36m::Int64[39m
[90m[55G││╻╷╷╷  Type[1G[39m[90m  [39m│    %3  = (Base.sle_int)(1, %2)[36m::Bool[39m
[90m[55G│││╻     unitrange_last[1G[39m[90m  [39m│          (Base.sub_int)(%2, 1)
[90m[55G││││  [1G[39m[90m  [39m│    %5  = (Base.ifelse)(%3, %2, 0)[36m::Int64[39m
[90m[55G││╻╷╷   isempty[1G[39m[90m  [39m│    %6  = (Base.slt_int)(%5, 1)[36m::Bool[39m
[90m[55G││    [1G[39m[90m  [39m└───       goto #3 if not %6
[90m[55G││    [1G[39m[90m  [39m2 ──       goto #4
[90m[55G││    [1G[39m[90m  [39m3 ──       goto #4
[90m[55G│     [1G[39m[90m  [39m4 ┄─ %10 = φ (#2 => true, #3 => false)[36m::Bool[39m
[90m[55G│     [1G[39m[90m  [39m│    %11 = φ (#3 => 1)[36m::Int64[39m
[90m[55G│     [1G[39m[90m  [39m│    %12 = φ (#3 => 1)[36m::Int64[39m
[90m

### Common pitfall Number 1. use of global variables in loops

In [35]:
xs = rand(10^6)
function sum1_global()
    s = zero(xs[1])
    
    for i=1:length(xs)
        s = +(s, xs[i])
    end
    s
end


sum1_global (generic function with 1 method)

In [36]:
@code_warntype sum1_global()

Body[91m[1m::Any[22m[39m
[90m[56G│    [1G[39m[90m3 [39m1 ─ %1  = (Base.getindex)(Main.xs, 1)[91m[1m::Any[22m[39m
[90m[56G│    [1G[39m[90m  [39m│   %2  = (Main.zero)(%1)[91m[1m::Any[22m[39m
[90m[56G│    [1G[39m[90m5 [39m│   %3  = (Main.length)(Main.xs)[91m[1m::Any[22m[39m
[90m[56G│    [1G[39m[90m  [39m│   %4  = (isa)(%3, Int64)[36m::Bool[39m
[90m[56G│    [1G[39m[90m  [39m└──       goto #3 if not %4
[90m[56G│    [1G[39m[90m  [39m2 ─ %6  = π (%3, [36mInt64[39m)
[90m[56G│╻╷╷╷ Colon[1G[39m[90m  [39m│   %7  = (Base.sle_int)(1, %6)[36m::Bool[39m
[90m[56G││╻    Type[1G[39m[90m  [39m│         (Base.sub_int)(%6, 1)
[90m[56G│││┃│   unitrange_last[1G[39m[90m  [39m│   %9  = (Base.sub_int)(1, 1)[36m::Int64[39m
[90m[56G││││ [1G[39m[90m  [39m│   %10 = (Base.ifelse)(%7, %6, %9)[36m::Int64[39m
[90m[56G│││  [1G[39m[90m  [39m│   %11 = %new(UnitRange{Int64}, 1, %10)[36m::UnitRange{Int64}[39m
[90m[56G│    [1G[3

### Fixes
- Make the global an argument!
- Make the global a `const`

In [37]:
const arr = rand(10^6)
function sum2_global()
    s = zero(arr[1])
    
    for i=1:length(arr)
        s = s + arr[i]
    end
    s
end



sum2_global (generic function with 1 method)

In [39]:
@time sum1_global()
@time sum2_global()

  0.128758 seconds (5.00 M allocations: 91.537 MiB, 10.37% gc time)
  0.001536 seconds (5 allocations: 176 bytes)


499821.06931097474

**Let's practice propagating type information in our head**

Run through the program and find out the types assigned to each variable over time.

In [41]:
function logint2(n)
    r = 0
    while n > 1
        n = n/2
        r += 1
    end
    r
end

logint2 (generic function with 1 method)

In [32]:
function logint2(n) # assume n::Float64
    r = 0 # Int
    while n > 1 # n :: Float64
        n = n/2 # n :: Float64
        r += 1 # Int
    end
    r # Int
end

logint2 (generic function with 1 method)

In [42]:
@code_typed logint2(10.0)

CodeInfo(
[90m[69G│   [1G[39m[90m  [39m1 ─       nothing[90m::Nothing[39m
[90m[69G│   [1G[39m[90m3 [39m2 ┄ %2  = φ (#1 => 0, #3 => %12)[36m::Int64[39m
[90m[69G│   [1G[39m[90m  [39m│   %3  = φ (#1 => _2, #3 => %11)[36m::Float64[39m
[90m[69G│╻   >[1G[39m[90m  [39m│   %4  = π (1.0, [36mFloat64[39m)
[90m[69G││┃│  <[1G[39m[90m  [39m│   %5  = (Base.lt_float)(%4, %3)[36m::Bool[39m
[90m[69G│││ [1G[39m[90m  [39m│   %6  = π (1.0, [36mFloat64[39m)
[90m[69G│││╻   ==[1G[39m[90m  [39m│   %7  = (Base.eq_float)(%6, %3)[36m::Bool[39m
[90m[69G│││╻   &[1G[39m[90m  [39m│   %8  = (Base.and_int)(%7, false)[36m::Bool[39m
[90m[69G│││╻   |[1G[39m[90m  [39m│   %9  = (Base.or_int)(%5, %8)[36m::Bool[39m
[90m[69G│   [1G[39m[90m  [39m└──       goto #4 if not %9
[90m[69G│╻╷  /[1G[39m[90m4 [39m3 ─ %11 = (Base.div_float)(%3, 2.0)[36m::Float64[39m
[90m[69G│╻   +[1G[39m[90m5 [39m│   %12 = (Base.add_int)(%2, 1)[36m::Int64[39m
[

In [51]:
div(900.0,7)

128.0

In [57]:
function logint2(n) # assume n::Int
    r = 0 # Int
    n = Float64(n)
    while n > 1 # n :: Union{Int, Float64} (propagated from below)
        n = n/2 # n :: Union{Int, Float64}
        r += 1 # Int
    end
    r # Int
end

logint2 (generic function with 1 method)

In [58]:
@code_typed logint2(10)

CodeInfo(
[90m[69G│   [1G[39m[90m3 [39m1 ─ %1  = π (n, [36mInt64[39m)
[90m[69G│╻   Type[1G[39m[90m  [39m└── %2  = (Base.sitofp)(Float64, %1)[36m::Float64[39m
[90m[69G│   [1G[39m[90m4 [39m2 ┄ %3  = φ (#1 => 0, #3 => %15)[36m::Int64[39m
[90m[69G│   [1G[39m[90m  [39m│   %4  = φ (#1 => %2, #3 => %14)[36m::Float64[39m
[90m[69G│   [1G[39m[90m  [39m│   %5  = π (%4, [36mFloat64[39m)
[90m[69G│╻   >[1G[39m[90m  [39m│   %6  = π (1.0, [36mFloat64[39m)
[90m[69G││┃│  <[1G[39m[90m  [39m│   %7  = (Base.lt_float)(%6, %5)[36m::Bool[39m
[90m[69G│││ [1G[39m[90m  [39m│   %8  = π (1.0, [36mFloat64[39m)
[90m[69G│││╻   ==[1G[39m[90m  [39m│   %9  = (Base.eq_float)(%8, %5)[36m::Bool[39m
[90m[69G│││╻   &[1G[39m[90m  [39m│   %10 = (Base.and_int)(%9, false)[36m::Bool[39m
[90m[69G│││╻   |[1G[39m[90m  [39m│   %11 = (Base.or_int)(%7, %10)[36m::Bool[39m
[90m[69G│   [1G[39m[90m  [39m└──       goto #4 if not %11
[90m[69G│   

In [59]:
function sumlog(xs)
    s = zero(xs[1])
    
    for i=1:length(xs)
        s = s + logint2(xs[i]) # If type of xs[i] is known, result of logint2 will be known
    end
    s
end

sumlog (generic function with 1 method)

In [60]:
@code_typed optimize=false sumlog(xs)

CodeInfo(
[90m[77G│[1G[39m[90m2 [39m1 ─ %1  = (Base.getindex)(xs, 1)[36m::Float64[39m
[90m[77G│[1G[39m[90m  [39m│         (s = (Main.zero)(%1))[90m::Const(0.0, false)[39m
[90m[77G│[1G[39m[90m4 [39m│   %3  = (Main.length)(xs)[36m::Int64[39m
[90m[77G│[1G[39m[90m  [39m│   %4  = (1:%3)[36m::UnitRange{Int64}[39m
[90m[77G│[1G[39m[90m  [39m│         (#temp# = (Base.iterate)(%4))[90m::Union{Nothing, Tuple{Int64,Int64}}[39m
[90m[77G│[1G[39m[90m  [39m│   %6  = (#temp# === nothing)[36m::Bool[39m
[90m[77G│[1G[39m[90m  [39m│   %7  = (Base.not_int)(%6)[36m::Bool[39m
[90m[77G│[1G[39m[90m  [39m└──       goto #4 if not %7
[90m[77G│[1G[39m[90m  [39m2 ┄ %9  = #temp#::Tuple{Int64,Int64}[36m::Tuple{Int64,Int64}[39m
[90m[77G│[1G[39m[90m  [39m│         (i = (Core.getfield)(%9, 1))[90m::Int64[39m
[90m[77G│[1G[39m[90m  [39m│   %11 = (Core.getfield)(%9, 2)[36m::Int64[39m
[90m[77G│[1G[39m[90m5 [39m│   %12 = s[36m::Float6

In [61]:
@time logint2.([i for i in 1:2^20]);

  0.112937 seconds (257.51 k allocations: 28.957 MiB, 3.63% gc time)


# Summary

**Uncertainty** is the root cause of slowness.

It can come in many forms!

1. Globals -- uncertainty about type after compilation
2. Type instability -- uncertainty about the input or output type
3. Not fixed length -- arrays or strings don't have a fixed size
3. Mutability -- uncertainty that someone else may mutate the object
4. Boxing -- uncertainty about type or, size of values

Use `@time` and pay attention to allocations -- uncertainty leads to boxing which leads to GC overhead, so you will see it!

## References

- Performance tips from the manual https://docs.julialang.org/en/v0.6.1/manual/performance-tips/
- Engineering Julia for Speed - Lionel Zoubritzky
 https://www.youtube.com/watch?v=XWIZ_dCO6X8
- Information overload: tools for making program analysis and debugging manageable - Jameson Nash JuliaCon 2018