# Julia Multi-Threading 

[![ ](https://markdown-videos-api.jorgenkh.no/url?url=https%3A%2F%2Fyoutu.be%2FkX6_iY_BtG8%3Fsi%3DQELqXGFHALzGIeGR)](https://youtu.be/kX6_iY_BtG8?si=QELqXGFHALzGIeGR)

### 👉 number of threads 

In [None]:
# Show the Julia version and platform information for the running kernel.
versioninfo()

In [None]:
# Number of threads in Julia's default thread pool for this kernel.
Threads.nthreads()

### 👉 creating a multi-thread kernel for jupyter

In [None]:
using IJulia

In [None]:
# Register a new Jupyter kernel named "Julia Multi-Thread" whose environment
# sets JULIA_NUM_THREADS to "4", so Julia starts with 4 threads.
# Change the name or the number of threads to suit your machine.

installkernel("Julia Multi-Thread", env=Dict("JULIA_NUM_THREADS"=>"4"))

#### 💡 Refresh the page, then switch this notebook's kernel to "Julia Multi-Thread"

### 👉 single-thread vs multi-thread

In [None]:
# number of threads; also used as the iteration count of the two loops below

N = Threads.nthreads()

In [None]:
# single-threaded: an ordinary loop, iterations run one after another

for step in 1:N
    thread_id = Threads.threadid()
    println("i: ", step, "\t Thread ID: ", thread_id)
end

In [None]:
# multi-threaded: @threads distributes the iterations over the available threads

Threads.@threads for step in 1:N
    thread_id = Threads.threadid()
    println("i: ", step, "\t Thread ID: ", thread_id)
end

### 👉 data-race example

In [None]:
# Create a vector holding 1, 2, ..., n and compute the reference sum with the
# built-in `sum`; the hand-written sums below are compared against this value.

n = 1_000_000
myvector = collect(1:n)
sum(myvector)

In [None]:
# single-threaded

"""
    multi_sum1(myvector)

Return the sum of the elements of `myvector`, accumulated serially in index
order starting from the integer `0`.
"""
function multi_sum1(myvector)
    total = 0

    for idx in eachindex(myvector)
        total = total + myvector[idx]
    end

    return total
end

In [None]:
# Should print the same value as sum(myvector) above.
multi_sum1(myvector)

In [None]:
# multi-threaded (intentionally unsafe: this is the data-race example)
#
# Every iteration of the threaded loop reads and updates the same variable
# `temp` without a lock or an atomic operation, so concurrent updates can
# overwrite each other. The code is kept as-is on purpose for the demonstration.

function multi_sum2(myvector)
    temp = 0

    Threads.@threads for i in eachindex(myvector)
        # unsynchronized read-modify-write on the shared accumulator
        temp += myvector[i]
    end

    return temp
end

In [None]:
# Re-run this cell several times and observe the output: because of the data
# race in multi_sum2, compare each result with sum(myvector) from above.

multi_sum2(myvector)

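🚩🚩🚩 The `multi_sum2()` function gives wrong values: all threads update the shared variable `temp` at the same time without synchronization (a data race), so some additions are lost.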

In [None]:
# multi-threaded without data-race test function

"""
    multi_sum3test(myvector)

Sum `myvector` with multiple threads without a data race, printing one partial
sum per slot followed by the total. Returns `nothing`.

The indices are split into at most `Threads.nthreads()` contiguous chunks and
each chunk writes only to its own slot of `temp`. Slots are indexed by chunk
number rather than by `Threads.threadid()`, because a thread ID is not
guaranteed to lie in `1:Threads.nthreads()` (for example when interactive
threads are enabled), in which case `temp[Threads.threadid()]` would throw a
`BoundsError`.
"""
function multi_sum3test(myvector)
    nslots = Threads.nthreads()
    temp = zeros(Int, nslots)

    # max(1, ...) keeps the chunk length valid when the input is empty
    chunklen = max(1, cld(length(myvector), nslots))
    chunks = collect(Iterators.partition(eachindex(myvector), chunklen))

    Threads.@threads for k in eachindex(chunks)
        # accumulate locally, then publish once to this chunk's private slot
        partial = 0
        for i in chunks[k]
            partial += myvector[i]
        end
        temp[k] = partial
    end

    for i in eachindex(temp)
        println(i, "\t = ", temp[i])
    end

    println()
    println("The sum is ", sum(temp))
    return nothing
end

In [None]:
# Prints the partial sums and their total; the total should match sum(myvector).
multi_sum3test(myvector)

In [None]:
# multi-threaded without data-race

"""
    multi_sum3(myvector)

Return the sum of the elements of `myvector`, computed with multiple threads
and without a data race. The result is an `Int`.

The indices are split into at most `Threads.nthreads()` contiguous chunks and
each chunk writes only to its own slot of `temp`. Slots are indexed by chunk
number rather than by `Threads.threadid()`, because a thread ID is not
guaranteed to lie in `1:Threads.nthreads()` (for example when interactive
threads are enabled), in which case `temp[Threads.threadid()]` would throw a
`BoundsError`.
"""
function multi_sum3(myvector)
    nslots = Threads.nthreads()
    temp = zeros(Int, nslots)

    # max(1, ...) keeps the chunk length valid when the input is empty
    chunklen = max(1, cld(length(myvector), nslots))
    chunks = collect(Iterators.partition(eachindex(myvector), chunklen))

    Threads.@threads for k in eachindex(chunks)
        # accumulate locally, then publish once to this chunk's private slot
        partial = 0
        for i in chunks[k]
            partial += myvector[i]
        end
        temp[k] = partial
    end

    return sum(temp)
end

In [None]:
# Should print the same value as sum(myvector) above.
multi_sum3(myvector)

### speed test

In [None]:
# Re-run the cell several times: the first run also includes compilation time.
# Timing of the single-threaded loop.

@time multi_sum1(myvector);

In [None]:
# Re-run the cell several times: the first run also includes compilation time.
# Timing of the multi-threaded version.

@time multi_sum3(myvector);

In [None]:
# Re-run the cell several times: the first run also includes compilation time.
# Timing of the built-in sum, for reference.

@time sum(myvector);

### speed test with a larger vector

In [None]:
# trying with a larger vector: 100 million integers
# (about 800 MB of memory when Int is 64-bit)

n = 100_000_000
myvector = collect(1:n);

In [None]:
# Re-run the cell several times and compare with the timing of sum(myvector) below.
# Multi-threaded version on the larger vector.

@time multi_sum3(myvector);

In [None]:
# Re-run the cell several times.
# Built-in sum on the larger vector, for reference.

@time sum(myvector);

### 👉 multi-threaded axpy()

This example uses multi-threading, 32-bit floating-point numbers, and an in-place update of `y`.

In [None]:
# 32-bit constants: `m` is the vector length, `a` the scalar factor for axpy

const m = Int32(100_000)
const a = Float32(2.718)

In [None]:
# define multi-threaded axpy() function

"""
    multi_axpy!(a, x, y)

Compute `y[i] = a * x[i] + y[i]` in place for every index `i` of `x`, using
multiple threads, and return `y`.

`y` must contain every index of `x`; otherwise a `BoundsError` is thrown before
any element of `y` is modified. Entries of `y` beyond the indices of `x` are
left untouched.
"""
function multi_axpy!(a, x, y)
    inds = eachindex(x)

    # Validate once up front so a too-short `y` fails before any element is
    # written, instead of part-way through the threaded loop.
    checkbounds(y, inds)

    Threads.@threads for i in inds
        # in bounds for x by construction and for y by the check above
        @inbounds y[i] = a * x[i] + y[i]
    end

    return y
end

In [None]:
using Random

In [None]:
# Re-run the cell several times: the first run also includes compilation time.
# The random number generator is re-seeded on every run, so x and y are
# regenerated from the same seed before multi_axpy! updates y in place.

Random.seed!(1)

x = rand(Float32, m)
y = rand(Float32, m)

@time multi_axpy!(a, x, y);