# Process: An instance of a program (e.g. a Python interpreter)

'+ Takes advantage of multiple CPUs and cores
'+ Separate memory space -> memory is not shared between processes
'+ Great for CPU-bound processing
'+ A new process is started independently of other processes
'+ Processes are interruptible / killable
'+ One GIL for each process -> avoids GIL limitations

'- Heavyweight
'- Starting a process is slower than starting a thread
'- More memory
'- IPC (inter-process communication) is more complicated

# Thread: an entity within a process that can be scheduled (also known as "lightweight process")
A process can spawn multiple threads.

'+ All threads within a process share the same memory
'+ Lightweight
'+ Starting a thread is faster than starting a process
'+ Great for I/O-bound tasks

'- Threading is limited by the GIL: only one thread executes Python code at a time
'- No speed-up for CPU-bound tasks
'- Not interruptible / killable
'- Be careful with race conditions


# GIL: Global Interpreter Lock
- A lock that allows only one thread at a time to execute in Python
- Needed in CPython because memory management is not thread-safe


Avoid GIL:
- Use multiprocessing
- Use a different, free-threaded Python implementation (Jython, IronPython)
- Use Python as a wrapper for third-party libraries (C / C++) -> numpy, scipy

# Let's start with multiprocessing

In [5]:
import time
from multiprocessing import Process
import os


def square_numbers():
    """Square the integers 0..999, pausing 0.1 s after each, then print "DONE".

    The squares are computed only to give the process something to do;
    none of them is kept or returned.
    """
    for number in range(1_000):
        _ = number * number  # result is deliberately thrown away
        time.sleep(0.1)
    print("DONE")


# Guard the launch code: with the "spawn" start method every child process
# re-imports this module, and unguarded top-level code would start new
# processes again from inside each child.
if __name__ == "__main__":
    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to a single process instead of crashing in range().
    num_processes = os.cpu_count() or 1
    print(num_processes)

    # create one process per CPU; nothing runs until start() is called
    processes = [Process(target=square_numbers) for _ in range(num_processes)]

    # start all processes first so that they run concurrently
    for p in processes:
        p.start()

    # then block until every process has finished
    for p in processes:
        p.join()

    print("end main")

12
end main


# threads

In [2]:
import time
from threading import Thread
import os

print("hello")

def square_numbers():
    """Sum the squares of 0..999, pausing 0.01 s per step, and print the total.

    The result is printed as ``DONE <total>``; nothing is returned.
    """
    total = 0
    for number in range(1_000):
        total += number * number
        time.sleep(0.01)
    print(f"DONE {total}")


num_threads = 10

# create the threads; nothing runs until start() is called
threads = [Thread(target=square_numbers) for _ in range(num_threads)]

start_time = time.time()
# start all threads first so that they run concurrently
for t in threads:
    t.start()

# then wait for every thread to finish
for t in threads:
    t.join()

end_time = time.time()
print("end main")
print(f"--- {end_time - start_time} seconds ---")

hello
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
DONE 332833500
end main
--- 15.56141996383667 seconds ---


In [23]:
from threading import Thread


class MyThread(Thread):
    """Thread subclass that owns a counter and bumps it 1,000 times when run."""

    def __init__(self, my_num, *args, **kwargs):
        """Remember the starting counter; everything else goes to ``Thread``."""
        super().__init__(*args, **kwargs)
        self.my_num = my_num

    def run(self) -> None:
        """Thread body: add one to the counter, 1,000 times over."""
        remaining = 1_000
        while remaining > 0:
            self.my_num += 1
            remaining -= 1

    def get_my_num(self):
        """Return the counter's current value."""
        return self.my_num


# Build the thread with its counter preset to 10 and show the value
# before the thread has run.
th = MyThread(my_num=10)
print(th.get_my_num())

# Run the thread to completion ...
th.start()
th.join()

# ... and show the counter again once run() has finished.
print(th.get_my_num())

10
1010


# Race condition example

In [19]:
database_value = 0


def increase():
    """Add 10 to the global ``database_value`` without any synchronisation.

    The global is read once into a local, the local is incremented ten times
    with a 0.01 s pause after each step, and the result is written back.
    Nothing protects the read-modify-write, so threads running this
    concurrently can overwrite each other's result. That race is the point
    of the example.
    """
    global database_value
    snapshot = database_value

    # processing
    step = 0
    while step < 10:
        snapshot += 1
        time.sleep(0.01)
        step += 1

    database_value = snapshot


print(f"start value {database_value}")

num_threads = 10

# create the threads; each one will run increase() once
threads = [Thread(target=increase) for _ in range(num_threads)]

start_time = time.time()
# start all threads first so that they overlap
for t in threads:
    t.start()

# then wait for every thread to finish
for t in threads:
    t.join()

print(f"env value {database_value}")

start value 0
env value 10


# Solve race with Lock

In [5]:
from threading import Lock

database_value = 0


def increase(lock, steps=10, delay=0.1):
    """Add ``steps`` to the global ``database_value`` while holding ``lock``.

    This variant shows explicit ``acquire()`` / ``release()`` calls.

    Args:
        lock: Lock guarding ``database_value``; held for the whole
            read-modify-write.
        steps: Number of +1 increments to apply (default 10).
        delay: Seconds to sleep after each increment, simulating slow
            processing (default 0.1).
    """
    global database_value

    lock.acquire()
    try:
        local_copy = database_value

        # processing
        for _ in range(steps):
            local_copy += 1
            time.sleep(delay)

        database_value = local_copy
    finally:
        # Always release, even if processing raises; otherwise every other
        # thread waiting on the lock would block forever.
        lock.release()


def increase_2(lock, steps=10, delay=0.1):
    """Add ``steps`` to the global ``database_value`` while holding ``lock``.

    This variant uses the lock as a context manager, so it is released
    automatically when the ``with`` block exits, including on an exception.

    Args:
        lock: Lock guarding ``database_value``; held for the whole
            read-modify-write.
        steps: Number of +1 increments to apply (default 10).
        delay: Seconds to sleep after each increment, simulating slow
            processing (default 0.1).
    """
    global database_value

    # use Lock as context manager
    with lock:
        local_copy = database_value

        # processing
        for _ in range(steps):
            local_copy += 1
            time.sleep(delay)

        database_value = local_copy


print(f"start value {database_value}")

# Both threads share one lock, so their updates to database_value
# cannot interleave.
lock = Lock()
th_1 = Thread(target=increase, args=(lock,))
th_2 = Thread(target=increase_2, args=(lock,))

# start both threads before waiting on either
th_1.start()
th_2.start()

# wait for both to finish
th_1.join()
th_2.join()

print(f"env value {database_value}")

start value 0
env value 20


# Queue in threading

In [7]:
from queue import Queue

q = Queue()
q.put(1)
q.put(2)
q.put(3)

# FIFO: items come out in the order they were put in (1, then 2, then 3)
first = q.get()
print(first)
q.task_done()  # one task_done() per get(), otherwise join() never returns

second = q.get()
print(second)
q.task_done()

# Take the last item off as well: q.join() only returns once every put()
# has been matched by a task_done(), so leaving it queued would block forever.
q.get()
q.task_done()

q.join()

1
2


In [31]:
from threading import current_thread


def worker(q_, lock_):
    """Consume items from ``q_`` forever, printing each one under ``lock_``.

    The loop never returns, so this is meant to run in a background thread.

    Args:
        q_: Queue to take items from. ``task_done()`` is called on it once
            per item so that ``q_.join()`` can complete.
        lock_: Lock held while printing, so that lines from different
            threads do not interleave.
    """
    while True:
        # Use the queue that was passed in, not a module-level one.
        value = q_.get()

        try:
            # processing ...
            with lock_:
                print(f"in {current_thread().name}, got {value}")
        finally:
            # Mark the item finished even if processing raised; otherwise
            # q_.join() would wait forever for this item.
            q_.task_done()


q = Queue()
lock = Lock()
num_treads = 10

# Start the worker pool. The threads are daemons, so they do not keep
# the program alive once the main thread is done.
for _ in range(num_treads):
    th = Thread(target=worker, args=(q, lock), daemon=True)
    th.start()


# enqueue the values 1..20 for the workers
for item in range(1, 21):
    q.put(item)

# block until every queued item has been marked done
q.join()

in Thread-91, got 1
in Thread-92, got 2
in Thread-93, got 3
in Thread-94, got 4
in Thread-95, got 5
in Thread-98, got 6
in Thread-96, got 7
in Thread-97, got 8
in Thread-99, got 9
in Thread-100, got 10
in Thread-91, got 11
in Thread-92, got 12
in Thread-93, got 13
in Thread-94, got 14
in Thread-95, got 15
in Thread-98, got 16
in Thread-96, got 17
in Thread-97, got 18
in Thread-99, got 19
in Thread-100, got 20


In [32]:
from threading import current_thread


def worker(q_, lock_):
    """Consume items from ``q_`` forever, printing each one under ``lock_``.

    The loop never returns, so this is meant to run in a background thread.

    Args:
        q_: Queue to take items from. ``task_done()`` is called on it once
            per item so that ``q_.join()`` can complete.
        lock_: Lock held while printing, so that lines from different
            threads do not interleave.
    """
    while True:
        # Use the queue that was passed in, not a module-level one.
        value = q_.get()

        try:
            # processing ...
            with lock_:
                print(f"in {current_thread().name}, got {value}")
        finally:
            # Mark the item finished even if processing raised; otherwise
            # q_.join() would wait forever for this item.
            q_.task_done()


q = Queue()
lock = Lock()
num_treads = 10

# Start the worker pool with non-daemon threads this time.
# NOTE: worker() loops forever, so in a plain script these non-daemon
# threads would keep the interpreter alive after q.join() returns.
for _ in range(num_treads):
    th = Thread(target=worker, args=(q, lock), daemon=False)
    th.start()


# enqueue the values 1..20 for the workers
for item in range(1, 21):
    q.put(item)

# block until every queued item has been marked done
q.join()

in Thread-101, got 1
in Thread-103, got 2
in Thread-102, got 3
in Thread-104, got 4
in Thread-105, got 5
in Thread-106, got 6
in Thread-107, got 7
in Thread-108, got 8
in Thread-109, got 9
in Thread-110, got 10
in Thread-101, got 11
in Thread-103, got 12
in Thread-102, got 13
in Thread-104, got 14
in Thread-105, got 15
in Thread-106, got 16
in Thread-107, got 17
in Thread-108, got 18
in Thread-109, got 19
in Thread-110, got 20


In [33]:
from threading import current_thread


def worker(q_, lock_):
    """Consume items from ``q_`` forever, printing each one under ``lock_``.

    The loop never returns, so this is meant to run in a background thread.

    Args:
        q_: Queue to take items from. ``task_done()`` is called on it once
            per item so that ``q_.join()`` can complete.
        lock_: Lock held while printing, so that lines from different
            threads do not interleave.
    """
    while True:
        # Use the queue that was passed in, not a module-level one.
        value = q_.get()

        try:
            # processing ...
            with lock_:
                print(f"in {current_thread().name}, got {value}")
        finally:
            # Mark the item finished even if processing raised; otherwise
            # q_.join() would wait forever for this item.
            q_.task_done()


q = Queue()
lock = Lock()
num_treads = 10

# Start the worker pool. The threads are daemons, so they do not keep
# the program alive once the main thread is done.
for _ in range(num_treads):
    th = Thread(target=worker, args=(q, lock), daemon=True)
    th.start()


# first batch: the values 1..20
for item in range(1, 21):
    q.put(item)

# second, much larger batch: the values 0..99_999
for item in range(100_000):
    q.put(item)

# block until every queued item has been marked done
q.join()

in Thread-117, got 1
in Thread-118, got 2
in Thread-111, got 3
in Thread-120, got 4
in Thread-113, got 5
in Thread-116, got 6
in Thread-112, got 7
in Thread-119, got 8
in Thread-114, got 9
in Thread-115, got 10
in Thread-118, got 11
in Thread-120, got 12
in Thread-116, got 13
in Thread-119, got 14
in Thread-114, got 15
in Thread-115, got 16
in Thread-111, got 17
in Thread-120, got 18
in Thread-116, got 19
in Thread-119, got 20
in Thread-114, got 0
in Thread-115, got 1
in Thread-111, got 2
in Thread-120, got 3
in Thread-120, got 4
in Thread-117, got 5
in Thread-114, got 6
in Thread-113, got 7
in Thread-111, got 8
in Thread-119, got 9
in Thread-118, got 10
in Thread-116, got 11
in Thread-114, got 12
in Thread-113, got 13
in Thread-111, got 14
in Thread-119, got 15
in Thread-118, got 16
in Thread-116, got 17
in Thread-114, got 18
in Thread-113, got 19
in Thread-120, got 20
in Thread-119, got 21
in Thread-118, got 22
in Thread-116, got 23
in Thread-114, got 24
in Thread-113, got 25
in Thre