In [1]:
# https://www.youtube.com/watch?v=vbtxtvuCFRM&list=PLqnslRFeH2UqLwzS0AwKDKLrpYBKzLBy2&index=15

# Threading vs Multiprocessing in Python
# Running code in parallel to speed it up


In [2]:
# Process: an instance of a program (ex. Python Interpreter)

# + Takes advantage of multiple cpus and cores

# + Separate memory space -> memory not shared between processes

# + Great for CPU bound processing 

# + a new process is started independently from other processes

# + processes are interruptible/killable

# + one GIL for each process -> avoids GIL Limitation

# - Heavy weight
# - Starting a process is slower than starting a thread
# - More Memory
# - IPC (interprocess communication) is more complicated

In [3]:
# Threads: an entity within a process that can be scheduled (also known as a lightweight process), a process can spawn multiple threads

# + All threads within a process share the same memory
# + lightweight
# + starting a thread is faster than starting a process
# + great for i/o bound tasks

# - threading is limited by GIL : only one thread at a time
# - no effect for cpu-bound tasks
# - not interruptible / killable
# - careful with race conditions
# race conditions occur when 2 or more threads want to access the same variable at the same time

In [4]:
# GIL: Global interpreter lock
# - a lock that allows only 1 thread to execute at a time in Python

# Needed in CPython because its memory management (reference counting) is not thread-safe

# - Avoid:
#     - use multiprocessing
#     - use a different, free-threaded Python implementation (Jython, IronPython)
#     - use Python as a wrapper for third-party libraries (C/C++) -> numpy, scipy

In [5]:
# Let's start with multiprocessing

from multiprocessing import Process
import os
import time

# Container for the worker Process objects.
processes = []

# One worker per CPU is a sensible default. os.cpu_count() returns None when
# the count cannot be determined, so fall back to a single worker instead of
# letting a later range(num_processes) raise TypeError.
num_processes = os.cpu_count() or 1

print(f'This machine has {num_processes} cpus')

#create function we want to run

def square_nums(count=100, delay=0.1):
    """Simulate a slow task: square 0..count-1, sleeping after each square.

    The squares are discarded on purpose; the function only exists to keep a
    worker busy for roughly ``count * delay`` seconds.

    Args:
        count: How many numbers to square. Defaults to 100.
        delay: Seconds to sleep after each square. Defaults to 0.1.

    Returns:
        None.
    """
    for i in range(count):
        _ = i * i  # placeholder work; the result is intentionally unused
        time.sleep(delay)


This machine has 12 cpus


In [6]:
# Build the worker list inside this cell instead of appending to a list made
# in an earlier cell. start() may be called at most once per Process object,
# so re-running a cell that keeps appending to the old list would try to
# start processes that have already run.
# If the target takes arguments, pass them as a tuple:
#     Process(target=square_nums, args=(...))
processes = [Process(target=square_nums) for _ in range(num_processes)]

# Start every process before joining any, so they all run at the same time.
for p in processes:
    p.start()

# join() blocks the main thread until that process has finished.
for p in processes:
    p.join()

print('end main')  # reached only after all processes are done
    

end main


In [7]:
from threading import Thread

num_threads = 10

# Create num_threads threads. (The thread count is independent of the CPU
# count used for the process example.)
# If the target takes arguments, pass them as a tuple:
#     Thread(target=square_nums, args=(...))
threads = [Thread(target=square_nums) for _ in range(num_threads)]

# Start every thread before joining any, so they all run at the same time.
for t in threads:
    t.start()

# join() blocks the main thread until that thread has finished.
for t in threads:
    t.join()

print('end main')  # reached only after all threads are done
    

end main
