# Multiprocessing

Multiprocessing in Python leverages multiple processes to parallelize computation, with each process having its own memory space. 

In [1]:
from time import sleep
import multiprocessing as mp

def square(num):
    # print the process ID
    print(f'ID: {mp.current_process().name}, num: {num}, square: {num * num} \n')
    sleep(1)

with mp.Pool(processes=4) as p:
    p.map(square, range(4))

ID: ForkPoolWorker-1, num: 0, square: 0 
ID: ForkPoolWorker-2, num: 1, square: 1 
ID: ForkPoolWorker-3, num: 2, square: 4 
ID: ForkPoolWorker-4, num: 3, square: 9 






Storing output in a list

In [2]:
import multiprocessing as mp

def square(num):
    return num * num

with mp.Pool(processes=4) as p:
    result =  p.map(square, range(10))
print(result)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


`Pool.apply_async()` allows you to execute a function asynchronously in a separate process

In [3]:
from multiprocessing import Pool


def task(n, m):
    return n * n + m


n = 4

with Pool(processes=4) as p:
    async_results = [
        p.apply_async(task, args=(i, j)) for i in range(n) for j in range(n // 2)
    ]
    results = [r.get() for r in async_results]
print(results)

[0, 1, 1, 2, 4, 5, 9, 10]


In [4]:
for i in range(n):
    for j in range(n // 2):
        print(i, j, i * i + j)

0 0 0
0 1 1
1 0 1
1 1 2
2 0 4
2 1 5
3 0 9
3 1 10


### Adding progress bar

In [5]:
# !conda install tqdm -y 

In [6]:
import tqdm

n = 20

with mp.Pool(processes=4) as pool:
    results = []
    for result in tqdm.tqdm(pool.map(square, range(n)), total=n):
        results.append(result)

100%|██████████| 20/20 [00:00<00:00, 236298.82it/s]


In [20]:
import numpy as np


def unequal_task(n):
    sleep(np.random.randint(1, 4))
    return n * n


n = 12

with mp.Pool(processes=4) as pool:
    with tqdm.tqdm(total=n, desc="Processing") as pbar:
        async_res = [
            pool.apply_async(
                unequal_task, 
                args=(i,), 
                callback=lambda _: pbar.update(1))
            for i in range(n)
        ]
        results = [res.get() for res in async_res]

Processing:   0%|          | 0/12 [00:00<?, ?it/s]

Processing: 100%|██████████| 12/12 [00:06<00:00,  2.00it/s]


### Sharing data among processes

If you want to share data between processes, Python's multiprocessing module provides mechanisms like Value and Array for shared memory.

In [21]:
from multiprocessing import Value, Lock

def increment(shared_counter, lock):
    for _ in range(100):
        with lock:
            shared_counter.value += 1


counter = Value('i', 0)  # Shared integer ('i' for int)
lock = Lock()  # Lock to synchronize access to shared data

processes = [mp.Process(target=increment, args=(counter, lock)) for _ in range(4)]
for p in processes:
    p.start()
for p in processes:
    p.join()

print(f"Final counter value: {counter.value}")


Final counter value: 400


### Sharing a List using `Manager`
Here’s how you can use a Manager to share a list among multiple processes:

In [22]:
def append_to_list(shared_list, value):
    """Append a value to the shared list."""
    shared_list.append(value)
    print(f"Process {mp.current_process().name} appended value {value}")


with mp.Manager() as manager:
    shared_list = manager.list()
    processes = [
        mp.Process(target=append_to_list, args=(shared_list, i)) for i in range(4)
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print(f"Final list: {shared_list}")

Process Process-86 appended value 0
Process Process-87 appended value 1
Process Process-88 appended value 2
Process Process-89 appended value 3
Final list: [0, 1, 2, 3]
