# Week 3 Examples — Processes, Threads, and Concurrency

This notebook demonstrates building a simple thread pool, benchmarking I/O-bound vs CPU-bound workloads, and observing contention and saturation.

Python note: The GIL limits CPU-bound threading speedups; use multiprocessing for parallel CPU-bound tasks. Threads work well for overlapping I/O.

---

## A minimal thread pool with a work queue
We implement:
- `ThreadPool(workers)` with `submit(fn, *args, **kwargs)`
- Graceful shutdown via `shutdown(wait=True)`
- A bounded queue to provide backpressure


In [None]:
import threading, queue, time, math, random
from typing import Callable


class ThreadPool:
    """Fixed-size pool of daemon worker threads fed by a bounded work queue.

    Tasks are ``(fn, args, kwargs)`` tuples placed on ``self.tasks``. Each
    worker repeatedly takes one task, calls it, and marks it done, so callers
    can wait for all submitted work with ``pool.tasks.join()``.

    Attributes:
        tasks: ``queue.Queue`` created with ``maxsize=max_queue``.
        workers: list of the started worker ``threading.Thread`` objects.
        running: flag polled by the workers; cleared by ``shutdown()``.
    """

    def __init__(self, workers: int = 4, max_queue: int = 100):
        """Start ``workers`` daemon threads consuming from a bounded queue.

        Args:
            workers: number of worker threads to start.
            max_queue: ``maxsize`` passed to ``queue.Queue``; once that many
                tasks are pending, ``submit()`` blocks (backpressure).
        """
        self.tasks = queue.Queue(maxsize=max_queue)
        self.workers = []
        self.running = True
        for _ in range(workers):
            t = threading.Thread(target=self._worker, daemon=True)
            t.start()
            self.workers.append(t)

    def _worker(self):
        """Worker loop: run queued tasks until ``self.running`` is cleared."""
        import traceback

        while self.running:
            try:
                # Short timeout so the loop re-checks ``self.running`` even
                # when the queue stays empty.
                fn, args, kwargs = self.tasks.get(timeout=0.2)
            except queue.Empty:
                continue
            try:
                fn(*args, **kwargs)
            except Exception:
                # A failing task must not terminate the worker: a dead worker
                # shrinks the pool, and with no workers left the remaining
                # tasks are never consumed, so ``tasks.join()`` blocks forever.
                # Report the error and keep serving the queue.
                traceback.print_exc()
            finally:
                # Always balance the ``get()`` so ``tasks.join()`` can return.
                self.tasks.task_done()

    def submit(self, fn: Callable, *args, **kwargs):
        """Enqueue ``fn(*args, **kwargs)`` for execution by a worker.

        Blocks while the queue is full. The return value of ``fn`` is
        discarded by the worker.
        """
        self.tasks.put((fn, args, kwargs))

    def shutdown(self, wait=True):
        """Tell the workers to stop after their current task.

        Tasks still sitting in the queue when the workers exit are not run;
        call ``pool.tasks.join()`` first to drain pending work.

        Args:
            wait: when true, join each worker thread for up to one second.
        """
        self.running = False
        if wait:
            for t in self.workers:
                t.join(timeout=1)


## I/O-bound workload simulation
We simulate I/O by sleeping a random amount; threads can overlap waiting.


In [None]:
class Metrics:
    """Accumulates task durations; ``record()`` updates are guarded by a lock.

    Attributes:
        lock: ``threading.Lock`` held while the fields below are updated.
        count: number of durations recorded so far.
        total: sum of all recorded durations.
        samples: every recorded duration, in the order it was recorded.
    """

    def __init__(self):
        self.lock = threading.Lock()
        self.count = 0
        self.total = 0.0
        self.samples = []

    def record(self, dt):
        """Record one duration ``dt``: keep the sample and update the totals."""
        with self.lock:
            # All three fields change under the same lock so concurrent
            # callers cannot interleave partial updates.
            self.samples.append(dt)
            self.total = self.total + dt
            self.count = self.count + 1


def io_task(metrics: Metrics, mean_ms=5.0):
    """Simulate one I/O operation by sleeping, then record how long it took.

    Args:
        metrics: collector whose ``record()`` receives the elapsed seconds.
        mean_ms: mean of the exponentially distributed sleep, in milliseconds.
    """
    started = time.perf_counter()
    # ``expovariate`` takes a rate (1 / mean); the draw is in milliseconds
    # and is converted to seconds for ``time.sleep``.
    rate_per_ms = 1.0 / mean_ms
    sleep_s = random.expovariate(rate_per_ms) / 1000.0
    time.sleep(sleep_s)
    elapsed = time.perf_counter() - started
    metrics.record(elapsed)


def run_benchmark_io(workers_list=(1,2,4,8,16), tasks=2000, mean_ms=5.0):
    """Benchmark the thread pool on simulated I/O for several pool sizes.

    For each worker count a fresh ``ThreadPool`` runs ``tasks`` calls of
    ``io_task`` and the wall-clock time until the queue is drained is
    measured.

    Args:
        workers_list: pool sizes to benchmark, one run per entry.
        tasks: number of ``io_task`` calls submitted per run.
        mean_ms: mean simulated I/O delay passed to ``io_task``.

    Returns:
        A list with one dict per pool size, with keys ``workers``, ``tasks``,
        ``elapsed_s``, ``throughput_rps`` and ``avg_service_ms``.
    """
    results = []
    for w in workers_list:
        pool = ThreadPool(workers=w)
        try:
            m = Metrics()
            t0 = time.perf_counter()
            for _ in range(tasks):
                pool.submit(io_task, m, mean_ms)
            # Wait until every submitted task has been marked done.
            pool.tasks.join()
            # Stop the clock before shutdown so thread teardown is not timed.
            elapsed = time.perf_counter() - t0
        finally:
            # Stop the workers even if the run is interrupted (for example a
            # KeyboardInterrupt in a notebook); otherwise they keep polling.
            pool.shutdown()
        avg_ms = (m.total/m.count)*1e3 if m.count else 0
        results.append({
            'workers': w,
            'tasks': tasks,
            'elapsed_s': elapsed,
            # Guard against a zero reading from a coarse clock.
            'throughput_rps': tasks/elapsed if elapsed > 0 else 0.0,
            'avg_service_ms': avg_ms,
        })
    return results

# Run the I/O benchmark with its default settings and print one result
# dict per worker count.
io_results = run_benchmark_io()
for r in io_results:
    print(r)


## CPU-bound workload caveat (GIL)
This simulates CPU work with a tight loop. Expect limited speedup with more threads due to the GIL. Consider `multiprocessing` for true CPU parallelism.


In [None]:
def cpu_task(metrics: Metrics, n=1500000):
    """Burn CPU in a pure-Python loop and record how long it took.

    Args:
        metrics: collector whose ``record()`` receives the elapsed seconds.
        n: number of loop iterations.

    Returns:
        The sum of ``k % 7`` for ``k`` in ``range(n)``.
    """
    started = time.perf_counter()
    acc = 0
    # Deliberately a plain loop: the loop itself is the workload being timed.
    for k in range(n):
        acc += (k % 7)
    elapsed = time.perf_counter() - started
    metrics.record(elapsed)
    return acc


def run_benchmark_cpu(workers_list=(1,2,4), tasks=12, n=800000):
    """Benchmark the thread pool on CPU-bound work for several pool sizes.

    For each worker count a fresh ``ThreadPool`` runs ``tasks`` calls of
    ``cpu_task`` and the wall-clock time until the queue is drained is
    measured.

    Args:
        workers_list: pool sizes to benchmark, one run per entry.
        tasks: number of ``cpu_task`` calls submitted per run.
        n: loop iteration count passed to each ``cpu_task`` call.

    Returns:
        A list with one dict per pool size, with keys ``workers``, ``tasks``,
        ``elapsed_s``, ``throughput_rps`` and ``avg_service_ms``.
    """
    results = []
    for w in workers_list:
        pool = ThreadPool(workers=w)
        try:
            m = Metrics()
            t0 = time.perf_counter()
            for _ in range(tasks):
                pool.submit(cpu_task, m, n)
            # Wait until every submitted task has been marked done.
            pool.tasks.join()
            # Stop the clock before shutdown so thread teardown is not timed.
            elapsed = time.perf_counter() - t0
        finally:
            # Stop the workers even if the run is interrupted (for example a
            # KeyboardInterrupt in a notebook); otherwise they keep polling.
            pool.shutdown()
        avg_ms = (m.total/m.count)*1e3 if m.count else 0
        results.append({
            'workers': w,
            'tasks': tasks,
            'elapsed_s': elapsed,
            # Guard against a zero reading from a coarse clock.
            'throughput_rps': tasks/elapsed if elapsed > 0 else 0.0,
            'avg_service_ms': avg_ms,
        })
    return results

# Run the CPU benchmark with its default settings and print one result
# dict per worker count.
cpu_results = run_benchmark_cpu()
for r in cpu_results:
    print(r)


## Exercises
1) The pool's queue is already bounded (`max_queue`); demonstrate backpressure by making it small (e.g., 10). Measure enqueue wait time.\
2) Add per-task timestamps to compute queue wait vs service time percentiles.\
3) Replace CPU threads with a `multiprocessing.Pool` and compare results.
