# Concurrency

### Loading Libraries

In [1]:
# Math
import math
from math import hypot, factorial, sqrt, ceil

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn
import matplotlib.pyplot as plt

# Loggers
import logging
import logging.handlers

# SQLite
import sqlite3

# Enum
from enum import Enum, auto

# Print
from pprint import pprint

# OS
import io
import re
import sys
import abc
import csv
import time
import gzip
import queue
import heapq
import socket
import string
import random
import bisect
import operator
import datetime
import contextlib
import subprocess
from decimal import Decimal
from abc import ABC, abstractmethod

# Types & Annotations
import collections
from __future__ import annotations
from collections import defaultdict, Counter
from collections.abc import Container, Mapping, Hashable
from typing import TYPE_CHECKING
from typing import Pattern, Match
from typing import Hashable, Mapping, TypeVar, Any, overload, Union, Sequence, Dict, Deque, TextIO, Callable, ContextManager
from typing import List, Protocol, NoReturn, Union, Set, Tuple, Optional, Iterable, Iterator, cast, NamedTuple, TYPE_CHECKING
# from typing import 

# Functional Tools
from functools import wraps, total_ordering, lru_cache

# Files & Path
import tarfile
import logging
import zipfile
import fnmatch
from pathlib import Path
from urllib.request import urlopen
from urllib.parse import urlparse

# Dataclass
from dataclasses import dataclass, field

### Threads

In [2]:
from threading import Thread, Lock

In [3]:
class Chef(Thread):
    def __init__(self, name: str) -> None:
        super().__init__(name=name)
        self.total = 0

    def get_order(self) -> None:
        self.order = THE_ORDERS.pop(0)

    def prepare(self) -> None:
        """Simulate doing a lot of work with a BIG computation"""
        start = time.monotonic()
        target = start + 1 + random.random()
        for i in range(1_000_000_000):
            self.total += math.factorial(i)
            if time.monotonic() >= target:
                break
        print(f"{time.monotonic():.3f} {self.name} made {self.order}")

    def run(self) -> None:
        while True:
            try:
                self.get_order()
                self.prepare()
            except IndexError:
                break  # No more orders

In [4]:
THE_ORDERS = [
    "Reuben",
    "Ham and Cheese",
    "Monte Cristo",
    "Tuna Melt",
    "Cuban",
    "Grilled Cheese",
    "French Dip",
    "BLT",
]

In [5]:
Mo = Chef("Michael")
Constantine = Chef("Constantine")

if __name__ == "__main__":
    random.seed(42)
    Mo.start()
    Constantine.start()

### Multiprocessing

In [6]:
from multiprocessing import Process, cpu_count

In [7]:
class MuchCPU(Process):
    def run(self) -> None:
        print(f"OS PID {os.getpid()}")

        s = sum(2 * i + 1 for i in range(100_000_000))

920385.335 Constantine made Ham and Cheese


In [8]:
class MoreCPU(Thread):
    def run(self) -> None:
        print(f"OS PID {os.getpid()}")

        s = sum(2 * i + 1 for i in range(100_000_000))


if __name__ == "__main__":
    # workers = [MuchCPU() for f in range(cpu_count())]
    workers = [MoreCPU() for f in range(cpu_count())]

    t = time.perf_counter()
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    print(f"work took {time.perf_counter() - t:.3f} seconds")

920385.940 Michael made Reuben


Exception in thread Thread-5:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
Exception in thread Thread-6:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/var/folders/dk/f9pbhbp52qxc0613mpbtx8lm0000gn/T/ipykernel_40033/243109373.py", line 3, in run
Exception in thread Thread-7:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
Exception in thread Thread-8:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
Exception in thread Thread-9:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/var

920386.635 Constantine made Monte Cristo
920387.163 Michael made Tuna Melt
work took 1.067 seconds


### Multiprocessing Pools

In [9]:
from multiprocessing.pool import Pool

In [None]:
def prime_factors(value: int) -> list[int]:
    """
    >>> set(prime_factors(42))
    {2, 3, 7}
    >>> set(prime_factors(97))
    {97}
    """
    if value in {2, 3}:
        return [value]
    factors: list[int] = []
    for divisor in range(2, ceil(sqrt(value)) + 1):
        quotient, remainder = divmod(value, divisor)
        if not remainder:
            factors.extend(prime_factors(divisor))
            factors.extend(prime_factors(quotient))
            break
    else:
        factors = [value]
    return factors


if __name__ == "__main__":
    to_factor = [random.randint(100_000_000, 1_000_000_000) for i in range(40_960)]
    
    with Pool() as pool:
        results = pool.map(prime_factors, to_factor)
    
    primes = [
        value for value, factor_list in zip(to_factor, results) if len(factor_list) == 1
    ]
    
    print(f"9-digit primes: {primes}")

920388.375 Constantine made Cuban
920388.847 Michael made Grilled Cheese
920390.277 Constantine made French Dip
920390.690 Michael made BLT

Process SpawnPoolWorker-1:
Process SpawnPoolWorker-2:
Process SpawnPoolWorker-3:
Process SpawnPoolWorker-4:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/pool.py", line 114, in worker
    task = get()
           ^^^^^
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/




Process SpawnPoolWorker-13:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/pool.py", line 114, in worker
    task = get()
           ^^^^^
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'prime_factors' on <module '__main__' (built-in)>
Process SpawnPoolWorker-14:
Traceback (most recent call last):
Process SpawnPoolWorker-15:
Traceback (most recent call last):
  File "/Users/isisromero/anaconda3/envs/OOP/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.ru

### Queues

In [None]:
if TYPE_CHECKING:
    Query_Q = Queue[Union[str, None]]
    Result_Q = Queue[List[str]]


def search(paths: list[Path], query_q: Query_Q, results_q: Result_Q) -> None:
    print(f"PID: {os.getpid()}, paths {len(paths)}")
    lines: list[str] = []
    for path in paths:
        lines.extend(l.rstrip() for l in path.read_text().splitlines())

    while True:
        if (query_text := query_q.get()) is None:
            break
        results = [l for l in lines if query_text in l]
        results_q.put(results)

In [None]:
class DirectorySearch:
    def __init__(self) -> None:
        self.query_queues: list[Query_Q]
        self.results_queue: Result_Q
        self.search_workers: list[Process]

    def setup_search(self, paths: list[Path], cpus: Optional[int] = None) -> None:
        if cpus is None:
            cpus = cpu_count()
        worker_paths = [paths[i::cpus] for i in range(cpus)]
        self.query_queues = [Queue() for p in range(cpus)]
        self.results_queue = Queue()

        self.search_workers = [
            Process(target=search, args=(paths, q, self.results_queue))
            for paths, q in zip(worker_paths, self.query_queues)
        ]
        for proc in self.search_workers:
            proc.start()

    def teardown_search(self) -> None:
        # Signal process termination
        for q in self.query_queues:
            q.put(None)

        for proc in self.search_workers:
            proc.join()

    def search(self, target: str) -> Iterator[str]:
        print(f"search queues={self.query_queues}")
        for q in self.query_queues:
            q.put(target)

        for i in range(len(self.query_queues)):
            for match in self.results_queue.get():
                yield match

In [None]:
def all_source(path: Path, pattern: str) -> Iterator[Path]:
    for root, dirs, files in os.walk(path):
        for skip in {".tox", ".mypy_cache", "__pycache__", ".idea"}:
            if skip in dirs:
                dirs.remove(skip)
        yield from (Path(root) / f for f in files if fnmatch(f, pattern))


In [None]:
from multiprocessing import Process, Queue, cpu_count
import time

In [None]:
if __name__ == "__main__":
    ds = DirectorySearch()
    base = Path.cwd().parent
    all_paths = list(all_source(base, "*.py"))
    ds.setup_search(all_paths)
    for target in ("import", "class", "def"):
        start = time.perf_counter()
        count = 0
        for line in ds.search(target):
            # print(line)
            count += 1
        milliseconds = 1000 * (time.perf_counter() - start)
        print(
            f"Found {count} {target!r} in {len(all_paths)} files "
            f"in {milliseconds:.3f}ms"
        )
    ds.teardown_search()

## Futures

In [None]:
class ImportResult(NamedTuple):
    path: Path
    imports: set[str]

    @property
    def focus(self) -> bool:
        return "typing" in self.imports


class ImportVisitor(ast.NodeVisitor):
    def __init__(self) -> None:
        self.imports: set[str] = set()

    def visit_Import(self, node: ast.Import) -> None:
        # print(ast.dump(node))
        for alias in node.names:
            self.imports.add(alias.name)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        # print(ast.dump(node))
        if node.module:
            self.imports.add(node.module)


def find_imports(path: Path) -> ImportResult:
    tree = ast.parse(path.read_text())
    iv = ImportVisitor()
    iv.visit(tree)
    return ImportResult(path, iv.imports)

In [None]:
def main(base: Path = Path.cwd()) -> None:
    print(f"\n{base}")
    start = time.perf_counter()
    with futures.ThreadPoolExecutor(24) as pool:
        analyzers = [
            pool.submit(find_imports, path) for path in all_source(base, "*.py")
        ]
        analyzed = (worker.result() for worker in futures.as_completed(analyzers))
    for example in sorted(analyzed):
        print(
            f"{'->' if example.focus else '':2s} "
            f"{example.path.relative_to(base)} {example.imports}"
        )
    end = time.perf_counter()
    rate = 1000 * (end - start) / len(analyzers)
    print(f"Searched {len(analyzers)} files in {base} at {rate:.3f}ms/file")


In [None]:
if __name__ == "__main__":
    options = get_options()
    for path in options.path:
        main(path)

In [None]:
## AsyncIO in Action