# 第4课：多线程与多进程

## 学习目标
- 理解并发与并行的区别
- 掌握多线程编程
- 掌握多进程编程
- 了解线程池和进程池

## 1. 并发与并行

- **并发（Concurrency）**：多个任务交替执行，看起来像同时进行
- **并行（Parallelism）**：多个任务真正同时执行（需要多核CPU）

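### Python 中的选择
- **I/O 密集型**（网络请求、文件读写）→ 多线程
- **CPU 密集型**（计算、数据处理）→ 多进程

> 原因：在标准 CPython 中，全局解释器锁（GIL）使同一进程内同一时刻只有一个线程执行 Python 字节码。线程在等待 I/O 时会释放 GIL，因此多线程可以让等待时间相互重叠；而 CPU 密集型任务需要使用多进程才能真正利用多核。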

## 2. 多线程基础

In [None]:
import threading
import time

def task(name, delay):
    """Print a start message, sleep for ``delay`` seconds, then print a finish message.

    Args:
        name: Label interpolated into both messages.
        delay: Number of seconds passed to ``time.sleep``.
    """
    start_msg = f"线程 {name} 开始"
    print(start_msg)
    time.sleep(delay)
    done_msg = f"线程 {name} 完成"
    print(done_msg)

# Create two threads that run task() with different delays.
t1 = threading.Thread(target=task, args=("A", 2))
t2 = threading.Thread(target=task, args=("B", 1))

# perf_counter() is a monotonic clock intended for measuring intervals, so the
# result cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()

# Start both threads before waiting on either one.
t1.start()
t2.start()

# Block until both threads have finished.
t1.join()
t2.join()

print(f"总耗时: {time.perf_counter() - start:.2f}秒")

In [None]:
# Subclass Thread and override run()
class MyThread(threading.Thread):
    """Thread that prints a start message, sleeps, then prints a finish message.

    Attributes:
        delay: Seconds to sleep inside ``run``.
    """

    def __init__(self, name, delay):
        """Store the thread name and the sleep duration.

        Args:
            name: Value assigned to the thread's ``name`` attribute.
            delay: Seconds ``run`` passes to ``time.sleep``.
        """
        super().__init__()
        self.delay = delay
        self.name = name

    def run(self):
        """Body executed in the new thread after ``start()`` is called."""
        start_msg = f"线程 {self.name} 开始"
        print(start_msg)
        time.sleep(self.delay)
        done_msg = f"线程 {self.name} 完成"
        print(done_msg)

# Build three MyThread instances, each configured with a 1-second delay.
threads = [MyThread(f"Thread-{i}", 1) for i in range(3)]

# Start every thread before joining any of them.
for t in threads:
    t.start()

# Wait for all threads to finish.
for t in threads:
    t.join()

print("所有线程完成")

## 3. 线程同步

In [None]:
# Thread-safety problem: a shared global updated with no synchronization
counter = 0

def increment():
    """Add 1 to the global ``counter`` 100000 times without any locking.

    The lesson runs this from several threads at once and compares the final
    value with the expected total.
    NOTE(review): whether updates are actually lost depends on the interpreter
    version and build; confirm the mismatch still reproduces on the Python
    used for this course.
    """
    global counter
    for _step in range(100_000):
        counter += 1

# Run increment() in five threads, all updating the same global counter.
threads = [threading.Thread(target=increment) for _ in range(5)]

for t in threads:
    t.start()
for t in threads:
    t.join()

# 5 threads x 100000 increments each = 500000 if no update is lost.
print(f"期望值: 500000, 实际值: {counter}")

In [None]:
# Fix: guard every update with a lock
counter = 0
lock = threading.Lock()

def increment_safe():
    """Add 1 to the global ``counter`` 100000 times, holding ``lock`` for each update.

    The lock is acquired and released once per increment, so only one thread
    at a time executes the ``counter += 1`` statement.
    """
    global counter
    for _step in range(100_000):
        with lock:
            counter += 1

# Run increment_safe() in five threads, all updating the same global counter.
threads = [threading.Thread(target=increment_safe) for _ in range(5)]

for t in threads:
    t.start()
for t in threads:
    t.join()

# 5 threads x 100000 increments each = 500000.
print(f"期望值: 500000, 实际值: {counter}")

## 4. 线程池

In [None]:
from concurrent.futures import ThreadPoolExecutor
import time

def download(url, delay=1):
    """Simulate downloading ``url`` and return a completion message.

    Args:
        url: Address to "download"; only used in the printed and returned text.
        delay: Seconds to sleep in place of real network I/O. Defaults to 1,
            the duration that was previously hard-coded.

    Returns:
        The string ``f"{url} 完成"``.
    """
    print(f"下载 {url}")
    time.sleep(delay)  # stand-in for the blocking download
    return f"{url} 完成"

urls = [f"http://example.com/page{i}" for i in range(5)]

# perf_counter() is a monotonic clock intended for measuring intervals, so the
# result cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()

# At most 3 downloads run at once; map() yields results in the order of `urls`.
with ThreadPoolExecutor(max_workers=3) as executor:
    results = executor.map(download, urls)
    for result in results:
        print(result)

print(f"总耗时: {time.perf_counter() - start:.2f}秒")

In [None]:
# 使用 submit 和 Future
from concurrent.futures import ThreadPoolExecutor, as_completed

def task(n):
    """Sleep for ``n`` seconds, then return ``n`` multiplied by itself.

    Args:
        n: Sleep duration in seconds; also the value that gets squared.

    Returns:
        ``n * n``.
    """
    time.sleep(n)
    squared = n * n
    return squared

with ThreadPoolExecutor(max_workers=3) as executor:
    # Map each Future back to the argument it was submitted with.
    futures = {executor.submit(task, i): i for i in [3, 1, 2]}
    
    # as_completed() yields each future as it finishes, not in submission order.
    for future in as_completed(futures):
        n = futures[future]
        result = future.result()
        print(f"task({n}) = {result}")

## 5. 多进程基础

In [None]:
import multiprocessing
import os

def worker(name, delay=1):
    """Print the current process ID, sleep, and return ``name``.

    Args:
        name: Label printed next to the PID and returned unchanged.
        delay: Seconds to sleep to simulate work. Defaults to 1, the duration
            that was previously hard-coded.

    Returns:
        The ``name`` argument.
    """
    print(f"进程 {name}, PID: {os.getpid()}")
    time.sleep(delay)
    return name

if __name__ == "__main__":
    print(f"主进程 PID: {os.getpid()}")
    
    # Create and immediately start three processes, keeping a handle to each.
    processes = []
    for i in range(3):
        # args must be a tuple, hence the trailing comma.
        p = multiprocessing.Process(target=worker, args=(f"Process-{i}",))
        processes.append(p)
        p.start()
    
    # Wait for every child process to exit.
    for p in processes:
        p.join()
    
    print("所有进程完成")

## 6. 进程池

In [None]:
from concurrent.futures import ProcessPoolExecutor

def cpu_intensive(n):
    """CPU-bound task: return the sum of ``i * i`` for every ``i`` in ``range(n)``.

    Args:
        n: Upper bound (exclusive) of the integers to square and add.

    Returns:
        The sum of squares; 0 when ``range(n)`` is empty.
    """
    return sum(i * i for i in range(n))

if __name__ == "__main__":
    numbers = [10000000] * 4
    
    # Serial baseline: run the four jobs one after another in this process.
    # perf_counter() is a monotonic clock intended for measuring intervals, so
    # the result cannot be skewed by system clock adjustments like time.time().
    start = time.perf_counter()
    results_serial = [cpu_intensive(n) for n in numbers]
    print(f"串行耗时: {time.perf_counter() - start:.2f}秒")
    
    # Parallel run: hand the same jobs to a pool of 4 worker processes.
    start = time.perf_counter()
    with ProcessPoolExecutor(max_workers=4) as executor:
        results_parallel = list(executor.map(cpu_intensive, numbers))
    print(f"并行耗时: {time.perf_counter() - start:.2f}秒")

## 7. 进程间通信

In [None]:
from multiprocessing import Process, Queue

def producer(queue):
    """Put the five strings ``item-0`` .. ``item-4`` on ``queue``, printing each.

    Args:
        queue: Object with a ``put`` method that receives each item.
    """
    for index in range(5):
        item = f"item-{index}"
        queue.put(item)
        print(f"生产: {item}")

def consumer(queue):
    """Print items taken from ``queue`` until a ``None`` item is received.

    Args:
        queue: Object with a ``get`` method; ``None`` is the stop signal.
    """
    item = queue.get()
    while item is not None:
        print(f"消费: {item}")
        item = queue.get()

if __name__ == "__main__":
    # One queue shared by the producer and consumer processes.
    queue = Queue()
    
    p1 = Process(target=producer, args=(queue,))
    p2 = Process(target=consumer, args=(queue,))
    
    p1.start()
    p2.start()
    
    # Wait for the producer first so the None below is enqueued after all of
    # its items; consumer() stops as soon as it reads None.
    p1.join()
    queue.put(None)  # end-of-stream signal
    p2.join()

## 8. 实际应用示例

In [None]:
# 批量处理文件（模拟）
from concurrent.futures import ThreadPoolExecutor
import random

def process_file(filename, min_delay=0.1, max_delay=0.5):
    """Simulate processing a file and report a random line count.

    Args:
        filename: Name used as the prefix of the returned text.
        min_delay: Lower bound, in seconds, of the simulated processing time.
        max_delay: Upper bound, in seconds, of the simulated processing time.
            The defaults reproduce the previously hard-coded 0.1-0.5 s range.

    Returns:
        ``"<filename>: <count> 行"`` where ``count`` is a random integer
        between 100 and 1000 inclusive.
    """
    time.sleep(random.uniform(min_delay, max_delay))
    return f"{filename}: {random.randint(100, 1000)} 行"

files = [f"file_{i}.txt" for i in range(10)]

print("开始处理文件...")
# perf_counter() is a monotonic clock intended for measuring intervals, so the
# result cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()

# At most 5 files are processed at once; map() yields results in the order of `files`.
with ThreadPoolExecutor(max_workers=5) as executor:
    results = executor.map(process_file, files)
    for result in results:
        print(result)

print(f"\n处理完成，耗时: {time.perf_counter() - start:.2f}秒")

## 9. 练习题

### 练习：并行计算素数
使用多进程并行计算一定范围内的素数个数

In [None]:
def is_prime(n):
    """Return True if the integer ``n`` is prime, otherwise False.

    Uses trial division: first by 2, then by odd candidates up to the integer
    square root of ``n``.

    Args:
        n: Integer to test. Values below 2 are never prime.

    Returns:
        bool: Whether ``n`` is prime.
    """
    # Imported locally so the function is self-contained wherever it is run.
    import math

    if n < 2:
        return False
    if n < 4:
        return True  # 2 and 3
    if n % 2 == 0:
        return False
    # math.isqrt is exact for arbitrarily large ints. int(n ** 0.5) goes
    # through a float, which can round incorrectly for large n and raises
    # OverflowError once n no longer fits in a float.
    limit = math.isqrt(n)
    return all(n % divisor for divisor in range(3, limit + 1, 2))

def count_primes(start, end):
    """Count the primes in the half-open range [start, end)."""
    # Exercise: write the implementation here.
    pass

# 使用进程池并行计算 1 到 1000000 的素数个数


## 10. 本课小结

1. **多线程**：适合 I/O 密集型任务
2. **多进程**：适合 CPU 密集型任务
3. **线程同步**：使用 Lock 避免竞态条件
4. **线程池/进程池**：ThreadPoolExecutor、ProcessPoolExecutor
5. **进程间通信**：Queue、Pipe