**使用多线程并行运行程序**  
@Author: Rui  
@Build time: 2022.08.23  
@Cite: 
1. https://mofanpy.com/tutorials/python-basic/threading/

In [1]:
import threading
import time

# Show the value of every top-level expression in a cell, not only the last
# one, so the multi-statement cells below display all of their results.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## 线程信息查询

In [2]:
threading.active_count()  # number of Thread objects currently alive
threading.activeCount()  # legacy camelCase alias of active_count(); deprecated since Python 3.10

8

8

In [3]:
threading.enumerate()  # list every Thread object that is currently alive

[<_MainThread(MainThread, started 4369384832)>,
 <Thread(Thread-4, started daemon 6117044224)>,
 <Heartbeat(Thread-5, started daemon 6133870592)>,
 <Thread(Thread-6, started daemon 6151843840)>,
 <Thread(Thread-7, started daemon 6168670208)>,
 <ControlThread(Thread-3, started daemon 6185496576)>,
 <HistorySavingThread(IPythonHistorySavingThread, started 6202322944)>,
 <ParentPollerUnix(Thread-2, started daemon 6219722752)>]

In [4]:
threading.currentThread()  # legacy camelCase alias of current_thread(); deprecated since Python 3.10
threading.current_thread()  # Thread object for the thread running this code

<_MainThread(MainThread, started 4369384832)>

<_MainThread(MainThread, started 4369384832)>

## 创建线程 -> `threading.Thread()`

In [5]:
# Baseline before any extra thread is created: report how many threads are
# alive and list them. active_count() replaces the camelCase alias
# activeCount(), which is deprecated since Python 3.10.
print("线程执行前，活跃的线程数:%s" % threading.active_count())
threading.enumerate()

线程执行前，活跃的线程数:8


[<_MainThread(MainThread, started 4369384832)>,
 <Thread(Thread-4, started daemon 6117044224)>,
 <Heartbeat(Thread-5, started daemon 6133870592)>,
 <Thread(Thread-6, started daemon 6151843840)>,
 <Thread(Thread-7, started daemon 6168670208)>,
 <ControlThread(Thread-3, started daemon 6185496576)>,
 <HistorySavingThread(IPythonHistorySavingThread, started 6202322944)>,
 <ParentPollerUnix(Thread-2, started daemon 6219722752)>]

In [6]:
# Task executed inside the child thread.
def thread_job():
    """Print the running thread, the live-thread count and all live threads.

    Uses ``current_thread()`` / ``active_count()``; the camelCase aliases
    ``currentThread()`` / ``activeCount()`` are deprecated since Python 3.10.
    """
    print("This is an added Thread, name is %s" % threading.current_thread())
    print("线程开始，活跃的线程数:%s" % threading.active_count())
    print(threading.enumerate())
    
# Create a child thread that will run thread_job, then start it.
added_thread = threading.Thread(target=thread_job, name='new Thread')  # name is shown in the thread's repr

added_thread.start()

This is an added Thread, name is <Thread(new Thread, started 6236549120)>
线程开始，活跃的线程数:9
[<_MainThread(MainThread, started 4369384832)>, <Thread(Thread-4, started daemon 6117044224)>, <Heartbeat(Thread-5, started daemon 6133870592)>, <Thread(Thread-6, started daemon 6151843840)>, <Thread(Thread-7, started daemon 6168670208)>, <ControlThread(Thread-3, started daemon 6185496576)>, <HistorySavingThread(IPythonHistorySavingThread, started 6202322944)>, <ParentPollerUnix(Thread-2, started daemon 6219722752)>, <Thread(new Thread, started 6236549120)>]


In [7]:
# After the child thread has finished: report the live-thread count again and
# list the threads. active_count() replaces the camelCase alias
# activeCount(), which is deprecated since Python 3.10.
print("线程结束后，活跃的线程数:%s" % threading.active_count())
threading.enumerate()

线程结束后，活跃的线程数:8


[<_MainThread(MainThread, started 4369384832)>,
 <Thread(Thread-4, started daemon 6117044224)>,
 <Heartbeat(Thread-5, started daemon 6133870592)>,
 <Thread(Thread-6, started daemon 6151843840)>,
 <Thread(Thread-7, started daemon 6168670208)>,
 <ControlThread(Thread-3, started daemon 6185496576)>,
 <HistorySavingThread(IPythonHistorySavingThread, started 6202322944)>,
 <ParentPollerUnix(Thread-2, started daemon 6219722752)>]

## 线程等待 -> `join()`
* 在主线程中调用子线程的 `join()` 方法会阻塞主线程，直到该子线程运行结束后，主线程才继续执行后面的语句

In [8]:
# Example 1:
# Without join() the main thread does not wait for the child thread; both run
# concurrently, so "All done" is printed before "new thread is finished".

def thread_job():
    """Announce the start, sleep ten times for 0.1 s, then announce the end."""
    print("new thread is start\n")
    pause_seconds = 0.1
    for _ in range(10):
        time.sleep(pause_seconds)
    print("new thread is finished\n")

added_thread = threading.Thread(target=thread_job, name='new thread')
added_thread.start()  # returns immediately; thread_job keeps running in the child thread

print("All done\n")  # no join() above, so this does not wait for the child thread

new thread is start
All done




new thread is finished



In [92]:
# Example 2:
# With join() the caller waits for the thread to finish before it moves on
# to the statements that follow.

def thread_job():
    """Announce the start, sleep ten times for 0.1 s, then announce the end."""
    print("new thread is start\n")
    pause_seconds = 0.1
    for _ in range(10):
        time.sleep(pause_seconds)
    print("new thread is finished\n")

added_thread = threading.Thread(target=thread_job, name='new thread')
added_thread.start()
added_thread.join() # block the main thread here until added_thread has finished

print("All done\n")  # reached only after join() has returned

new thread is start

new thread is finished

new thread is finished

All done



## 使用Queue收集多线程的返回值  
* `threading.Thread` 不会保存任务函数的返回值，因此任务函数需要把结果放入队列 `Queue`（下文的 `Q`），再由主线程从队列中取出

In [93]:
from queue import Queue

# Worker function run by each thread.
def job(ls, Q):
    """Square every element of ``ls`` in place and publish the list on ``Q``.

    ``threading.Thread`` discards the return value of its target, so the
    result is also put on the queue for the caller to collect.

    Args:
        ls: Mutable sequence of numbers; it is modified in place.
        Q: Object with a ``put`` method (a ``queue.Queue`` in this notebook).

    Returns:
        The same ``ls`` object, now holding the squared values.
    """
    for position, value in enumerate(ls):
        ls[position] = value ** 2
    Q.put(ls)  # hand the result to whoever reads the queue
    return ls

Q = Queue()  # collects the list each worker thread puts on it
threads = []  # keeps the Thread objects so they can be joined below

data = [
    [1, 1, 1],  # input for thread 1
    [2, 2, 2],  # input for thread 2
    [3, 3, 3],  # ...
    [4, 4, 4],
]

# Start one worker thread per row. Iterating over data (instead of a
# hard-coded range(4)) keeps the thread count in sync with the input.
for row in data:
    subthread = threading.Thread(target=job, args=(row, Q))
    subthread.start()
    threads.append(subthread)

# Wait until every worker has finished.
for subthread in threads:
    subthread.join()

# Drain exactly one result per started thread. Items come out in the order
# the workers called Q.put(), which need not match the order of data.
results = [Q.get() for _ in threads]
results

[[1, 1, 1], [4, 4, 4], [9, 9, 9], [16, 16, 16]]

## 时间对比：多线程不一定会更快，计算密集型程序会慢特别多
* 由于CPython中`全局解释器锁(Global Interpreter Lock, GIL)`的存在, 同一时刻只有一个线程能够执行Python字节码，因此多线程在做纯Python计算时并不能同时利用多个CPU核心。
* 一个CPU核心在多个线程中来回切换，当前运行的线程在执行时，其他线程在等待
* IO密集型应用，推荐`多线程`；计算密集型应用，推荐`多进程`

In [94]:
# Worker function used for the timing comparison.
def job(ls, Q):
    """Compute ``1000*x**20 + x**30`` for each element of a copy of ``ls``.

    The input is copied first, so the caller's ``ls`` is left unchanged.

    Args:
        ls: Sequence of numbers providing a ``copy()`` method.
        Q: Object with a ``put`` method; receives the transformed copy.

    Returns:
        The transformed copy (the same object that was put on ``Q``).
    """
    transformed = ls.copy()
    for position, value in enumerate(transformed):
        transformed[position] = 1000 * value ** 20 + value ** 30
    Q.put(transformed)  # hand the result to whoever reads the queue
    return transformed

# Ten input rows, one per worker thread: row k is [k, k, k] for k = 1..10.
data = [[k, k, k] for k in range(1, 11)]

In [95]:
# Threaded run: one worker thread per row of data.
# perf_counter() is the clock intended for measuring short intervals.
start_time = time.perf_counter()

Q = Queue()  # collects the list each worker thread puts on it
# Start from an empty list. Without this the cell would append to (and
# re-join) Thread objects left in `threads` by an earlier cell, or raise
# NameError when that cell has not been run.
threads = []

for row in data:
    subthread = threading.Thread(target=job, args=(row, Q))
    subthread.start()
    threads.append(subthread)

# Wait until every worker has finished.
for subthread in threads:
    subthread.join()

# Drain exactly one result per started thread.
results = [Q.get() for _ in threads]

end_time = time.perf_counter()
print("cost time: {}".format(end_time-start_time))
# results  # uncomment to display the collected lists

cost time: 0.0031507015228271484


In [96]:
# Sequential baseline: call job on every row in the main thread.
# perf_counter() is the clock intended for measuring short intervals.
start_time = time.perf_counter()

Q = Queue()  # job() requires a queue argument; its contents are not read here
results = [job(ls, Q) for ls in data]

end_time = time.perf_counter()
print("cost time: {}".format(end_time-start_time))
# results  # uncomment to display the collected lists

cost time: 0.00010800361633300781


## 线程锁Lock

In [97]:
# Threads that are started together may interleave their execution.
def job1():
    """Add 1 to the module-level ``A`` ten times, printing it after each step."""
    # A is a module-level global; that is how both jobs share one value.
    # No lock is taken here.
    global A
    for _step in range(10):
        A += 1
        print('job1', A)

def job2():
    """Add 10 to the module-level ``A`` ten times, printing it after each step."""
    # A is the same module-level global that job1 updates; no lock is taken.
    global A
    for _step in range(10):
        A += 10
        print('job2', A)

A = 0  # shared counter that both jobs update
thread1 = threading.Thread(target=job1)
thread2 = threading.Thread(target=job2)
# Both threads are started without a lock or join(), so their prints and
# their updates to A may interleave differently from run to run.
thread1.start()
thread2.start()

job1job2 1
job1 12
job1 13
job1 14
job1 15
 11
job2 25
job2 35
job2 45
job2 55
job2 65
job2 75
job2 85
job2 95
job2 105
job1 106
job1 107
job1 108
job1 109
job1 110


In [98]:
# 使用Lock可以使线程互不干扰
from threading import Lock

def job1():
    """Add 1 to the global ``A`` ten times while holding the global ``lock``.

    The lock is held for the whole loop, so another thread that acquires the
    same lock cannot run its own loop in between these ten steps. The lock is
    taken with a ``with`` statement so it is released even if the loop
    raises, which bare ``acquire()``/``release()`` calls do not guarantee.
    """
    global A  # lock is only read, so it needs no global declaration
    with lock:
        for _ in range(10):
            A += 1
            print('job1', A)

def job2():
    """Add 10 to the global ``A`` ten times while holding the global ``lock``.

    The lock is held for the whole loop, so another thread that acquires the
    same lock cannot run its own loop in between these ten steps. The lock is
    taken with a ``with`` statement so it is released even if the loop
    raises, which bare ``acquire()``/``release()`` calls do not guarantee.
    """
    global A  # lock is only read, so it needs no global declaration
    with lock:
        for _ in range(10):
            A += 10
            print('job2', A)

lock = threading.Lock()  # single lock object used by both job1 and job2
A = 0  # shared counter that both jobs update
thread1 = threading.Thread(target=job1)
thread2 = threading.Thread(target=job2)
# Each job acquires the lock before its loop, so whichever thread gets the
# lock first finishes all ten steps before the other one begins.
thread1.start()
thread2.start()

job1 1
job1 2
job1 3
job1 4
job1 5
job1 6
job1 7
job1 8
job1 9
job1 10
job2 20
job2 30
job2 40
job2 50
job2 60
job2 70
job2 80
job2 90
job2 100
job2 110
