In [18]:
import threading
import time
import copy
from queue import Queue

### 使用join来控制多个线程的执行顺序
`join()` 会阻塞调用它的线程（这里是主线程），直到被 join 的线程执行结束。因此 `start()` 与 `join()` 的相对位置决定了各线程是依次串行执行，还是并发执行。

In [None]:
def task1():
    """Print a start banner, idle for 0.1 seconds, then print a finish banner."""
    label = 'task1'
    print('%s start \n' % label)
    time.sleep(0.1)  # stand-in for real work
    print('%s finish \n' % label)
    
def task2():
    """Print a start banner, idle for 0.1 seconds, then print a finish banner."""
    label = 'task2'
    print('%s start \n' % label)
    time.sleep(0.1)  # stand-in for real work
    print('%s finish \n' % label)

### join位置不同的对比
如果在启动下一个线程之前先对当前线程调用 `join()`，主线程会等待它执行结束后才继续，因此两个线程按先后顺序串行执行。

In [11]:
# Serial execution: thread1 is started AND joined before thread2 is started,
# so task2 cannot begin until task1 has finished.
thread1 = threading.Thread(target=task1)
thread2 = threading.Thread(target=task2)
thread1.start()
thread1.join()  # main thread blocks here until task1 returns
thread2.start()
thread2.join()  # main thread blocks here until task2 returns
print('all done \n')

task1 start 

task1 finish 

task2 start 

task2 finish 

all done 



如果两个线程都先启动、之后再调用 `join()`，它们会并发执行：执行时间越短的线程越早结束，与 `join()` 的调用顺序无关。

In [13]:
# Concurrent execution: both threads are started before either one is joined,
# so task1 and task2 overlap in time.
thread1 = threading.Thread(target=task1)
thread2 = threading.Thread(target=task2)
thread1.start()
thread2.start()

# join() only makes the main thread wait; joining thread2 first does not
# change when either task runs.
thread2.join()
thread1.join()
print('all done \n')

task1 start 

task2 start 

task1 finish 
task2 finish 


all done 



### 带有输入的多线程
由于线程不能直接返回值，因此我们将结果放入一个队列（`Queue`）中。各线程结束的先后顺序并不固定，所以队列中结果的顺序取决于哪个线程先完成。

In [16]:
from queue import Queue

In [41]:
def job1(data, q):
    """Square every element of ``data`` and put the resulting list on ``q``.

    Args:
        data: iterable of numbers.
        q: queue-like object with a ``put`` method; receives one list.
    """
    q.put([value ** 2 for value in data])
    
def job2(data, q):
    """Sleep 0.2 s, then put ``[i**4+3-6-7%2 for i in data]`` on ``q``.

    The sleep makes this job finish later than a job without one.

    Args:
        data: iterable of numbers.
        q: queue-like object with a ``put`` method; receives one list.
    """
    time.sleep(0.2)
    # Expression kept term-for-term so float inputs round the same way.
    transformed = [value ** 4 + 3 - 6 - 7 % 2 for value in data]
    q.put(transformed)

In [42]:
def multithreads():
    """Run job1 once and job2 twice in parallel threads and print their results.

    Each worker puts one list on a shared queue. After all threads have been
    joined, the three lists are printed in the order they were enqueued.
    """
    q = Queue()
    data = [[1,2,3,4],[1,2,3,4],[2,3,4,5]]
    workers = (job1, job2, job2)

    threads = []
    for worker, chunk in zip(workers, data):
        # Create and start each thread immediately, one after another.
        t = threading.Thread(target=worker, args=(chunk, q))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    for _ in range(len(threads)):
        print(q.get())
multithreads()

[1, 4, 9, 16]
[-3, 12, 77, 252]
[12, 77, 252, 621]


### 全局解释器锁 Global Interpreter Lock
一个解释器只有一个GIL，用来控制所有线程的执行，以保证不会出现内存泄漏或内存被错误释放的问题；同时，由于只有一把全局锁，也就避免了多把锁之间的死锁问题。CPython解释器通过引用计数来管理对象的释放，因此需要GIL来保证引用计数不会被多个线程同时修改，从而避免变量或内存空间被其他线程错误地占用或释放。
这会给CPU-bound任务带来瓶颈，因为实际上多个线程并不会同时执行。这可以通过multiprocessing来解决，但该方法也存在scalability不足的问题。而对于I/O-bound任务，多线程可以发挥作用：I/O操作大部分时间都处于等待状态，这些等待时间可以用来在多个线程之间切换，从而实现加速。下面给出I/O-bound和CPU-bound两个例子。

In [6]:
def job_writing_file(file_name):
    """Write 1000 identical lines to ``files/<file_name>`` (I/O-bound demo job).

    The ``files`` directory is created if it does not exist, so the function
    works from a fresh checkout.

    Args:
        file_name: name of the file to create inside ``files/``.
    """
    import os  # local import keeps this cell runnable on its own

    os.makedirs('files', exist_ok=True)
    file = 'files/' + file_name
    # The with-block closes the file; no explicit close() is needed.
    with open(file, 'w') as f:
        for _ in range(1000):
            f.write('test_file.\n')
    
def job_couting(l, q):
    """Sum the items of ``l`` and put the total on ``q`` (CPU-bound demo job).

    Args:
        l: iterable of numbers.
        q: queue-like object with a ``put`` method; receives one number.
    """
    total = sum(l)
    q.put(total)

In [15]:
# I/O-bound benchmark: write two files in two threads, then two files sequentially.
# `args` must be a tuple, hence the trailing comma in ("file_1.txt",).
th1 = threading.Thread(target=job_writing_file, args=("file_1.txt",))
th2 = threading.Thread(target=job_writing_file, args=("file_2.txt",))
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
multi_start = time.perf_counter()
th1.start()
th2.start()
th1.join()
th2.join()
multi_end = time.perf_counter()
print('multiple thread for file writing time:', multi_end-multi_start)
single_start = time.perf_counter()
job_writing_file('file_3.txt')
job_writing_file('file_4.txt')
single_end = time.perf_counter()
print('single thread for file writing time:', single_end-single_start)

multiple thread for file writing time: 0.010971546173095703
single thread for file writing time: 0.007891416549682617


In [20]:
# CPU-bound benchmark: sum a list in two threads, then sum it twice sequentially.
l = [i for i in range(1000)]
q = Queue()
th3 = threading.Thread(target=job_couting, args=(l, q))
th4 = threading.Thread(target=job_couting, args=(copy.copy(l), q))
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
multi_start = time.perf_counter()
th3.start()
th4.start()
th3.join()
th4.join()
total = 0
for _ in range(2):
    total += q.get()
print('mulitple sum:', total)
multi_end = time.perf_counter()
# The labels below say "summing": this cell measures summation, not file writing.
print('multiple thread for summing time:', multi_end-multi_start)
single_start = time.perf_counter()
job_couting(l, q)
job_couting(l, q)
total = 0
for _ in range(2):
    total += q.get()
print('single sum:', total)
single_end = time.perf_counter()
print('single thread for summing time:', single_end-single_start)

mulitple sum: 999000
multiple thread for file writing time: 0.005985260009765625
single sum: 999000
single thread for file writing time: 0.003854513168334961


In [21]:
def job(l, q):
    """Put the sum of ``l`` on ``q``.

    Args:
        l: iterable of numbers.
        q: queue-like object with a ``put`` method; receives one number.
    """
    q.put(sum(l))

def multithreading(l, n_threads=4):
    """Sum ``l`` once per thread in ``n_threads`` threads and print the grand total.

    Each thread gets its own shallow copy of ``l`` and puts its partial sum on
    a shared queue; the partial sums are added up after all threads are joined.

    Args:
        l: list of numbers to sum.
        n_threads: number of worker threads (default 4).
    """
    q = Queue()
    threads = []
    for i in range(n_threads):
        t = threading.Thread(target=job, args=(copy.copy(l), q), name='T%i' % i)
        t.start()
        threads.append(t)
    # A plain loop, since join() is called only for its side effect.
    for t in threads:
        t.join()
    total = sum(q.get() for _ in range(n_threads))
    print(total)

def normal(l):
    """Print the sum of ``l``, computed in the calling thread (baseline)."""
    print(sum(l))

In [23]:
# Multithreading can be slower than a single thread for CPU-bound work.
l = list(range(1000000))
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
s_t = time.perf_counter()
normal(l*4)
print('normal: ',time.perf_counter()-s_t)
s_t = time.perf_counter()
multithreading(l)
print('multithreading: ', time.perf_counter()-s_t)

1999998000000
normal:  0.2635996341705322
1999998000000
multithreading:  0.3425111770629883


### 线程锁
线程锁主要用来保护共享变量：持有锁的线程在操作变量期间，其他线程必须等到锁被释放后才能修改该变量。

In [None]:
# A simple example
def job1():
    """Add 0..9 to the global ``A`` while holding ``lock``, printing each step.

    The lock is held for the whole loop, so no other thread using the same
    lock can change ``A`` in between. ``with lock`` releases the lock even if
    the loop raises, so a failure here cannot leave other threads blocked.
    """
    global A
    with lock:
        for i in range(10):
            A += i
            print('job1', 'A=', A)
    
def job2():
    """Add 0, 10, ..., 90 to the global ``A`` while holding ``lock``, printing each step.

    The lock is held for the whole loop, so no other thread using the same
    lock can change ``A`` in between. ``with lock`` releases the lock even if
    the loop raises, so a failure here cannot leave other threads blocked.
    """
    global A
    with lock:
        for i in range(10):
            A += i*10
            print('job2', 'A=', A)

# Shared state for the lock demo: both jobs update the global A and use the
# same module-level lock.
A = 0
lock = threading.Lock()
th1 = threading.Thread(target=job1)
th2 = threading.Thread(target=job2)
# Both threads are started before either is joined; each job holds the lock
# for its entire loop, so one job's loop completes before the other's begins.
th1.start()
th2.start()
th1.join()
th2.join()