## python多进程和多线程
python3中thread已经遗弃，改用threading

In [9]:
#多进程的例子
import time
import multiprocessing
def func():
    print("子进程开始：%s"%time.ctime())
    time.sleep(5)
    print("子进程结束：%s"%time.ctime())
#开启多进程
def main():
    p= multiprocessing.Process(target=func)
    p.start()
    p.join()
if __name__ == "__main__":
    print("开始：%s"%time.ctime())
    main()
    time.sleep(5)
    print("主进程结束：%s"%time.ctime())

开始：Tue Jul 30 14:26:17 2019
主进程结束：Tue Jul 30 14:26:22 2019


Jupyter上运行结果和Vscode上不一样，这里子进程居然不允许，奇了怪

In [18]:
#查看CPU有多少核
import os
os.cpu_count()

4

In [6]:
#多线程的例子
import time
import threading
def func():
    print("子线程开始：%s"%time.ctime())
    time.sleep(5)
    print("子线程结束：%s"%time.ctime())
#开启多进程
p= threading.Thread(target=func)
if __name__ == "__main__":
    print("开始：%s"%time.ctime())
    p.start()
    p.join()
    time.sleep(5)
    print("主线程结束：%s"%time.ctime())

开始：Tue Jul 30 12:39:23 2019
子线程开始：Tue Jul 30 12:39:23 2019
子线程结束：Tue Jul 30 12:39:28 2019
主线程结束：Tue Jul 30 12:39:33 2019


In [5]:
#通过一些简单的例子来逐步了解多线程
from time import ctime,sleep
def music():
    for i in range(2):
        print("听音乐 %s"%ctime())
        sleep(1)
def move():
    for i in range(2):
        print("看电影 %s"%ctime())
        sleep(5)
music()
move()
print("娱乐结束 %s"%ctime())

听音乐 Mon Jul 29 21:29:21 2019
听音乐 Mon Jul 29 21:29:22 2019
看电影 Mon Jul 29 21:29:23 2019
看电影 Mon Jul 29 21:29:28 2019
娱乐结束 Mon Jul 29 21:29:33 2019


如何做到同时听音乐并看电影？

In [15]:
import threading
def music(func):
    for i in range(2):
        print("听音乐 %s"%ctime())
        sleep(5)
def movie(func):
    for i in range(2):
        print("看电影 %s"%ctime())
        sleep(5)
threads= []
t1= threading.Thread(target=music,args=("sososo",))
threads.append(t1)
t2= threading.Thread(target=movie,args=("li",))
threads.append(t2)
if __name__=="__main__":
    print("all start %s"%ctime())
    for t in threads:
        t.setDaemon(True)
        t.start()
    t.join()#这里的join()提供了一个阻塞，防止子线程未执行完
    print("all over %s"%ctime())

all start Mon Jul 29 21:57:30 2019
听音乐 Mon Jul 29 21:57:30 2019
看电影 Mon Jul 29 21:57:30 2019
听音乐 Mon Jul 29 21:57:35 2019看电影 Mon Jul 29 21:57:35 2019

all over Mon Jul 29 21:57:40 2019


貌似多线程可以让多个程序在同一时间执行，这样就节省了排队执行的时间，你想想看，看电影花费5的时间，听音乐花费1的时间，因此总的花费时间取决于花费时间多的

上面的代码有个地方让人不满意，每次创建新的线程还得需要threading.Thread()一个个的添加，这样很麻烦说实话，能不能通过for循环来实现

In [17]:
from time import *
import threading
def music(func):
    for i in range(2):
        print("听音乐 %s"%ctime())
        sleep(5)
def movie(func):
    for i in range(2):
        print("看电影 %s"%ctime())
        sleep(5)
def player(name):
    r= name.split(".")[1]
    if r=="mp3":
        music(name)
    elif r=="mp4":
        movie(name)
    else:
        print("fuck you!")
L= ["爱情买卖.mp3","天下无贼.mp4"]
threads=[]
for i in range(len(L)):
    t= threading.Thread(target=player,args=(L[i],))
    threads.append(t)
if __name__=="__main__":
    for i in range(len(L)):
        threads[i].start()
    for i in range(len(L)):
        threads[i].join()
    print("结束 %s"%ctime())

听音乐 Mon Jul 29 22:12:48 2019
看电影 Mon Jul 29 22:12:48 2019
听音乐 Mon Jul 29 22:12:53 2019看电影 Mon Jul 29 22:12:53 2019

结束 Mon Jul 29 22:12:58 2019


In [20]:
#将电影和音乐放到一起
from time import *
import threading
def super_player(file, time):
    for i in range(2):
        print("Start playing:%s!%s"%(file,ctime()))
        sleep(time)
L={"我不是黄蓉.mp3":3,"监狱风云.mp4":5}
threads=[]
for file, time in L.items():
    t= threading.Thread(target= super_player,args=(file,time,))
    threads.append(t)
if __name__=="__main__":
    for i in range(len(L)):
        threads[i].start()
    for i in range(len(L)):
        threads[i].join()
    print("结束 %s" %ctime())

Start playing:监狱风云.mp4!Mon Jul 29 22:19:35 2019
Start playing:我不是黄蓉.mp3!Mon Jul 29 22:19:35 2019
Start playing:我不是黄蓉.mp3!Mon Jul 29 22:19:38 2019
Start playing:监狱风云.mp4!Mon Jul 29 22:19:40 2019
结束 Mon Jul 29 22:19:45 2019


In [27]:
import threading
#创建自己的多线程类
class MyThread(threading.Thread):
    def __init__(self,func, args, name=""):
        threading.Thread.__init__(self)
        self.name= name
        self.func= func
        self.args= args
    def run(self):
        self.func(*self.args)
def super_player(file, time):
    for i in range(2):
        print("Start playing:%s!%s"%(file,ctime()))
        sleep(time)
L={"我不是黄蓉.mp3":3,"监狱风云.mp4":5}
threads=[]
for k,v in L.items():
    t= MyThread(super_player,(k,v),super_player.__name__)
    threads.append(t)
if __name__=="__main__":
    for i in range(len(L)):
        threads[i].start()
    for i in range(len(L)):
        threads[i].join()
    print("结束 %s" %ctime())

Start playing:监狱风云.mp4!Mon Jul 29 22:48:52 2019
Start playing:我不是黄蓉.mp3!Mon Jul 29 22:48:52 2019
Start playing:我不是黄蓉.mp3!Mon Jul 29 22:48:55 2019
Start playing:监狱风云.mp4!Mon Jul 29 22:48:57 2019
结束 Mon Jul 29 22:49:02 2019


In [13]:
#爬取图片并下载，测试速度，然后用多线程看速度提高多少
import urllib.request
import time
def downloadImage(url,filename):
    print("下载图片......".format(time.ctime()))
    urllib.request.urlretrieve(url,filename)
def main():
    for i in range(10):
        pic_name= "{}.jpg".format(i)
        downloadImage(r'https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg',pic_name)
if __name__=="__main__":
    t0= time.time()
    main()
    t1= time.time()
    print("耗时：",t1-t0)

从https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg下载图片......
从https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg下载图片......
从https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg下载图片......
从https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg下载图片......
从https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg下载图片......
从https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg下载图片.....

In [14]:
#开启多线程
import urllib.request
import time
import threading
url=r"https://gss0.baidu.com/-Po3dSag_xI4khGko9WTAnF6hhy/zhidao/wh%3D600%2C800/sign=08660b2573f0f736d8ab44073a659f21/fc1f4134970a304e861d9da0dac8a786c9175ca8.jpg"
def downloadImage(url,filename):
    print("下载图片......".format(time.ctime()))
    urllib.request.urlretrieve(url,filename)
def Thread(i):
    pic_name="{}.jpg".format(i)
    downloadImage(url,pic_name)
def main():
    threads=[]
    for i in range(10):
        thread= threading.Thread(target= Thread,args=(i,))
        threads.append(thread)
        thread.start()
    for i in threads:
        i.join()
if __name__=="__main__":
    t0= time.time()
    main()
    t1= time.time()
    print("耗时：",t1-t0)

下载图片......下载图片......

下载图片......
下载图片......
下载图片......
下载图片......
下载图片......
下载图片......
下载图片......
下载图片......
耗时： 0.29180479049682617


很明显使用多线程快了很多，多线程适合用于I/O密集型，多进程适合计算密集型

In [18]:
#多进程计算实例：找出100万个20000到10000000之间的随机数中的质数
import time 
import random
#快速判断是不是质数
def isPrime(n):
    i=2
    while i*i<=n:
        if n%i==0:
            return False
        i+=1
    return True
def main():
    for i in range(10000):
        rand= random.randint(2000,100000000)
        if isPrime(rand):
            print(rand)
if __name__=="__main__":
    print("开始计算")
    t1= time.clock()
    main()
    t2= time.clock()
    print("耗时：",t2-t1)

开始计算
7647949
53550643
35821001
26638253
17966681
54386599
69882577
27271781
57600791
33998369
57953449
70365041
23782757
47046611
54612427
46943473
1841003
40908893
36580169
35137253
51663883
65525969
6995231
24336089
39940009
72501797
72997619
12727381
63055801
49565743
18646231
76805117
39189967
67000937
68397887
70217177
97546373
83208017
5691029
73785007
47101573
23383727
41423827
85888273
21848923
78370111
77507011
49969807
17782217
8764927
80533823
16847893
60519353
43124017
13018153
84026941
57366919
84126299
38118679
19557623
58899019
18620141
81935939
51357899
30561269
91958947
93198419
76064057
35158639
86177843
78082201
23991203
1971469
494237
89704081
67595963
1584059
62273243
86980363
80625437
3387997
56422673
45466501
65084137
49297681
48650879
43913543
21106957
13376707
19383647
18047747
86083253
23831953
6102589
62415697
95799427
75670501
90086839
6998779
63454439
9542167
98581969
41676233
60324431
38889989
78505513
46334081
4896377
53936963
45779869
64846363
35526487
9

In [None]:
#用多进程来运算
import time
import random
import multiprocessing
from multiprocessing import Pool 
def Process(i):
    rand= random.randint(2000,100000000)
    def isPrime(n):
        i=2
        while i*i<=n:
            if n%i==0:
                return False
            i+=1
        return True
    if isPrime(rand):
        print(rand)
if __name__=="__main__":
    print("开始运算")
    t1= time.clock()
    pool= Pool(processes=4)
    pool.map_async(Process,range(10))
    pool.close()
    pool.join()
    t2= time.clock()
    print("耗时：",t2-t1)

开始运算


不知道为什么Jupyter就是没办法实现多进程，我在vscode下面可以
我查了一下，发现说是windows下Jupyter的锅，解决方法是将func写成一个脚本运行

In [7]:
import time
import random
import multiprocessing
import helper
from multiprocessing import Pool
if __name__=="__main__":
    print("开始运算")
    t1= time.clock()
    p= Pool(processes=4)
    p.map(helper.Process,[i for i in range(10000)])
    t2= time.clock()
    print("耗时：",t2-t1)

开始运算
耗时： 0.4667006715973798


### 进程池

In [None]:
from multiprocessing import Pool
import os,time,random
def worker(info):
    start= time.clock()
    print("%s开始，进程号为：%s,%s"%(info,os.getpid(),time.ctime()))
    time.sleep(random.random()*2)
    stop= time.clock()
    print("%s执行完毕，耗时%f"%(info,stop-start))
if __name__ == "__main__":
    pool= Pool(4)
    pool.apply_async(worker,("lebron",))
    pool.apply_async(worker,("kobe",))
    pool.close()
    pool.join()

join()要放到close()后

## python的时间模块
经常在程序中遇到和时间有关的部分，这里记录一下都有哪些关于时间的调用

In [1]:
#时间戳的概念，它是指格林威治时间下距今的秒数
import time
time.time()

1564451244.7586515

In [2]:
#获取当前时间
time.localtime(time.time())

time.struct_time(tm_year=2019, tm_mon=7, tm_mday=30, tm_hour=9, tm_min=48, tm_sec=56, tm_wday=1, tm_yday=211, tm_isdst=0)

time.localtime()是返回时间戳下的时间元组，元组里面包括年、月、日、时、分、秒、周几、一年中的第几天、夏令时

In [3]:
#获取格式化的时间有两种方法
#方法一
time.asctime(time.localtime(time.time()))

'Tue Jul 30 09:51:29 2019'

In [14]:
#方法二
time.ctime()

'Tue Jul 30 10:07:28 2019'

In [10]:
#自定义格式化时间
time.strftime("%y-%m-%d %I:%M:%S",time.localtime())

'19-07-30 10:01:53'

python中时间日期格式化符号：  
%y：年份(19)  
%Y：年份(2019)  
%m：月份  
%d：日  
%H：24小时制时  
%I：12小时制时  
%M：分钟

In [1]:
#返回当前CPU时间，经常用来计时
time.clock()

NameError: name 'time' is not defined

## 回调函数

回调函数是一个作为另一个函数参数的函数，等主函数执行完毕，再执行作为参数的回调函数

In [1]:
#回调函数的例子
def funcA(callback):
    callback()
    print("执行主函数")
def funcB():
    print("执行回调函数")

In [2]:
funcA(funcB)

执行回调函数
执行主函数
