In [1]:
# URLs fetched by the download demo: thirteen "subject" pages whose IDs run
# consecutively from 42416 to 42428, followed by three individual "words" pages.
sites = [
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID={}'.format(subject_id)
    for subject_id in range(42416, 42429)
] + [
    'https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819',
    'https://www.zgbk.com/ecph/words?SiteID=1&ID=236286&SubID=61175',
    'https://www.zgbk.com/ecph/words?SiteID=1&ID=38664&SubID=43458',
]

In [18]:
# Asynchronous concurrent version
import requests
import asyncio
import aiohttp
import time

import nest_asyncio
# nest_asyncio patches asyncio so that an event loop can be re-entered. The
# asyncio.run() calls further down in this cell depend on that whenever a loop
# is already running (presumably the notebook kernel's own loop -- confirm).
nest_asyncio.apply()

async def download_page(url):
    """Fetch ``url`` with aiohttp and print the length of its decoded body.

    A dedicated ``ClientSession`` is opened for this single request and closed
    again when the request finishes. Nothing is returned.
    """
    async with aiohttp.ClientSession() as client:
        async with client.get(url) as response:
            body = await response.text()
            print('Read {} from {}'.format(len(body), url))

async def download_all(sites):
    """Download every URL in ``sites`` concurrently.

    One task per URL is scheduled through ``download_page``; this coroutine
    completes only after all of those tasks have finished.
    """
    pending = []
    for page_url in sites:
        pending.append(asyncio.create_task(download_page(page_url)))
    await asyncio.gather(*pending)

async def main():
    """Download all pages listed in the module-level ``sites`` and print timing.

    Measures wall-clock time with ``time.perf_counter`` around the concurrent
    download and prints the number of sites and the elapsed seconds.
    """
    start_time = time.perf_counter()
    # This coroutine already executes inside a running event loop, so the
    # download coroutine is awaited directly. Calling asyncio.run() here would
    # try to start a second loop, which asyncio rejects with RuntimeError
    # unless asyncio has been monkey-patched to allow nesting.
    await download_all(sites)
    end_time = time.perf_counter()
    print('Download {} sites in {} seconds'.format(len(sites), end_time - start_time))

# Entry point: when executed as __main__, start an event loop with
# asyncio.run() and drive the main() coroutine to completion.
if __name__ == '__main__':
    asyncio.run(main())

Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42418
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42427
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423
Read 58859 from https://www.zgbk.com/ecph/words?SiteID=1&ID=38664&SubID=43458
Read 60094 from https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819
Read 60166 from https://www

In [None]:
# Rule of thumb for picking a concurrency model.
# NOTE: `io_bound` and `io_slow` are placeholder flags that are not defined in
# the visible cells; assign them before running this cell.
if io_bound:
    if io_slow:
        # Many slow I/O waits: a single-threaded event loop handles them well.
        print('use asyncio')
    else:
        # Fast I/O with few connections: plain threads are sufficient.
        print('use multithread')
else:
    # CPU-bound work: threads in CPython share the global interpreter lock and
    # do not run Python bytecode in parallel, so separate processes are needed.
    print('use multiprocess')

In [19]:
import time

def cpu_bound(num):
    """Print the sum of the squares of the integers in ``range(num)``.

    Args:
        num: Exclusive upper bound; values ``0 .. num - 1`` are squared and
            summed. A value of 0 or less yields a printed ``0``.

    Returns:
        None. The result is written to standard output.
    """
    # A generator expression lets sum() consume the squares one at a time
    # instead of first materialising a num-element list in memory.
    print(sum(i * i for i in range(num)))

def calc_sums(nums):
    """Run ``cpu_bound`` on each element of ``nums``, one after another.

    The elements are processed sequentially in iteration order; nothing is
    returned.
    """
    for upper_bound in nums:
        cpu_bound(upper_bound)

def main():
    """Time the sequential sum-of-squares run over twenty large inputs.

    Builds the inputs ``10000000 .. 10000019``, passes them to ``calc_sums``
    and prints the elapsed wall-clock time measured with
    ``time.perf_counter``.
    """
    nums = [10000000 + i for i in range(20)]
    start_time = time.perf_counter()
    calc_sums(nums)
    end_time = time.perf_counter()
    # Message spelling corrected ("senconds" -> "seconds").
    print('Calculate takes {} seconds'.format(end_time - start_time))

# Entry point: run the sequential benchmark when executed as __main__.
if __name__ == '__main__':
    main()

333333283333335000000
333333383333335000000
333333483333355000001
333333583333395000005
333333683333455000014
333333783333535000030
333333883333635000055
333333983333755000091
333334083333895000140
333334183334055000204
333334283334235000285
333334383334435000385
333334483334655000506
333334583334895000650
333334683335155000819
333334783335435001015
333334883335735001240
333334983336055001496
333335083336395001785
333335183336755002109
Calculate takes 19.437007000669837 senconds


In [None]:
# Running multiple processes is problematic in a Jupyter environment
import time
import concurrent.futures
import sys

def cpu_bound(num):
    """Return the sum of the squares of every integer in ``range(num)``."""
    squares = (k * k for k in range(num))
    return sum(squares)

def calc_sums(nums):
    """Compute ``cpu_bound(num)`` for every item of ``nums`` in a process pool.

    Each input is submitted to a ``ProcessPoolExecutor`` with five workers and
    the results are printed as the tasks complete, so the output order need
    not match the input order.

    Raises:
        SystemExit: with exit status 1 as soon as any task fails; the cause is
            printed first.
    """
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(cpu_bound, num) for num in nums]
        for future in concurrent.futures.as_completed(futures):
            try:
                print(future.result())
            except Exception as exc:
                # Catch Exception rather than using a bare ``except`` so that
                # KeyboardInterrupt still propagates, and report the actual
                # cause instead of hiding it behind a generic message.
                print('subprocess error: {!r}'.format(exc))
                sys.exit(1)

def main():
    """Time the process-pool sum-of-squares run over twenty large inputs.

    Builds the inputs ``10000000 .. 10000019``, passes them to ``calc_sums``
    and prints the elapsed wall-clock time measured with
    ``time.perf_counter``.
    """
    nums = [10000000 + i for i in range(20)]
    start_time = time.perf_counter()
    calc_sums(nums)
    end_time = time.perf_counter()
    # Message spelling corrected ("senconds" -> "seconds").
    print('Calculate takes {} seconds'.format(end_time - start_time))

# Entry point: run the process-pool benchmark when executed as __main__.
if __name__ == '__main__':
    main()

subprocess error


SystemExit: 1

In [18]:
%%writefile cpu_bound.py
def cpu_bound(num):
    """Return the sum of the squares of every integer in ``range(num)``."""
    squares = (k * k for k in range(num))
    return sum(squares)

Writing cpu_bound.py


In [19]:
import time
import concurrent.futures
import sys

# Import the function from an external module to use as the worker-process function
from cpu_bound import cpu_bound

def calc_sums(nums):
    """Compute ``cpu_bound(num)`` for every item of ``nums`` in a process pool.

    Each input is submitted to a ``ProcessPoolExecutor`` with five workers and
    the results are printed as the tasks complete, so the output order need
    not match the input order.

    Raises:
        SystemExit: with exit status 1 as soon as any task fails; the cause is
            printed first.
    """
    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(cpu_bound, num) for num in nums]
        for future in concurrent.futures.as_completed(futures):
            try:
                print(future.result())
            except Exception as exc:
                # Catch Exception rather than using a bare ``except`` so that
                # KeyboardInterrupt still propagates, and report the actual
                # cause instead of hiding it behind a generic message.
                print('subprocess error: {!r}'.format(exc))
                sys.exit(1)

def main():
    """Time the process-pool sum-of-squares run over twenty large inputs.

    Builds the inputs ``10000000 .. 10000019``, passes them to ``calc_sums``
    and prints the elapsed wall-clock time measured with
    ``time.perf_counter``.
    """
    nums = [10000000 + i for i in range(20)]
    start_time = time.perf_counter()
    calc_sums(nums)
    end_time = time.perf_counter()
    # Message spelling corrected ("senconds" -> "seconds").
    print('Calculate takes {} seconds'.format(end_time - start_time))

# Entry point: run the process-pool benchmark when executed as __main__.
if __name__ == '__main__':
    main()

333333283333335000000
333333383333335000000
333333483333355000001
333333583333395000005
333333683333455000014
333333783333535000030
333333883333635000055
333333983333755000091
333334083333895000140
333334183334055000204
333334283334235000285
333334383334435000385
333334483334655000506
333334583334895000650
333334683335155000819
333334783335435001015
333334883335735001240
333334983336055001496
333335083336395001785
333335183336755002109
Calculate takes 7.1526327999308705 senconds


In [20]:
# Remove the intermediate cpu_bound.py file written by the %%writefile cell.
# os.remove is used instead of a shell `rm` so the cell does not depend on the
# platform's shell, and a missing file is tolerated so the cell can be re-run.
import os

try:
    os.remove('cpu_bound.py')
except FileNotFoundError:
    # Already deleted (for example by an earlier run of this cell).
    pass