In [25]:
# URLs fetched by every benchmark cell below: thirteen "subject" pages with
# consecutive IDs, followed by three individual "words" pages.
sites = [
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42416',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42418',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42427',
    'https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428',
] + [
    'https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819',
    'https://www.zgbk.com/ecph/words?SiteID=1&ID=236286&SubID=61175',
    'https://www.zgbk.com/ecph/words?SiteID=1&ID=38664&SubID=43458',
]

# HTTP request headers: a desktop Chrome User-Agent string and an
# Accept-Language value that prefers zh-CN.
headers = dict([
    ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"),
    ("Accept-Language", "zh-CN,zh;q=0.9"),
])

In [34]:
# 单线程同步版本
import requests
import time

def download_page(url, timeout=10):
    """Fetch ``url`` with a blocking GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            server would block the whole run indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # Send the notebook-level ``headers`` dict, which was defined but never used.
    resp = requests.get(url, headers=headers, timeout=timeout)
    # resp.content is bytes, so the reported size is a byte count.
    print('Read {} from {}'.format(len(resp.content), url))

def download_all(sites):
    """Download each URL in ``sites`` one after another, in iteration order.

    Args:
        sites: Iterable of URLs; each is passed to ``download_page``.
    """
    for page_url in sites:
        download_page(page_url)

def main():
    """Time a sequential download of the module-level ``sites`` and print the total."""
    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started
    print('Download {} sites in {} seconds'.format(len(sites), elapsed))

# Entry point: run the synchronous benchmark when this code executes as the main module.
if __name__ == '__main__':
    main()

Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42416
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42418
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42427
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428
Read 61636 from https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819
Read 61994 from https://www.zgbk.com/

In [37]:
# 异步并发版本
import requests
import asyncio
import aiohttp
import time

import nest_asyncio
# Patch asyncio so that asyncio.run() can be called while an event loop is
# already running (e.g. inside a Jupyter kernel).
nest_asyncio.apply()

def _new_session():
    """Create a ClientSession with the connector settings this notebook uses."""
    # NOTE(review): ssl=False turns off TLS certificate verification. It is kept
    # because the original code used it; confirm it is really needed for this host.
    return aiohttp.ClientSession(
        connector=aiohttp.TCPConnector(ssl=False),
        # Send the notebook-level ``headers`` dict, which was defined but never used.
        headers=headers,
    )


async def _fetch_and_report(session, url):
    """GET ``url`` through ``session`` and print the length of the decoded body."""
    async with session.get(url) as resp:
        # resp.text() decodes the body to str, so this counts characters, not bytes.
        content = await resp.text()
        print('Read {} from {}'.format(len(content), url))


async def download_page(url, session=None):
    """Download one page asynchronously and print the length of its text.

    Args:
        url: Address of the page to download.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            temporary session is created and closed for this single request.
    """
    if session is not None:
        await _fetch_and_report(session, url)
        return
    async with _new_session() as own_session:
        await _fetch_and_report(own_session, url)


async def download_all(sites):
    """Download every URL in ``sites`` concurrently on the running event loop.

    A single ClientSession is shared by all requests instead of building a new
    session and connector per URL, so the requests share one connection pool.

    Args:
        sites: Iterable of URLs to download.
    """
    async with _new_session() as session:
        tasks = [asyncio.create_task(download_page(url, session)) for url in sites]
        # gather() waits for every task and re-raises the first exception, if any.
        await asyncio.gather(*tasks)

async def main():
    """Time a concurrent download of the module-level ``sites`` and print the total."""
    started = time.perf_counter()
    await download_all(sites)
    elapsed = time.perf_counter() - started
    print('Download {} sites in {} seconds'.format(len(sites), elapsed))

# Entry point: asyncio.run() drives the main() coroutine to completion.
if __name__ == '__main__':
    asyncio.run(main())

Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42416
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420
Read 60166 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424
Read 60449 from https://www.zgbk.com/ecph/words?SiteID=1&ID=236286&SubID=61175
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428
Read 58859 from https://www.zgbk.com/ecph/words?SiteID=1&ID=38664&SubID=43458
Read 60167 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426
Read 60094 from https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819
Read 60167 from 

In [38]:
# 多线程并行版本
import concurrent.futures
import requests
import time

def download_one(url, timeout=10):
    """Fetch ``url`` with a blocking GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            server would tie up a worker thread indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # Send the notebook-level ``headers`` dict, which was defined but never used.
    resp = requests.get(url, headers=headers, timeout=timeout)
    # resp.content is bytes, so the reported size is a byte count.
    print('Read {} from {}'.format(len(resp.content), url))

def download_all(sites, max_workers=5):
    """Download every URL in ``sites`` using a pool of worker threads.

    Args:
        sites: Iterable of URLs; each is passed to ``download_one``.
        max_workers: Maximum number of threads in the pool.

    Raises:
        Exception: Whatever ``download_one`` raised in a worker, re-raised here
            for the first failing URL in input order.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # An exception raised inside a worker only surfaces when its result is
        # retrieved from the map() iterator, so the iterator is drained here;
        # otherwise failed downloads would go unnoticed.
        for _ in executor.map(download_one, sites):
            pass

def main():
    """Time a thread-pool download of the module-level ``sites`` and print the total."""
    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started
    print('Download {} sites in {} seconds'.format(len(sites), elapsed))

# Entry point: run the executor.map() thread-pool benchmark when executed as the main module.
if __name__ == '__main__':
    main()

Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42418
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42416
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42427
Read 61636 from https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819
Read 61994 from https://www.zgbk.com/

In [42]:
# 多线程并行版本
import concurrent.futures
import requests
import time

def download_one(url, timeout=10):
    """Fetch ``url`` with a blocking GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            server would tie up a worker thread indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # Send the notebook-level ``headers`` dict, which was defined but never used.
    resp = requests.get(url, headers=headers, timeout=timeout)
    # resp.content is bytes, so the reported size is a byte count.
    print('Read {} from {}'.format(len(resp.content), url))

def download_all(sites):
    """Submit one ``download_one`` job per URL to a 5-thread pool and wait for all of them.

    Args:
        sites: Iterable of URLs to download.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:
        pending = [pool.submit(download_one, site) for site in sites]

        # Handle futures in completion order; result() re-raises any exception
        # that occurred inside the worker.
        for finished in concurrent.futures.as_completed(pending):
            finished.result()

def main():
    """Time a submit()/as_completed() download of the module-level ``sites`` and print the total."""
    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started
    print('Download {} sites in {} seconds'.format(len(sites), elapsed))

# Entry point: run the submit()/as_completed() benchmark when executed as the main module.
if __name__ == '__main__':
    main()

Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42416
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42418
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42427
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428
Read 61636 from https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819
Read 61994 from https://www.zgbk.com/

In [44]:
# 多线程并行版本(简化)
import concurrent.futures
import requests
import time

def download_one(url, timeout=10):
    """Fetch ``url`` with a blocking GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without it a stalled
            server would tie up a worker thread indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # Send the notebook-level ``headers`` dict, which was defined but never used.
    resp = requests.get(url, headers=headers, timeout=timeout)
    # resp.content is bytes, so the reported size is a byte count.
    print('Read {} from {}'.format(len(resp.content), url))

def download_all(sites, max_workers=5):
    """Download every URL in ``sites`` using a pool of worker threads.

    Args:
        sites: Iterable of URLs; each is submitted to ``download_one``.
        max_workers: Maximum number of threads in the pool.

    Raises:
        Exception: Whatever ``download_one`` raised in a worker, re-raised for
            the first failed future encountered in completion order.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(download_one, site) for site in sites]
        for future in concurrent.futures.as_completed(futures):
            # A future stores its worker's exception until result() is called;
            # iterating with a bare ``pass`` would silently drop failed downloads.
            future.result()

def main():
    """Time the simplified thread-pool download of the module-level ``sites`` and print the total."""
    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started
    print('Download {} sites in {} seconds'.format(len(sites), elapsed))

# Entry point: run the simplified thread-pool benchmark when executed as the main module.
if __name__ == '__main__':
    main()

Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42419
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42420
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42418
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42416
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42422
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42421
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42417
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42424
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42425
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42427
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42428
Read 60877 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42426
Read 61636 from https://www.zgbk.com/ecph/words?SiteID=1&ID=635890&SubID=60819
Read 60874 from https://www.zgbk.com/ecph/subject?SiteID=1&ID=42423
Read 60275 from https://www.zgbk.com/