In [1]:
# Single-threaded version
import requests
import time

def download_one(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without a timeout, requests can wait indefinitely on
            an unresponsive server, stalling the whole run.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    resp = requests.get(url, timeout=timeout)
    print('Read {} from {}'.format(len(resp.content), url))

def download_all(sites):
    """Download every URL in ``sites`` one after another, in order.

    Args:
        sites: Iterable of URL strings; each is passed to ``download_one``.
    """
    for url in sites:
        # Each call finishes before the next one starts.
        download_one(url)

def main():
    """Time ``download_all`` over fifteen Wikipedia pages and print the elapsed time."""
    wiki_root = 'https://en.wikipedia.org/wiki/'
    page_names = (
        'Portal:Arts',
        'Portal:History',
        'Portal:Society',
        'Portal:Biography',
        'Portal:Mathematics',
        'Portal:Technology',
        'Portal:Geography',
        'Portal:Science',
        'Computer_science',
        'Python_(programming_language)',
        'Java_(programming_language)',
        'PHP',
        'Node.js',
        'The_C_Programming_Language',
        'Go_(programming_language)',
    )
    sites = [wiki_root + name for name in page_names]

    # Only the download itself is timed, not the construction of the URL list.
    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started
    print('Download {} sites in {} seconds'.format(len(sites), elapsed))

# Run the timing demo only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

Read 197346 from https://en.wikipedia.org/wiki/Portal:Arts
Read 327710 from https://en.wikipedia.org/wiki/Portal:History
Read 276196 from https://en.wikipedia.org/wiki/Portal:Society
Read 376473 from https://en.wikipedia.org/wiki/Portal:Biography
Read 318387 from https://en.wikipedia.org/wiki/Portal:Mathematics
Read 274383 from https://en.wikipedia.org/wiki/Portal:Technology
Read 320376 from https://en.wikipedia.org/wiki/Portal:Geography
Read 243076 from https://en.wikipedia.org/wiki/Portal:Science
Read 368798 from https://en.wikipedia.org/wiki/Computer_science
Read 556520 from https://en.wikipedia.org/wiki/Python_(programming_language)
Read 341949 from https://en.wikipedia.org/wiki/Java_(programming_language)
Read 572031 from https://en.wikipedia.org/wiki/PHP
Read 197019 from https://en.wikipedia.org/wiki/Node.js
Read 66502 from https://en.wikipedia.org/wiki/The_C_Programming_Language
Read 338496 from https://en.wikipedia.org/wiki/Go_(programming_language)
Download 15 sites in 10.6099

In [2]:
# Multi-threaded version
import concurrent.futures
import requests
import threading
import time

def down_one(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without a timeout, a single unresponsive server can
            block its worker thread (and the pool shutdown) indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    resp = requests.get(url, timeout=timeout)
    print('Read {} from {}'.format(len(resp.content), url))

def down_all(sites, max_workers=5):
    """Download every URL in ``sites`` concurrently using a thread pool.

    Args:
        sites: Iterable of URL strings; each is passed to ``down_one``.
        max_workers: Maximum number of worker threads in the pool.

    Raises:
        Exception: Whatever ``down_one`` raised for a URL is re-raised here
            when that URL's result is retrieved.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map only re-raises a worker's exception when its result is
        # pulled from the returned iterator; discarding the iterator would
        # silently hide failed downloads, so drain it.
        for _ in executor.map(down_one, sites):
            pass

def main():
    """Time ``down_all`` over fifteen Wikipedia pages and print the elapsed time."""
    base_url = 'https://en.wikipedia.org/wiki/'
    articles = (
        'Portal:Arts',
        'Portal:History',
        'Portal:Society',
        'Portal:Biography',
        'Portal:Mathematics',
        'Portal:Technology',
        'Portal:Geography',
        'Portal:Science',
        'Computer_science',
        'Python_(programming_language)',
        'Java_(programming_language)',
        'PHP',
        'Node.js',
        'The_C_Programming_Language',
        'Go_(programming_language)',
    )
    sites = [base_url + article for article in articles]

    # Only the download itself is timed, not the construction of the URL list.
    t0 = time.perf_counter()
    down_all(sites)
    duration = time.perf_counter() - t0
    print('Download {} sites in {} seconds'.format(len(sites), duration))

# Run the timing demo only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()


Read 276196 from https://en.wikipedia.org/wiki/Portal:Society
Read 197346 from https://en.wikipedia.org/wiki/Portal:Arts
Read 327710 from https://en.wikipedia.org/wiki/Portal:History
Read 376473 from https://en.wikipedia.org/wiki/Portal:Biography
Read 318387 from https://en.wikipedia.org/wiki/Portal:Mathematics
Read 274383 from https://en.wikipedia.org/wiki/Portal:Technology
Read 243076 from https://en.wikipedia.org/wiki/Portal:Science
Read 320376 from https://en.wikipedia.org/wiki/Portal:Geography
Read 368798 from https://en.wikipedia.org/wiki/Computer_science
Read 556520 from https://en.wikipedia.org/wiki/Python_(programming_language)
Read 341949 from https://en.wikipedia.org/wiki/Java_(programming_language)
Read 197019 from https://en.wikipedia.org/wiki/Node.js
Read 66502 from https://en.wikipedia.org/wiki/The_C_Programming_Language
Read 572031 from https://en.wikipedia.org/wiki/PHP
Read 338496 from https://en.wikipedia.org/wiki/Go_(programming_language)
Download 15 sites in 1.44814

In [7]:
import concurrent.futures
import requests
import time

def down_one(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print the size of the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Without a timeout, a single unresponsive server can
            block its worker thread (and the pool shutdown) indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    resp = requests.get(url, timeout=timeout)
    print('Read {} from {}'.format(len(resp.content), url))

def down_all(sites, max_workers=5):
    """Download every URL in ``sites`` concurrently via submit/as_completed.

    Args:
        sites: Iterable of URL strings; each is submitted to ``down_one``.
        max_workers: Maximum number of worker threads in the pool.

    Raises:
        Exception: Whatever ``down_one`` raised for a URL is re-raised by
            ``future.result()``.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit this cell's own down_one so the cell does not depend on a
        # function left in the kernel by another cell.
        to_do = [executor.submit(down_one, site) for site in sites]
        for future in concurrent.futures.as_completed(to_do):
            # result() re-raises any exception raised inside the worker.
            future.result()

def main():
    """Time ``down_all`` over fifteen Wikipedia pages and print the elapsed time."""
    prefix = 'https://en.wikipedia.org/wiki/'
    titles = (
        'Portal:Arts',
        'Portal:History',
        'Portal:Society',
        'Portal:Biography',
        'Portal:Mathematics',
        'Portal:Technology',
        'Portal:Geography',
        'Portal:Science',
        'Computer_science',
        'Python_(programming_language)',
        'Java_(programming_language)',
        'PHP',
        'Node.js',
        'The_C_Programming_Language',
        'Go_(programming_language)',
    )
    sites = [prefix + title for title in titles]

    # Only the download itself is timed, not the construction of the URL list.
    begin = time.perf_counter()
    down_all(sites)
    took = time.perf_counter() - begin
    print('Download {} sites in {} seconds'.format(len(sites), took))

# Run the timing demo only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()



Read 276196 from https://en.wikipedia.org/wiki/Portal:Society
Read 197346 from https://en.wikipedia.org/wiki/Portal:Arts
Read 318387 from https://en.wikipedia.org/wiki/Portal:Mathematics
Read 327710 from https://en.wikipedia.org/wiki/Portal:History
Read 376473 from https://en.wikipedia.org/wiki/Portal:Biography
Read 274383 from https://en.wikipedia.org/wiki/Portal:Technology
Read 320376 from https://en.wikipedia.org/wiki/Portal:Geography
Read 243076 from https://en.wikipedia.org/wiki/Portal:Science
Read 368798 from https://en.wikipedia.org/wiki/Computer_science
Read 556520 from https://en.wikipedia.org/wiki/Python_(programming_language)
Read 66502 from https://en.wikipedia.org/wiki/The_C_Programming_Language
Read 197019 from https://en.wikipedia.org/wiki/Node.js
Read 341949 from https://en.wikipedia.org/wiki/Java_(programming_language)
Read 572031 from https://en.wikipedia.org/wiki/PHP
Read 338496 from https://en.wikipedia.org/wiki/Go_(programming_language)
Download 15 sites in 1.48943