## single threaded version

In [6]:
import requests
import time

def download_one(url):
    resp = requests.get(url)
    print('Read {} from {}'.format(len(resp.content), url))
    
def download_all(sites):
    for site in sites:
        download_one(site)

def main():
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)'
    ]
    start_time = time.perf_counter()
    download_all(sites)
    end_time = time.perf_counter()
    print('Download {} sites in {} seconds'.format(len(sites), end_time - start_time))
    
if __name__ == '__main__':
    main()


Read 131527 from https://en.wikipedia.org/wiki/Portal:Arts
Read 197643 from https://en.wikipedia.org/wiki/Portal:History
Read 225519 from https://en.wikipedia.org/wiki/Portal:Society
Read 318783 from https://en.wikipedia.org/wiki/Portal:Biography
Read 143755 from https://en.wikipedia.org/wiki/Portal:Mathematics
Read 152654 from https://en.wikipedia.org/wiki/Portal:Technology
Read 160574 from https://en.wikipedia.org/wiki/Portal:Geography
Read 98880 from https://en.wikipedia.org/wiki/Portal:Science
Read 331339 from https://en.wikipedia.org/wiki/Computer_science
Read 401800 from https://en.wikipedia.org/wiki/Python_(programming_language)
Read 322061 from https://en.wikipedia.org/wiki/Java_(programming_language)
Read 483369 from https://en.wikipedia.org/wiki/PHP
Read 179329 from https://en.wikipedia.org/wiki/Node.js
Read 56595 from https://en.wikipedia.org/wiki/The_C_Programming_Language
Read 328783 from https://en.wikipedia.org/wiki/Go_(programming_language)
Download 15 sites in 15.90567

## Multi-threaded version

In [13]:
import concurrent.futures
import requests
import threading
import time

def download_one(url):
    resp = requests.get(url)
    print('Read {} from {}'.format(len(resp.content), url))


def download_all(sites):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.map(download_one, sites)

def main():
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)'
    ]
    start_time = time.perf_counter()
    download_all(sites)
    end_time = time.perf_counter()
    print('Download {} sites in {} seconds'.format(len(sites), end_time - start_time))

if __name__ == '__main__':
    main()


Read 225519 from https://en.wikipedia.org/wiki/Portal:Society
Read 131527 from https://en.wikipedia.org/wiki/Portal:Arts
Read 197643 from https://en.wikipedia.org/wiki/Portal:History
Read 318783 from https://en.wikipedia.org/wiki/Portal:Biography
Read 98880 from https://en.wikipedia.org/wiki/Portal:Science
Read 152654 from https://en.wikipedia.org/wiki/Portal:Technology
Read 160574 from https://en.wikipedia.org/wiki/Portal:Geography
Read 143755 from https://en.wikipedia.org/wiki/Portal:Mathematics
Read 331339 from https://en.wikipedia.org/wiki/Computer_science
Read 56595 from https://en.wikipedia.org/wiki/The_C_Programming_Language
Read 401800 from https://en.wikipedia.org/wiki/Python_(programming_language)
Read 322061 from https://en.wikipedia.org/wiki/Java_(programming_language)
Read 179329 from https://en.wikipedia.org/wiki/Node.js
Read 483369 from https://en.wikipedia.org/wiki/PHP
Read 328783 from https://en.wikipedia.org/wiki/Go_(programming_language)
Download 15 sites in 3.882121

## Multi-process version

In [14]:
import concurrent.futures
import requests
import threading
import time

def download_one(url):
    resp = requests.get(url)
    print('Read {} from {}'.format(len(resp.content), url))


def download_all(sites):
    with concurrent.futures.ProcessPoolExecutor() as executor:
        executor.map(download_one, sites)

def main():
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)'
    ]
    start_time = time.perf_counter()
    download_all(sites)
    end_time = time.perf_counter()
    print('Download {} sites in {} seconds'.format(len(sites), end_time - start_time))

if __name__ == '__main__':
    main()


Download 15 sites in 0.13887379999999894 seconds


In [12]:
import concurrent.futures
import requests
import time

def download_one(url):
    resp = requests.get(url)
    print('Read {} from {}'.format(len(resp.content), url))

def download_all(sites):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        to_do = []
        for site in sites:
            future = executor.submit(download_one, site)
            to_do.append(future)
            
        for future in concurrent.futures.as_completed(to_do):
            future.result()
def main():
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)'
    ]
    start_time = time.perf_counter()
    download_all(sites)
    end_time = time.perf_counter()
    print('Download {} sites in {} seconds'.format(len(sites), end_time - start_time))

if __name__ == '__main__':
    main()



Read 225519 from https://en.wikipedia.org/wiki/Portal:Society
Read 143755 from https://en.wikipedia.org/wiki/Portal:Mathematics
Read 131527 from https://en.wikipedia.org/wiki/Portal:Arts
Read 197643 from https://en.wikipedia.org/wiki/Portal:History
Read 318783 from https://en.wikipedia.org/wiki/Portal:Biography
Read 152654 from https://en.wikipedia.org/wiki/Portal:Technology
Read 160574 from https://en.wikipedia.org/wiki/Portal:Geography
Read 98880 from https://en.wikipedia.org/wiki/Portal:Science
Read 331339 from https://en.wikipedia.org/wiki/Computer_science
Read 401800 from https://en.wikipedia.org/wiki/Python_(programming_language)
Read 56595 from https://en.wikipedia.org/wiki/The_C_Programming_Language
Read 322061 from https://en.wikipedia.org/wiki/Java_(programming_language)
Read 483369 from https://en.wikipedia.org/wiki/PHP
Read 179329 from https://en.wikipedia.org/wiki/Node.js
Read 328783 from https://en.wikipedia.org/wiki/Go_(programming_language)
Download 15 sites in 3.331071