## Use threading for IO-bound processing

In [5]:
import time
import random
import requests
import threading

In [6]:
# Template for the paginated archive; {page_number} is substituted per page.
url = "https://www.finsmes.com/older-posts/page/{page_number}"
# Expand the template for page numbers 0 through 4.
urls = [url.format(page_number=page_number) for page_number in range(5)]


def scrape_page(page_url, timeout=10):
    """Fetch a single page with ``requests.get`` after a random busy-loop.

    Args:
        page_url: URL handed to ``requests.get``.
        timeout: Seconds to wait on the server, forwarded to ``requests.get``.
            Without it a stalled server would block the caller (or its
            worker thread) indefinitely.

    Returns:
        Whatever ``requests.get`` returns for ``page_url``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # Spin through a random number of empty iterations before the request.
    # NOTE(review): presumably this varies when each call fires, which is why
    # the threaded run can print URLs out of order -- confirm intent.
    for _ in range(random.randint(1, 10000000)):
        pass
    print(f"Calling for url: {page_url}")
    return requests.get(page_url, timeout=timeout)


In [7]:
# synchronous version: fetch the pages one after another and time the whole run
t1 = time.perf_counter()

# The loop variable is `page_url`, not `url`, so the URL template stored in
# `url` is not overwritten when this cell runs.
for page_url in urls:
    response = scrape_page(page_url)  # each pass overwrites `response`; only the last one survives

t2 = time.perf_counter()
print(f"Running time: {(t2-t1):3.2f} s.")

Calling for url: https://www.finsmes.com/older-posts/page/0
Calling for url: https://www.finsmes.com/older-posts/page/1
Calling for url: https://www.finsmes.com/older-posts/page/2
Calling for url: https://www.finsmes.com/older-posts/page/3
Calling for url: https://www.finsmes.com/older-posts/page/4
Running time: 4.33 s.


In [8]:
# threading version: start one thread per URL, then time until all have finished
t1 = time.perf_counter()

threads = []
# The loop variable is `page_url`, not `url`, so the URL template stored in
# `url` is not overwritten when this cell runs.
for page_url in urls:
    # threading.Thread discards the target's return value, so the responses
    # from scrape_page are not collected here.
    thread = threading.Thread(target=scrape_page, args=(page_url,))
    threads.append(thread)
    thread.start()

# Block until every thread is done before stopping the clock.
for thread in threads:
    thread.join()

t2 = time.perf_counter()
print(f"Running time: {(t2-t1):3.2f} s.")


Calling for url: https://www.finsmes.com/older-posts/page/0
Calling for url: https://www.finsmes.com/older-posts/page/1
Calling for url: https://www.finsmes.com/older-posts/page/3
Calling for url: https://www.finsmes.com/older-posts/page/2
Calling for url: https://www.finsmes.com/older-posts/page/4
Running time: 1.07 s.
