In [2]:
import time
import threading
import multiprocessing
import itertools
import os
import logging
import random
import string
import requests
from functools import partial
from multiprocessing import Queue
from multiprocessing.pool import ThreadPool

# Threads

In [3]:
class timer():
    def __init__(self, message):
        self.message = message

    def __enter__(self):
        self.start = time.time()
        return None

    def __exit__(self, type, value, traceback):
        elapsed_time = (time.time() - self.start)
        print(self.message.format(elapsed_time))

TIME_TO_SLEEP = 1
        
def long_running_task(n=TIME_TO_SLEEP):
    #print(threading.current_thread())
    time.sleep(n)

with timer('Elapsed: {}s'):
#     long_running_task()
    long_running_task()


Elapsed: 1.0010664463043213s


In [4]:
with timer('Elapsed: {}s'):
    t1 = threading.Thread(target=long_running_task, args=(TIME_TO_SLEEP/2,))
    t2 = threading.Thread(target=long_running_task, args=(TIME_TO_SLEEP/2,))
    t1.start()
    t2.start()
    print(threading.current_thread())
    t1.join() # be carefull
    t2.join() 
    
print('next step')
# work with date

<_MainThread(MainThread, started 140294979577664)>
Elapsed: 0.5049533843994141s
next step


In [5]:
def run_threads(func, data, workers):
    threads = [
        threading.Thread(target=func, args=(data / workers, ))
        for _ in range(workers)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

In [6]:
workers = 10
DATA_SIZE = 1

with timer('Elapsed: {}s'):
    run_threads(long_running_task, DATA_SIZE, workers)

Elapsed: 0.125870943069458s


In [7]:
input_data = [DATA_SIZE / workers for _ in range(workers)]
print(input_data)

with timer('Elapsed: {}s'):
    with ThreadPool(workers) as pool:
        input_data = [DATA_SIZE / workers for _ in range(workers)]
        pool.map(long_running_task, input_data)
#         for chunk in input_data:
#             long_running_task(chunk)

[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
Elapsed: 0.1674344539642334s


# Real world task

In [8]:
def fetch_pic(num_pic):
# def fetch_pic(num_pic, path):
    url = 'https://picsum.photos/400/600'
    path = './pics'
    for _ in range(num_pic):
        random_name = ''.join(random.choices(string.ascii_letters + string.digits, k=5))
        response = requests.get(url)
        if response.status_code == 200:
            with open(f'{path}/{random_name}.jpg', 'wb') as f:
                f.write(response.content)
                print(f"Fetched pic [{os.getpid()}]: {f.name}")

In [9]:
with timer('Elapsed: {}s'):
    with ThreadPool(workers) as pool:
        input_data = [DATA_SIZE // workers for _ in range(workers)]
        # input_data = [(DATA_SIZE // workers, './pics') for _ in range(workers)]
        pool.map(fetch_pic, input_data)

Elapsed: 0.01610851287841797s


# IO vs CPU bound tasks

In [66]:
DATA_SIZE = 1_000_000
        
def countdown(n):
    # print(threading.current_thread())
    while n > 0:
        n -= 1

def fill_data(n, lst):
    # print(threading.current_thread())
    while n > 0:
        n -= 1
        lst.append(random.randint(1, 100))

        
lst = []
with timer('Elapsed: {}s'):
#     fill_data(DATA_SIZE, lst)
    fill_data(DATA_SIZE, lst)

Elapsed: 0.7117810249328613s


In [67]:
with timer('Elapsed: {}s'):
    t1 = threading.Thread(target=fill_data, args=(DATA_SIZE // 2, lst))
    t2 = threading.Thread(target=fill_data, args=(DATA_SIZE // 2, lst))
    t1.start()
    t2.start()
    t1.join()
    t2.join()

Elapsed: 1.1411914825439453s


In [70]:
workers = 16
with timer('Elapsed: {}s'):
    with ThreadPool(workers) as pool:
        input_data = [DATA_SIZE // workers for _ in range(workers)]
        pool.map(partial(fill_data, lst=lst), input_data)

Elapsed: 1.2244949340820312s


In [10]:
workers = 16
with timer('Elapsed: {}s'):
    with multiprocessing.Pool(workers) as pool:
        input_data = [DATA_SIZE // workers for _ in range(workers)]
        pool.map(partial(fill_data, lst=lst), input_data)

Elapsed: 0.07917976379394531s


NameError: name 'fill_data' is not defined