## GIL (Global Interpreter Lock)
*파이썬의 아킬레스건*

- CPython은 바이트코드를 실행할 때
- 다른 Thread가 끼어들지(interrupt) 못하도록 전역 인터프리터 잠금(GIL)을 겁니다.
- Thread가 여러 개 돌고 있는 상황에서도 인터프리터가 올바르게 동작하도록 하기 위한 장치입니다.

<span style='color:blue'>**그 결과.**</span>
- <u>Multithreading 상황에서도 GIL 때문에 한 번에 한 스레드만 Python 바이트코드를 실행할 수 있습니다.</u>
- Multithreading의 본래의 장점이 잘 발휘되지 않습니다.
- Multiprocess는 물론 영향을 받지 않습니다. 다만 *multithread보다는 다루기 어렵습니다.*

In [1]:
def factorize(number):
    """Return every positive divisor of ``number`` in ascending order.

    Every integer from 1 through ``number`` is tried as a divisor, so the
    amount of work grows linearly with ``number``.
    """
    candidates = range(1, number + 1)
    # A remainder of zero is falsy, so ``not number % d`` keeps the divisors.
    return [d for d in candidates if not number % d]

import time

# Sequential baseline: factorize every number one after another.
# Change the values (or the repeat count) to make the workload heavier.
numbers = 4 * [2139079, 1214759, 1516637, 1852285]
start = time.time()
for number in numbers:
    factorize(number)  # the result is discarded; only the timing matters
end = time.time()
elapsed = end - start
print('Took %.3f seconds' % elapsed)

Took 1.828 seconds


In [2]:
from concurrent.futures import ThreadPoolExecutor

# Run the same CPU-bound work on a pool of 4 threads.
# The executor is used as a context manager so that its worker threads are
# shut down once the measurement is over instead of lingering in the session.
with ThreadPoolExecutor(max_workers=4) as pool:
    start = time.time()
    # list() drains the lazy iterator, so every task has finished by `end`.
    results = list(pool.map(factorize, numbers))
    end = time.time()

print('Took %.3f seconds' % (end-start))

Took 2.217 seconds


In [3]:
from concurrent.futures import ProcessPoolExecutor

# Run the same CPU-bound work on a pool of 4 worker processes.
# The executor is used as a context manager so that its worker processes are
# shut down once the measurement is over instead of being left running.
with ProcessPoolExecutor(max_workers=4) as pool:
    start = time.time()
    # list() drains the lazy iterator, so every task has finished by `end`.
    results = list(pool.map(factorize, numbers))
    end = time.time()

print('Took %.3f seconds' % (end-start))

Took 0.576 seconds


## 그렇다면 만약 IO를 다룰때는 어떨까요?

In [4]:
import select
import time
def slow_systemcall():
    """Block in ``select`` for 0.1 seconds to stand in for a slow system call.

    No file descriptors are passed, so the call has nothing to wait on other
    than its timeout.
    """
    no_descriptors = []
    timeout_seconds = 0.1
    select.select(no_descriptors, no_descriptors, no_descriptors, timeout_seconds)

# Sequential baseline: five blocking calls made back to back.
start = time.time()
for _ in range(5):
    slow_systemcall()
end = time.time()

elapsed = end - start
print('Took %.3f seconds' % elapsed)

Took 0.507 seconds


In [5]:
from concurrent.futures import ThreadPoolExecutor

# Issue the five blocking calls through a pool of 4 threads.
start = time.time()
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(slow_systemcall) for _ in range(5)]
    # result() re-raises anything a call raised. Without it a failing call
    # would be dropped silently and the timing below would look too good.
    for future in futures:
        future.result()
end = time.time()

print('Took %.3f seconds' % (end-start))

Took 0.209 seconds


In [6]:
from concurrent.futures import ProcessPoolExecutor

# Issue the five blocking calls through a pool of 4 worker processes.
start = time.time()
with ProcessPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(slow_systemcall) for _ in range(5)]
    # result() re-raises anything a call raised. Without it a failing call
    # would be dropped silently and the timing below would look too good.
    for future in futures:
        future.result()
end = time.time()

print('Took %.3f seconds' % (end-start))

Took 0.223 seconds


IO에 의한 대기는 GIL의 영향을 받지 않으므로, IO 부하가 큰 작업은 Thread로도 충분합니다. 상황에 맞게 Concurrency 방법을 선택하세요. 시스템 프로그래머가 아니면 thread를 heavy하게 쓸 일은 거의 없고, 위에 소개된 concurrent.futures를 사용하면 쉽고 강력한 concurrency를 구현할 수 있습니다.

## EXERCISE

In [12]:
from IPython.display import display, Image
import time, os.path, logging
from urllib.request import urlopen
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor

# Country codes whose flags are downloaded. The two adjacent string literals
# are joined into a single string, which is then split on whitespace.
POP20_CC = (
    ' CN IN US ID BR PK NG BD RU JP KR '
    'MX PH VN ET EG DE IR TR CD FR'
).split()
# Root URL; get_flag() requests '<BASE_URL>/<cc>/<cc>.gif' beneath it.
BASE_URL = 'http://flupy.org/data/flags'
# Local directory that the downloaded flag images are written to.
DEST_DIR = 'downloads/'

def get_flag(cc):
    """Download the flag image for country code ``cc`` into ``DEST_DIR``.

    The image is requested from ``<BASE_URL>/<cc>/<cc>.gif`` and written to
    ``<DEST_DIR>/<cc>.gif``, using the lower-cased code in both places. The
    destination path is logged afterwards.

    Args:
        cc: Country code such as ``'KR'``; it is lower-cased before use.

    Returns:
        None. Exceptions raised by ``urlopen`` or ``open`` are not caught
        here and propagate to the caller.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=code)
    path = os.path.join(DEST_DIR, f'{code}.gif')
    # Both the HTTP response and the output file are context-managed, so the
    # connection is closed as soon as the body has been written out.
    with urlopen(url) as resp, open(path, 'wb') as fp:
        fp.write(resp.read())
    # NOTE: rendering the image with display(Image(url=path)) was disabled by
    # the original author, who suspected it was not thread-safe.
    # Presumably logged at CRITICAL so the line shows up without any logging
    # configuration.
    logging.critical(path)

def proc():
    """Download every flag listed in ``POP20_CC`` and print the elapsed time.

    Exercise:
        1. Implement the logic that calls ``get_flag`` for each country code.
        2. Handle it with threads.
        3. Handle it with processes.

    Timings noted by the original author: builtin ``map`` about 8s,
    ``ProcessPoolExecutor`` about 2.4s, ``ThreadPoolExecutor`` about 1.8s.
    """
    t0 = time.time()
    # To compare strategies, swap ProcessPoolExecutor for ThreadPoolExecutor,
    # or replace the whole `with` block by `list(map(get_flag, POP20_CC))`.
    # The executor is a context manager so its workers are released when the
    # downloads finish instead of being left running.
    with ProcessPoolExecutor(max_workers=4) as executor:
        # list() drains the lazy result iterator, which waits for every
        # download and re-raises any exception raised inside a worker.
        list(executor.map(get_flag, POP20_CC))
    elapsed = time.time() - t0
    print('elapsed in {:.2f}s'.format(elapsed))
    
# Make sure the download directory exists (no error if it already does),
# then run the timed download.
os.makedirs(DEST_DIR, exist_ok=True)
proc()

CRITICAL:root:downloads/cn.gif
CRITICAL:root:downloads/id.gif
CRITICAL:root:downloads/us.gif
CRITICAL:root:downloads/br.gif
CRITICAL:root:downloads/in.gif
CRITICAL:root:downloads/pk.gif
CRITICAL:root:downloads/ng.gif
CRITICAL:root:downloads/bd.gif
CRITICAL:root:downloads/ru.gif
CRITICAL:root:downloads/jp.gif
CRITICAL:root:downloads/kr.gif
CRITICAL:root:downloads/mx.gif
CRITICAL:root:downloads/vn.gif
CRITICAL:root:downloads/ph.gif
CRITICAL:root:downloads/et.gif
CRITICAL:root:downloads/eg.gif
CRITICAL:root:downloads/de.gif
CRITICAL:root:downloads/tr.gif
CRITICAL:root:downloads/ir.gif
CRITICAL:root:downloads/fr.gif
CRITICAL:root:downloads/cd.gif


elapsed in 2.40s


In [1]:
def worker():
    """Simulate a unit of work by sleeping for a random 0, 1 or 2 seconds.

    Returns:
        The number of seconds slept.
    """
    # Imported locally so the function carries its own dependencies.
    from random import randrange
    from time import sleep

    delay = randrange(3)
    sleep(delay)
    # To simulate failures, an exception could be raised here at random.
    return delay


In [2]:
worker()

2

In [5]:
import threading
import time

# Start signal: set by the coordinator, waited on by every runner thread.
c_event_start = threading.Event()

# Rebound further down to the list of runner Thread objects.
threads = []
# One "work done" event per runner thread; filled in further down.
workdone_events = []

def or_set(self):
    """Set the underlying event, then run its change callback.

    Installed by ``orify`` as the replacement for ``set``; ``self`` is the
    patched event.
    """
    self._set()
    self.changed()

def or_clear(self):
    """Clear the underlying event, then run its change callback.

    Installed by ``orify`` as the replacement for ``clear``; ``self`` is the
    patched event.
    """
    self._clear()
    self.changed()

def orify(e, changed_callback):
    """Patch event ``e`` so every ``set()``/``clear()`` also runs a callback.

    The original bound methods are saved as ``e._set`` and ``e._clear``, the
    callback is stored as ``e.changed``, and ``e.set``/``e.clear`` are
    replaced by wrappers that call the saved method and then the callback.

    Calling ``orify`` again on an already patched event chains the new
    callback after the existing one. Patching a second time would instead
    save the wrapper itself as ``e._set`` and make ``e.set()`` recurse
    without end.

    Args:
        e: Event-like object exposing ``set`` and ``clear``.
        changed_callback: Zero-argument callable run after each state change.
    """
    if getattr(e, '_orified', False):
        previous_callback = e.changed

        def chained():
            previous_callback()
            changed_callback()

        e.changed = chained
        return

    e._set = e.set
    e._clear = e.clear
    e.changed = changed_callback
    e.set = lambda: or_set(e)
    e.clear = lambda: or_clear(e)
    # Marker that lets a later orify() call detect the existing patch.
    e._orified = True

def AllEvents(*events):
    """Return an event that tracks whether all of ``events`` are set.

    Each input event is patched with ``orify`` so that any ``set()`` or
    ``clear()`` on it re-evaluates the combined state: the returned event is
    set when every input is set and cleared otherwise.

    Args:
        *events: Events to combine. With no events the result starts out
            set, because ``all()`` of an empty sequence is true.

    Returns:
        A new ``threading.Event`` holding the combined state.
    """
    all_event = threading.Event()
    # The inputs are set from different threads. Without this lock a thread
    # could act on a stale "not all set" snapshot and clear the combined
    # event after another thread had already, correctly, set it.
    state_lock = threading.Lock()

    def changed():
        with state_lock:
            if all(e.is_set() for e in events):
                all_event.set()
            else:
                all_event.clear()

    for e in events:
        orify(e, changed)
    # Establish the initial state before any input changes.
    changed()
    return all_event

def runner(fin_event, wait_event, value):
    """Wait for the start signal, do one unit of work, then report completion.

    Args:
        fin_event: Event that is set once this thread's work has finished.
        wait_event: Event this thread blocks on before starting its work.
        value: Not used by the function body.
    """
    current = threading.current_thread()
    thread_name = current.name

    wait_event.wait()  # block until the start signal is given
    seconds_worked = worker()
    message = 'Thread work done(%ds): %s' % (seconds_worked, thread_name)
    print(message)
    fin_event.set()

def coordinator(all_events, workdone_events, notify_event):
    """Start the workers, wait until all are done, then reset every event.

    Args:
        all_events: Event that is waited on until every worker has finished.
        workdone_events: Per-worker completion events; cleared at the end.
        notify_event: Start signal that is set to release the workers.
    """
    # Give the start order. The signal stays set until the round is over:
    # clearing it right after set() could leave a worker that had not yet
    # reached wait() blocked forever.
    notify_event.set()

    # Wait until every worker has reported completion.
    all_events.wait()

    # Stand-in for the coordination step that follows a finished round.
    print('coordinating...')
    time.sleep(0.3)
    print('coordinating... done..')

    # Put every event back into its cleared state.
    notify_event.clear()
    for e in workdone_events:
        e.clear()
    all_events.clear()


# events: one completion event per worker, plus the combined "all done" event
NUM_WORKERS = 5
workdone_events.extend(threading.Event() for _ in range(NUM_WORKERS))
all_e = AllEvents(*workdone_events)

# threads: each runner gets its own completion event and the shared start
# signal; the coordinator gets the combined event and the start signal.
threads = [
    threading.Thread(
        target=runner,
        args=(workdone_events[t], c_event_start, t),
    )
    for t in range(NUM_WORKERS)
]
cthread = threading.Thread(
    target=coordinator,
    args=(all_e, workdone_events, c_event_start),
)

# Plain loops rather than list comprehensions: start() and join() are called
# only for their side effects, so there is no list worth building.
for thread in threads:
    thread.start()
cthread.start()

for thread in threads:
    thread.join()
cthread.join()
print('all done.')

Thread work done(0s): Thread-19Thread work done(0s): Thread-20

Thread work done(1s): Thread-17
Thread work done(2s): Thread-16Thread work done(2s): Thread-18

coordinating...
coordinating... done..
all done.
