In [5]:
"""
순차 내려받기 스크립트.
"""

import os, time, sys, requests

POP20_CC = ('CN IN US ID BP PK NG BD RU JP'
            'MX PH VN ET EG DE IR TR CD FR').split()

BASE_URL = 'http://flupy.org/data/flags'

DEST_DIR = 'downloads/'

def save_flag(img, filename) :
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp : 
        fp.write(img)
        
def get_flag(cc) : 
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    resp = requests.get(url)
    return resp.content

def show(text) :
    print(text, end=' ')
    sys.stdout.flush()
    
def download_many(cc_list) : 
    for cc in sorted(cc_list) : 
        image = get_flag(cc)
        show(cc)
        save_flag(image, cc.lower() + '.gif')
        

def main(download_many) : 
    t0 = time.time()
    count = download_many(POP20_CC)
    elapsed = time.time() - t0
    msg = '\n{} flags downloaded in {:.2f}s'
    print(msg.format(count, elapsed))
    
if __name__ == '__main__' : 
    main(download_many)

BD BP CD CN DE EG ET FR ID IN IR JPMX NG PH PK RU TR US VN 
None flags downloaded in 20.98s


In [10]:
from concurrent import futures

MAX_WORKERS = 20

def download_one(cc) : 
    image = get_flag(cc)
    show(cc)
    save_flag(image, cc.lower() + '.gif')
    return cc

def download_many(cc_list) : 
    # 변경 전 코드 
#     workers = min(MAX_WORKERS, len(cc_list))
#     with futures.ThreadPoolExecutor(workers) as executor : 
#         res = executor.map(download_one, sorted(cc_list)) # 여러 스레드를 활용하여 작업 처리 -> 성능 향상 
        
#     return len(list(res))

    # 변경 후 코드 
    cc_list = cc_list[:5]
    with futures.ThreadPoolExecutor(max_workers = 3) as executor : 
        to_do = []
        for cc in sorted(cc_list) : 
            future = executor.submit(download_one, cc)
            to_do.append(future)
            msg = 'Scheduled for {}: {}'
            print(msg.format(cc, future))
            
        
        results = []
        for future in futures.as_completed(to_do) : 
            res = future.result()
            msg = '{} result: {!r}'
            print(msg.format(future, res))
            results.append(res)

    return len(results)
    
if __name__ == '__main__' : 
    main(download_many)
    

Scheduled for BP: <Future at 0x7fab217b73a0 state=running>
Scheduled for CN: <Future at 0x7faac803cca0 state=running>
Scheduled for ID: <Future at 0x7faac803c6a0 state=running>
Scheduled for IN: <Future at 0x7faae0062400 state=pending>
Scheduled for US: <Future at 0x7faac803ce50 state=pending>
ID CN <Future at 0x7faac803c6a0 state=finished returned str> result: 'ID'
<Future at 0x7faac803cca0 state=finished returned str> result: 'CN'
BP <Future at 0x7fab217b73a0 state=finished returned str> result: 'BP'
US IN <Future at 0x7faac803ce50 state=finished returned str> result: 'US'
<Future at 0x7faae0062400 state=finished returned str> result: 'IN'

5 flags downloaded in 2.76s


In [11]:
"""
GIL(전역 인터프리터 락)
- CPython 인터프리터는 내부적으로 스레드 안전하지 않으므로, GIL을 가지고 있음 
- 한 번에 한 스레드만 파이썬 바이트코드를 실행하도록 제한함
- 파이썬 프로세스가 동시에 다중 CPU 코어를 사용할 수 없음 
"""

'\nGIL(전역 인터프리터 락)\n- CPython 인터프리터는 내부적으로 스레드 안전하지 않으므로, GIL을 가지고 있음 \n- 한 번에 한 스레드만 파이썬 바이트코드를 실행하도록 제한함\n- 파이썬 프로세스가 동시에 다중 CPU 코어를 사용할 수 없음 \n'

In [12]:
from concurrent import futures

MAX_WORKERS = 20

def download_one(cc) : 
    image = get_flag(cc)
    show(cc)
    save_flag(image, cc.lower() + '.gif')
    return cc

def download_many(cc_list) : 
    cc_list = cc_list[:5]
    with futures.ProcessPoolExecutor() as executor : 
        to_do = []
        for cc in sorted(cc_list) : 
            future = executor.submit(download_one, cc)
            to_do.append(future)
            msg = 'Scheduled for {}: {}'
            print(msg.format(cc, future))
            
        
        results = []
        for future in futures.as_completed(to_do) : 
            res = future.result()
            msg = '{} result: {!r}'
            print(msg.format(future, res))
            results.append(res)

    return len(results)
    
if __name__ == '__main__' : 
    main(download_many)

Scheduled for BP: <Future at 0x7faae006a4c0 state=running>
Scheduled for CN: <Future at 0x7fab217e4cd0 state=pending>
Scheduled for ID: <Future at 0x7faae0071730 state=pending>
Scheduled for IN: <Future at 0x7fab217e4ac0 state=pending>
Scheduled for US: <Future at 0x7fab217e4ee0 state=pending>


Process SpawnProcess-4:
Process SpawnProcess-1:
Process SpawnProcess-5:
Process SpawnProcess-2:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/qwefghnm1212/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/qwefghnm1212/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/qwefghnm1212/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/qwefghnm1212/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/qwefghnm1212/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/qwefghnm1212/opt/anaconda3/lib/python3.9/concurrent/futures/process.py", line 237, in _process_worker
   

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

In [13]:
from time import sleep, strftime
from concurrent import futures

def display(*args) : 
    print(strftime('[%H:%M:%S]'), end=' ')
    print(*args)
    
def loiter(n) : 
    msg = '{}loiter({}) : doing nothing for {}s...'
    display(msg.format('\t'*n, n, n))
    sleep(n)
    
    msg = '{}loiter({}) : done.'
    display(msg.format('\t'*n, n))
    return n * 10

def main() : 
    display('Script starting.')
    executor = futures.ThreadPoolExecutor(max_workers=3) # 쓰레드 3개 생성 
    results = executor.map(loiter, range(5))
    display('results : ', results)
    display('Waiting for individual results : ')
    for i, result in enumerate(results) : 
        display('result {} : {}'.format(i, result))
        
main()

[11:54:28] Script starting.
[11:54:28] loiter(0) : doing nothing for 0s...
[11:54:28] loiter(0) : done.
[11:54:28][11:54:28] 		loiter(2) : doing nothing for 2s...
 	loiter(1) : doing nothing for 1s...
[11:54:28][11:54:28] results :  <generator object Executor.map.<locals>.result_iterator at 0x7fab217b45f0>
[11:54:28] Waiting for individual results : 
[11:54:28] result 0 : 0
 			loiter(3) : doing nothing for 3s...
[11:54:30] 	loiter(1) : done.
[11:54:30] 				loiter(4) : doing nothing for 4s...
[11:54:30] result 1 : 10
[11:54:31] 		loiter(2) : done.
[11:54:31] result 2 : 20
[11:54:32] 			loiter(3) : done.
[11:54:32] result 3 : 30
[11:54:34] 				loiter(4) : done.
[11:54:34] result 4 : 40


In [14]:
def get_flag(base_url, cc) : 
    url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
    resp = requests.get(url)
    
    if resp.status_code != 200 : 
        resp.raise_for_status()
        
    return resp.content

def download_one(cc, base_url, verbose=False) : 
    try : 
        image = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as exc : 
        res = exc.response
        
        if res.status_code == 404 :
            status = HTTPStatus.not_found
            msg = 'not found'
        else : 
            raise
    else : 
        save_flag(image, cc.lower() + '.gif')
        status = HTTPStatus.ok
        msg = 'OK'
        
    if verbose : 
        print(cc, msg)
        
    return Result(status, cc)

def download_many(cc_list, base_url, verbose, max_req) : 
    conuter = collections.Counter()
    cc_iter = sorted(cc_list)
    
    if not verbose : 
        cc_iter = tqdm.tqdm(cc_iter)
    
    for cc in cc_iter : 
        try : 
            res = download_one(cc, base_url, verbose)
        except requests.exceptions.HTTPError as exc : 
            error_msg = 'HTTP error {res.status_code} - {res.reason}'
            error_msg = error_msg.format(res=exc.response)
        except requests.exceptions.ConnectionError as exc : 
            error_msg = 'Connection error'
        else : 
            error_msg = ''
            status = res.status
            
        if error_msg : 
            status = HTTPStatus.error
            counter[status] += 1
            
            if verbose and error_msg : 
                print('*** Error for {} : {}'.format(cc, error_msg))
                
    return counter

    

In [17]:

#!/usr/bin/env python3

"""Download flags of countries (with error handling).

ThreadPool version

Sample run::

    $ python3 flags2_threadpool.py -s ERROR -e
    ERROR site: http://localhost:8003/flags
    Searching for 676 flags: from AA to ZZ
    30 concurrent connections will be used.
    --------------------
    150 flags downloaded.
    361 not found.
    165 errors.
    Elapsed time: 7.46s

"""

# tag::FLAGS2_THREADPOOL[]
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
import tqdm  # type: ignore


DEFAULT_CONCUR_REQ = 30  # <2>
MAX_CONCUR_REQ = 1000  # <3>


def download_many(cc_list: list[str],
                  base_url: str,
                  verbose: bool,
                  concur_req: int) -> Counter[DownloadStatus]:
    counter: Counter[DownloadStatus] = Counter()
    with ThreadPoolExecutor(max_workers=concur_req) as executor:  # <4>
        to_do_map = {}  # <5>
        for cc in sorted(cc_list):  # <6>
            future = executor.submit(download_one, cc,
                                     base_url, verbose)  # <7>
            to_do_map[future] = cc  # <8>
        done_iter = as_completed(to_do_map)  # <9>
        if not verbose:
            done_iter = tqdm.tqdm(done_iter, total=len(cc_list))  # <10>
        for future in done_iter:  # <11>
            try:
                status = future.result()  # <12>
            except httpx.HTTPStatusError as exc:  # <13>
                error_msg = 'HTTP error {resp.status_code} - {resp.reason_phrase}'
                error_msg = error_msg.format(resp=exc.response)
            except httpx.RequestError as exc:
                error_msg = f'{exc} {type(exc)}'.strip()
            except KeyboardInterrupt:
                break
            else:
                error_msg = ''

            if error_msg:
                status = DownloadStatus.ERROR
            counter[status] += 1
            if verbose and error_msg:
                cc = to_do_map[future]  # <14>
                print(f'{cc} error: {error_msg}')

    return counter


if __name__ == '__main__':
    main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
# end::FLAGS2_THREADPOOL[]


NameError: name 'DownloadStatus' is not defined