In [2]:
import os
import requests, time

In [15]:
def download_site(url, sess):
    with sess.get(url) as response:
        print("Read len(response.content): {} from {}".format(len(response.content), url))

In [22]:
def download_all_sites_ST(sites):
    with requests.Session() as sess:
        for url in sites:
            download_site(url, sess)

In [23]:
if __name__ == '__main__':
    sites = ["https://www.jython.org", "http://olympus.realpython.org/dice",]
    
    start_time = time.time()
    download_all_sites_ST(sites)
    duration = time.time()-start_time
    print("Download sites len:{} in duration: {} seconds".format(len(sites), duration))

Read len(response.content): 10286 from https://www.jython.org
Read len(response.content): 277 from http://olympus.realpython.org/dice
Download sites len:2 in duration: 0.586657047272 seconds


In [13]:
#threading version
import concurrent.futures
import requests
import threading
import time
import Queue
import urllib2

In [2]:
thread_local = threading.local

In [3]:
def get_session():
    if not hasattr(thread_local, "session"):
        thread_local.session = requests.Session()
    return thread_local.session

In [4]:
def download_site(url):
    sess = get_session()
    with sess.get(url) as response:
        print("Read len res.content: {} from {}".format(len(response.content), url))

In [5]:
def download_all_sites_MT(sites):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.map(download_site, sites)

In [6]:
if __name__ == '__main__':
    sites = ["https://www.jython.org", "http://olympus.realpython.org/dice",]
    
    start_time = time.time()
    download_all_sites_MT(sites)
    duration = time.time()-start_time
    print("Download sites len:{} in duration: {} seconds".format(len(sites), duration))

Download sites len:2 in duration: 0.00809788703918 seconds


In [4]:
class consumer(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self._queue = queue
        
    def run(self):
        while True:
            msg = self._queue.get()
            if isinstance(msg, str) and msg == 'quit':
                break
            print("i am a thread, and i received %s!!", msg)
            
        print("bye bye!")

In [11]:
def producer():
    queue = Queue.Queue()
    
    worker = consumer(queue)
    
    print("before start")
    worker.start()
    print("after start")
    
    start_time = time.time()
    
    while time.time() - start_time < 5:
        queue.put("something at %s"% time.time())
        time.sleep(1)
        
    queue.put('quit')
    worker.join()

In [12]:
if __name__ == '__main__':
    producer()

before start
after start
('i am a thread, and i received %s!!', 'something at 1576059347.67')
('i am a thread, and i received %s!!', 'something at 1576059348.67')
('i am a thread, and i received %s!!', 'something at 1576059349.67')
('i am a thread, and i received %s!!', 'something at 1576059350.67')
('i am a thread, and i received %s!!', 'something at 1576059351.67')
bye bye!


In [20]:
class urlConsumer(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self._queue = queue
        
    def run(self):
        while True:
            content = self._queue.get()
            if isinstance(content, str) and content == 'quit':
                break
            response = urllib2.urlopen(content)
            print(response)
            
        print('bye bye')

In [17]:
def producer():
    urls = ['http://www.python.org', 'http://www.yahoo.com', 'http://www.scala.org', 'http://www.google.com']
    queue = Queue.Queue()
    worker_threads = build_worker_pool(queue, 4)
    start_time = time.time()
    
    for url in urls:
        queue.put(url)
        
    for worker in worker_threads:
        queue.put('quit')
        
    for worker in worker_threads:
        worker.join()
        
    print("Done! time taker: {}".format(time.time()-start_time))

In [18]:
def build_worker_pool(queue, size):
    workers = []
    for _ in range(size):
        worker = urlConsumer(queue)
        worker.start()
        workers.append(worker)
    return workers

In [21]:
if __name__ == '__main__':
    producer()

<addinfourl at 140504890647256 whose fp = <socket._fileobject object at 0x7fc9df83c4d0>>
bye bye
<addinfourl at 140504890648120 whose fp = <socket._fileobject object at 0x7fc9df83c150>>
bye bye
<addinfourl at 140504890648192 whose fp = <socket._fileobject object at 0x7fc9df80d6d0>>
bye bye
<addinfourl at 140504890649272 whose fp = <socket._fileobject object at 0x7fc9df83c4d0>>
bye bye
Done! time taker: 0.49712896347
