# try `requests-futures` for async requests

The code is [here](https://github.com/ross/requests-futures).

In [1]:
# Install the third-party package that provides `requests_futures.sessions.FuturesSession`, imported below.
! pip install requests-futures

[33mYou are using pip version 9.0.1, however version 10.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## benchmarks

Good news:
- with 1 worker, 10 requests take ~10X as long as 1 request (the requests run one after another)
- with 5 workers, 10 requests take only ~2X as long as 1 request
- for 1 request, 1 worker is about as fast as 5 workers

http://httpbin.org/get is used in the `requests-futures` examples.

In [2]:
from concurrent.futures import ThreadPoolExecutor
from requests_futures.sessions import FuturesSession


def get_httpbin_contents(session, n_requests):
    """Request ``n_requests`` httpbin pages via ``session`` and return their bodies.

    Every request is submitted before any result is awaited, so a session
    with several workers can overlap them.

    :param session: object whose ``get(url)`` returns a future-like object
        exposing ``result()``.
    :param n_requests: number of URLs to fetch; request ``i`` asks for
        ``http://httpbin.org/get?foo=i``.
    :return: list of ``response.content`` values, in submission order.
    :raises Exception: with the response as its argument, for the first
        response (in submission order) whose status code is not 200.
    """
    # Submit everything up front; nothing blocks until result() is called.
    pending = [
        session.get('http://httpbin.org/get?foo={}'.format(index))
        for index in range(n_requests)
    ]
    # Wait on the futures in submission order and keep the raw bodies.
    bodies = []
    for future in pending:
        reply = future.result()
        if reply.status_code != 200:
            raise Exception(reply)
        bodies.append(reply.content)
    return bodies


# Two sessions that differ only in their worker count, so the benchmark cells
# can compare a single worker against five workers.
session_1 = FuturesSession(max_workers=1)
session_5 = FuturesSession(max_workers=5)

In [3]:
import json

# Sanity check: fetch one page and show the decoded payload.
contents = get_httpbin_contents(session_1, 1)
# The body is JSON received over plain HTTP, so parse it with json.loads.
# eval() would execute whatever bytes the server (or anything on the network
# path) returned.
print(json.loads(contents[0].decode('utf-8')))

{'args': {'foo': '0'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'close', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.18.4'}, 'origin': '12.31.132.194', 'url': 'http://httpbin.org/get?foo=0'}


In [4]:
%%timeit
# 1 worker, 1 request: baseline cost of a single round trip through session_1
get_httpbin_contents(session_1, 1)

109 ms ± 7.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
%%timeit
# 1 worker, 10 requests: ten submissions handled by the single-worker session
get_httpbin_contents(session_1, 10)

1.05 s ± 160 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%%timeit
# 5 workers, 1 request: compare with the 1-worker baseline to see whether the larger session costs anything
get_httpbin_contents(session_5, 1)

98.7 ms ± 6.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
%%timeit
# 5 workers, 10 requests: the same ten submissions, now through the five-worker session
get_httpbin_contents(session_5, 10)

231 ms ± 29.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## more sophisticated requests

good news:
- `requests-futures` can do anything `requests` can do

bad news:
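- `requests-futures` is poorly documented: for example, `help(session.get)` shows the docstring inherited from `requests`, which says a `Response` is returned, while the call actually returns a `Future` (see the next cell)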

In [8]:
# help() prints the docstring of `get`, which documents a `Response` return value ...
help(session_1.get)
# ... but calling it on the futures session yields a Future (this line submits a real request to httpbin).
type(session_1.get('http://httpbin.org/get'))

Help on method get in module requests.sessions:

get(url, **kwargs) method of requests_futures.sessions.FuturesSession instance
    Sends a GET request. Returns :class:`Response` object.
    
    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response



concurrent.futures._base.Future

In [9]:
import os
from dotenv import load_dotenv

# NOTE(review): assumes the kernel's working directory is the notebook's own
# folder and that .env sits two directory levels above it. Confirm if the
# notebook is moved.
exploratory_directory = os.getcwd()
notebooks_directory = os.path.abspath(os.path.join(exploratory_directory, os.pardir))
experiment_directory = os.path.abspath(os.path.join(notebooks_directory, os.pardir))

# Load the settings stored in the .env file, then read them from the environment.
dotenv_path = os.path.join(experiment_directory, '.env')
load_dotenv(dotenv_path)

YELP_API_KEY = os.environ.get('YELP_API_KEY')
API_HOST = os.environ.get('API_HOST')
SEARCH_PATH = os.environ.get('SEARCH_PATH')

# Fail fast and name the missing settings. Otherwise a missing value stays None
# and only surfaces later as an unrelated error (e.g. None has no `.encode`, or
# a request sent with a "Bearer None" header).
_missing_settings = [
    name
    for name, value in [
        ('YELP_API_KEY', YELP_API_KEY),
        ('API_HOST', API_HOST),
        ('SEARCH_PATH', SEARCH_PATH),
    ]
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        'Missing environment variable(s): {}. Set them in the environment or in {}'.format(
            ', '.join(_missing_settings), dotenv_path
        )
    )

In [10]:
# import os
# import json
import requests

from urllib.error import HTTPError
from urllib.parse import quote
from urllib.parse import urlencode


# Search centre, radius and category aliases used for the Yelp queries in this notebook.
lat, lng = 41.88, -87.63
radius = 100
categories = ['zoos', 'beaches', 'restaurants']

url_params = {
    # Yelp expects a comma-delimited list of category aliases. A '+' separator
    # is percent-encoded by requests and reaches the API as one literal
    # 'zoos+beaches+restaurants' string instead of three categories.
    'categories': ','.join(categories),
    'latitude': lat,
    'longitude': lng,
    'radius': radius,
    'limit': 50
}

# Endpoint = host + percent-quoted search path, both read from the environment.
url = '{0}{1}'.format(API_HOST, quote(SEARCH_PATH.encode('utf8')))
headers = {
    'Authorization': 'Bearer %s' % YELP_API_KEY,
}

# Reference call with plain `requests`, for comparison with the futures-based call.
response = requests.request('GET', url, headers=headers, params=url_params)
# Surface HTTP errors (bad key, bad parameters) here rather than as a KeyError
# on 'businesses' below.
response.raise_for_status()
rj_1 = response.json()
rj_1['businesses'][0]

{'id': '4wmRu3bpaSpW999SHGK34g',
 'alias': 'revival-food-hall-chicago',
 'name': 'Revival Food Hall',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/jTYJnk7AnRd-zNjoDUcrDQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/revival-food-hall-chicago?adjust_creative=y27hFOR4ZHFRcL4Kq4aq9w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=y27hFOR4ZHFRcL4Kq4aq9w',
 'review_count': 300,
 'categories': [{'alias': 'food_court', 'title': 'Food Court'}],
 'rating': 4.0,
 'coordinates': {'latitude': 41.87979, 'longitude': -87.63051},
 'transactions': ['pickup', 'delivery'],
 'price': '$$',
 'location': {'address1': '125 S Clark St',
  'address2': '',
  'address3': None,
  'city': 'Chicago',
  'zip_code': '60603',
  'country': 'US',
  'state': 'IL',
  'display_address': ['125 S Clark St', 'Chicago, IL 60603']},
 'phone': '+17739999411',
 'display_phone': '(773) 999-9411',
 'distance': 37.54272999315398}

In [11]:
# Same search as the previous cell, submitted through the futures session:
# .get() hands back a future, and .result() waits for the response.
future = session_1.get(url, headers=headers, params=url_params)
response = future.result()
rj_2 = response.json()
# Show the first business so it can be compared with rj_1 from the plain `requests` call.
rj_2['businesses'][0]

{'id': '4wmRu3bpaSpW999SHGK34g',
 'alias': 'revival-food-hall-chicago',
 'name': 'Revival Food Hall',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/jTYJnk7AnRd-zNjoDUcrDQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/revival-food-hall-chicago?adjust_creative=y27hFOR4ZHFRcL4Kq4aq9w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=y27hFOR4ZHFRcL4Kq4aq9w',
 'review_count': 300,
 'categories': [{'alias': 'food_court', 'title': 'Food Court'}],
 'rating': 4.0,
 'coordinates': {'latitude': 41.87979, 'longitude': -87.63051},
 'transactions': ['delivery', 'pickup'],
 'price': '$$',
 'location': {'address1': '125 S Clark St',
  'address2': '',
  'address3': None,
  'city': 'Chicago',
  'zip_code': '60603',
  'country': 'US',
  'state': 'IL',
  'display_address': ['125 S Clark St', 'Chicago, IL 60603']},
 'phone': '+17739999411',
 'display_phone': '(773) 999-9411',
 'distance': 37.54272999315398}

In [12]:
def _submit_request(future_session, url, headers, parameters):
    categories = parameters['response']['categories']
    lat = parameters['response']['circle']['center']['lat']
    lng = parameters['response']['circle']['center']['lng']
    radius = parameters['response']['circle']['radius']
    url_params = {
        'categories': '+'.join(categories),
        'latitude': lat,
        'longitude': lng,
        'radius': radius,
        'limit': 50 #TODO paginate
    }
    return future_session.get(url, headers=headers, params=url_params)


def make_async_requests(future_session, parameter_lists, API_HOST,
                        SEARCH_PATH, YELP_API_KEY):
    """Submit one search per entry of ``parameter_lists`` and gather the results.

    All requests are submitted before any result is awaited.

    :param future_session: session passed to ``_submit_request`` for every search.
    :param parameter_lists: iterable of parameter dicts, one per search.
    :param API_HOST: host part of the endpoint URL.
    :param SEARCH_PATH: path part of the endpoint URL; percent-quoted before use.
    :param YELP_API_KEY: key sent as a ``Bearer`` token in the Authorization header.
    :return: ``(contents, errors_responses)``: the parsed JSON of every response
        with status 200, and the response objects of all other responses, each
        in submission order.
    """
    endpoint = '{0}{1}'.format(API_HOST, quote(SEARCH_PATH.encode('utf8')))
    auth_headers = {'Authorization': 'Bearer %s' % YELP_API_KEY}
    # Submit every search first so none of them waits on another's result.
    pending = [
        _submit_request(future_session, endpoint, auth_headers, search)
        for search in parameter_lists
    ]
    # Then wait on each future, separating successes from failures.
    contents, errors_responses = [], []
    for future in pending:
        reply = future.result()
        if reply.status_code != 200:
            errors_responses.append(reply)
            continue
        contents.append(reply.json())
    return contents, errors_responses

In [13]:
# Pack the coordinates, radius and categories defined earlier into the nested
# layout that _submit_request reads (parameters['response'][...]).
circle = {'center': {'lat': lat, 'lng': lng}, 'radius': radius}
parameters = {'response': {'circle': circle, 'categories': categories}}
# A single search is enough to exercise the helpers end to end.
parameter_lists = [parameters]

# NOTE: this rebinds `contents` (earlier a list of raw httpbin bodies) to a list of parsed JSON dicts.
contents, errors_responses = make_async_requests(
    session_1, parameter_lists, API_HOST, SEARCH_PATH, YELP_API_KEY
)

# Display every business returned by the first (and only) search.
contents[0]['businesses']

[{'id': '4wmRu3bpaSpW999SHGK34g',
  'alias': 'revival-food-hall-chicago',
  'name': 'Revival Food Hall',
  'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/jTYJnk7AnRd-zNjoDUcrDQ/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/revival-food-hall-chicago?adjust_creative=y27hFOR4ZHFRcL4Kq4aq9w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=y27hFOR4ZHFRcL4Kq4aq9w',
  'review_count': 300,
  'categories': [{'alias': 'food_court', 'title': 'Food Court'}],
  'rating': 4.0,
  'coordinates': {'latitude': 41.87979, 'longitude': -87.63051},
  'transactions': ['pickup', 'delivery'],
  'price': '$$',
  'location': {'address1': '125 S Clark St',
   'address2': '',
   'address3': None,
   'city': 'Chicago',
   'zip_code': '60603',
   'country': 'US',
   'state': 'IL',
   'display_address': ['125 S Clark St', 'Chicago, IL 60603']},
  'phone': '+17739999411',
  'display_phone': '(773) 999-9411',
  'distance': 37.54272999315398},
 {'id': 'IRa05qXL2uU_WotrRgWNi