In [65]:
from aiolimiter import AsyncLimiter
import asyncio
import httpx
from typing import TypedDict, Callable, Tuple
import pandas as pd
import plotly.express as px
from datetime import datetime
from asyncio import Semaphore


# Why use callables in Functional Programming?

The more I learn about Functional Programming (**FP**) the more I like it. I find a more FP style much easier to debug and understand. This post discusses something I first learned about in [Grokking Simplicity](https://learning.oreilly.com/library/view/grokking-simplicity/9781617296208/): passing callables to a function, rather than calling the sub-function directly. I didn't understand why passing the callable is a much better approach until I ran into the problem myself.

TLDR: it's more modular, and easier to test.

To better explain, in my case I have a function to make a `GET` request and return the response and another function for error handling. Initially, here are those two functions.

In [66]:
async def get_url(
    url: str, client: httpx.AsyncClient, limiter: asyncio.Semaphore | None = None
) -> httpx.Response:
    """Send an async GET request to the given URL using the client.

    Allows for the optional use of a Semaphore to restrict simultaneous async calls.

    Parameters
    ----------
    url : str
        the url to get
    client : httpx.AsyncClient
        the httpx client to use for the call
    limiter : asyncio.Semaphore, optional
        the Semaphore instance to use. When ``None`` (the default) the request
        is sent without acquiring any limiter.

    Returns
    -------
    httpx.Response
        the response object from the `url`

    Raises
    ------
    httpx.HTTPStatusError
        if `raise_for_status` rejects the status code of the response
    """
    assert isinstance(client, httpx.AsyncClient)
    # Compare against None explicitly so the decision never depends on the
    # truthiness of whatever limiter object is passed in.
    if limiter is None:
        response = await client.get(url)
    else:
        # The limiter is held only while the request itself is in flight.
        async with limiter:
            response = await client.get(url)
    response.raise_for_status()
    return response


class _GetterRequiredArgs(TypedDict):
    """Keys that must always be supplied to the getter."""

    url: str
    client: httpx.AsyncClient


class GetterArgs(_GetterRequiredArgs, total=False):
    """Keyword arguments that `get_response` unpacks into the getter.

    `url` and `client` are required. `limiter` may be left out entirely,
    because `get_url` already defaults it to ``None``.
    """

    limiter: asyncio.Semaphore | None


async def get_response(
    getter_args: GetterArgs, retries: int = 10, current_try: int = 0
) -> httpx.Response:
    """Call `get_url` and retry when it fails with a retryable status code.

    A 429 status, or any 1xx status, is retried. Any other
    `httpx.HTTPStatusError` is re-raised immediately.

    Parameters
    ----------
    getter_args : GetterArgs
        the arguments to be passed to `get_url`
    retries : int
        the number of times to retry a call, for calls that should be retried
    current_try : int
        the number of retries already used; normally left at 0

    Returns
    -------
    httpx.Response
        the response from the url

    Raises
    ------
    httpx.HTTPStatusError
        for a status code that is not retried, or once the retries are used up
    """
    attempt = current_try
    # Loop instead of recursing so that the number of retries is not limited
    # by the interpreter's recursion depth.
    while True:
        try:
            return await get_url(**getter_args)
        except httpx.HTTPStatusError as e:
            code = e.response.status_code
            retryable = code == 429 or 100 <= code < 200
            if not retryable:
                # TODO: account for other error codes; e.g. redirections
                raise
            if attempt >= retries or retries <= 0:
                # Report the status that was actually received; it is not
                # necessarily a 429.
                print(f"Max retries exceeded for {code} error")
                raise
            attempt += 1


You can see that I've defined `get_url` and `get_response`. I call `get_url` from `get_response` and this approach works just fine.

In [67]:
async with httpx.AsyncClient() as client:
    getter_args = GetterArgs(
        url='http://httpbin.org/get',
        client=client
    )
    response = await get_response(getter_args=getter_args)
assert response.status_code == 200
print('Success')

Success


Exactly as intended. The tricky part is testing `get_response`. Among other things, I wanted to test whether a 429 error would cause `get_response` to retry the correct number of times. As it stands, I could mock a new function to replace `get_url`, which would likely achieve my goal. But it would be much cleaner to abstract the call to `get_url` by passing the function to `get_response` as an argument.

In [68]:
async def get_response(
    getter: Callable,
    getter_args: GetterArgs,
    retries: int = 10,
    current_try: int = 0
) -> httpx.Response:
    """Call `getter` and retry when it fails with a retryable status code.

    A 429 status, or any 1xx status, is retried. Any other
    `httpx.HTTPStatusError` is re-raised immediately.

    Parameters
    ----------
    getter : Callable
        the async function to use to get the data; it is awaited as
        ``getter(**getter_args)``
    getter_args : GetterArgs
        the keyword arguments to be passed to `getter`
    retries : int
        the number of times to retry a call, for calls that should be retried
    current_try : int
        the number of retries already used; normally left at 0

    Returns
    -------
    httpx.Response
        the response returned by `getter`

    Raises
    ------
    httpx.HTTPStatusError
        for a status code that is not retried, or once the retries are used up
    """
    attempt = current_try
    # Loop instead of recursing so that the number of retries is not limited
    # by the interpreter's recursion depth.
    while True:
        try:
            return await getter(**getter_args)
        except httpx.HTTPStatusError as e:
            code = e.response.status_code
            retryable = code == 429 or 100 <= code < 200
            if not retryable:
                # TODO: account for other error codes; e.g. redirections
                raise
            if attempt >= retries or retries <= 0:
                # Report the status that was actually received; it is not
                # necessarily a 429.
                print(f"Max retries exceeded for {code} error")
                raise
            attempt += 1

This code follows FP principles much better because it is composable and is independent from the getter. In addition to being easier to test, `get_response` won't be affected by changes to the getter function name or arguments. The only requirement is that the getter implements the correct interface.

In [69]:
# Define a function to replace `get_url`
# `count_tries` is module-level state: `mock_getter` increments it through a
# `global` statement, so it records how many times the getter was called.
count_tries = 0
# Retry budget passed to `get_response` in the test at the end of this cell.
retries = 10


async def mock_getter() -> httpx.Response:
    """Mocked getter function to test `get_response`.

    This getter uses the global `count_tries` variable to count the total
    number of times that it is called. It builds a 429 response locally
    instead of sending a request, so the test does not depend on the network.

    Returns
    -------
    httpx.Response
        never actually returned, because the 429 response always raises

    Raises
    ------
    httpx.HTTPStatusError
        on every call, carrying a response with status code 429
    """
    global count_tries
    count_tries += 1
    # `raise_for_status` needs a request attached to the response.
    request = httpx.Request("GET", "http://httpbin.org/status/429")
    resp = httpx.Response(status_code=429, request=request)
    resp.raise_for_status()
    return resp


try:
    await get_response(getter=mock_getter, getter_args={}, retries=retries)
except httpx.HTTPStatusError as e:
    assert e.response.status_code == 429
    assert (
        count_tries - 1 == retries
    ), f"There should have been {retries} retries, but there were {count_tries}"
    print("Success!")


Max retries exceeded for 429 error
Success!


## Is it really more simple?

The mocked getter counts the number of times that it is called, which confirms that when `get_response` encounters a 429 error, it retries the call the correct number of times. But this is still not very different from calling `get_url` directly in `get_response`; in Pytest I could have mocked `get_url` and replaced it with `mock_getter`. This version is a little bit simpler, but is this small improvement in testing worth changing the patterns you use to write code? There is a lot of cognitive load involved in adopting new patterns.

The other benefits come from this pattern's composability. Let's say I encounter an api that has the following rate limits:

* 3 calls per second
* 100 calls per minute

In this case, the Semaphore being used in `get_url` won't work because it merely limits the number of simultaneous calls that can be made--it does not limit the number of calls that can be made within a certain time period. To maximize the number of calls that I can make to this new API, without going over the limits, I could use the package [aiolimiter](https://aiolimiter.readthedocs.io/).

However, I can't just pass in a different `limiter` to `get_url` because there are actually two rate limits to deal with. Instead I need to rewrite `get_url` to allow for multiple limiters.

In [70]:
async def get_url_new_context_manager(
    url: str,
    client: httpx.AsyncClient,
    limiter1: asyncio.Semaphore | AsyncLimiter,
    limiter2: asyncio.Semaphore | AsyncLimiter,
) -> datetime:
    """Send an async GET request while holding two limiters and report when it was sent.

    Both limiters are entered as async context managers before the request is
    made, so the call is restricted by each of them; for example, one can cap
    the number of simultaneous calls and the other the number of calls made
    within a certain period.

    Parameters
    ----------
    url : str
        the url to get
    client : httpx.AsyncClient
        the httpx client to use for the call
    limiter1 : asyncio.Semaphore | AsyncLimiter
        the first limiter to acquire
    limiter2 : asyncio.Semaphore | AsyncLimiter
        the second limiter, acquired while `limiter1` is held

    Returns
    -------
    datetime
        the local time recorded immediately before the request was sent

    Raises
    ------
    httpx.HTTPStatusError
        if `raise_for_status` rejects the status code of the response
    """
    assert isinstance(client, httpx.AsyncClient)
    # Entered left to right and released in reverse, exactly like nested blocks.
    async with limiter1, limiter2:
        sent = datetime.now()
        response = await client.get(url)
    response.raise_for_status()
    return sent

In [71]:
# Create a tuple to hold multiple rate limiters
# I'm setting the rate limits at numbers that make them easier to test
limiter2 = Semaphore(2)
limiter1 = AsyncLimiter(max_rate=10,time_period=1)

async with httpx.AsyncClient() as client:
    # resp = await get_url_new_context_manager('http://httpbin.org/get', client=client,limiter=limiters)
    tasks = [
        asyncio.create_task(get_url_new_context_manager('http://httpbin.org/get', client=client,limiter1=limiter1, limiter2=limiter2))
        for _ in range(40)
        ]
    resp = await asyncio.gather(*tasks)


In [72]:
# One row per call: its position in the task list, the time it was sent, and
# the time elapsed since the first call in the list.
df = (
    pd.DataFrame(pd.to_datetime(list(resp)), columns=["call_time"])
    .reset_index()
    .rename(columns={"index": "call_num"})
    .assign(elapsed=lambda calls: calls["call_time"] - calls["call_time"][0])
)

Here you can see the `AsyncLimiter` working. The first 10 calls happen nearly simultaneously, then the leaky bucket algorithm kicks in to limit capacity.

In [73]:
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
px.scatter(
    df,
    x="call_num",
    y="elapsed",
    title="Elapsed time before each call was sent",
    labels={"call_num": "Call number", "elapsed": "Elapsed time since first call"},
)

If you zoom in on the first ten calls, you can see that they are not actually simultaneous. Rather, they are grouped in twos, showing that the `Semaphore` was only allowing two calls to be made simultaneously.

In [74]:
# Same plot restricted to the first ten calls; title and labels make it self-explanatory.
px.scatter(
    df.head(10),
    x="call_num",
    y="elapsed",
    title="Elapsed time before each of the first ten calls was sent",
    labels={"call_num": "Call number", "elapsed": "Elapsed time since first call"},
)