In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import evalugator
import evalugator.api
import evalugator.api.dispatcher
import evalugator.api.requests

# note: the types stored in YAML files are defined in `evalugator.structs`; YAML is likely only needed for non-Python use cases

In [3]:
# Enumerate the provider backends registered with the dispatcher.
print('Available providers:')

# Every entry of PROVIDERS is an imported Python module, so `__name__`
# yields its dotted import path.
for provider_module in evalugator.api.dispatcher.PROVIDERS:
    print(' - {}'.format(provider_module.__name__))

Available providers:
 - evalugator.api.providers.openai
 - evalugator.api.providers.anthropic
 - evalugator.api.providers.replicate
 - evalugator.api.providers.human
 - evalugator.api.providers.together_api


In [4]:
# Spot-check the OpenAI provider's model matching: an unknown model ID must be
# rejected and a known OpenAI model ID must be accepted.
# NOTE(review): `evalugator.api.providers` is never imported explicitly in this
# notebook; it is presumably reachable only because `evalugator.api.dispatcher`
# imports it — confirm, or import it explicitly in the imports cell.
assert not evalugator.api.providers.openai.provides_model('foo')
assert evalugator.api.providers.openai.provides_model('gpt-4o')

Note: The way `evalugator.api.dispatcher` works is:

```python
from functools import partial

from .providers import openai, anthropic, replicate, human, together_api

PROVIDERS = [openai, anthropic, replicate, human, together_api]

# note: the real implementation also checks that the model ID matches exactly one provider (that check is omitted from this sketch)
def get_model_provider(model_id):
    providers = []
    for provider in PROVIDERS:
        if provider.provides_model(model_id):
            return provider

def get_request_processor(request, model_id):
    provider = get_model_provider(model_id)
    return partial(provider.execute, model_id)


def encode(model_id, *args, **kwargs):
    provider = get_model_provider(model_id)
    return provider.encode(model_id, *args, **kwargs)


def decode(model_id, *args, **kwargs):
    provider = get_model_provider(model_id)
    return provider.decode(model_id, *args, **kwargs)
```

In [5]:
# this means the implicit provider API is:

from typing import Protocol, ParamSpec

P = ParamSpec('P')

# note: this could be a module or class
class Provider(Protocol):
    """Structural interface the dispatcher implicitly expects of a provider.

    A provider may be a module or a class. When it is a module, these are
    plain module-level functions, i.e. they are called without ``self``.
    """

    def provides_model(self, model_id: str) -> bool:
        """Report whether this provider handles ``model_id``."""

    def execute(
        self,
        model_id: str,
        request: evalugator.api.requests.Request,
    ) -> evalugator.api.requests.Response:
        """Process ``request`` for ``model_id`` and return the response."""

    def encode(self, model_id: str, *args: P.args, **kwargs: P.kwargs) -> str:
        """Provider-specific encode for ``model_id``.

        Extra arguments are whatever the dispatcher forwards unchanged.
        NOTE(review): the ``str`` return type is this notebook's guess —
        confirm against the concrete providers.
        """

    def decode(self, model_id: str, *args: P.args, **kwargs: P.kwargs) -> str:
        """Provider-specific decode for ``model_id``.

        Extra arguments are whatever the dispatcher forwards unchanged.
        NOTE(review): the ``str`` return type is this notebook's guess —
        confirm against the concrete providers.
        """

This then allows the `Api` class (sketched here as `API`) to be implemented roughly like this:

```python

class API:
    
    model_id: str
    executor: ThreadPoolExecutor

    # note: allows automatically saving results; essentially a poor man's async, which makes sense given that not all providers might support async
    def execute(self, request: Request) -> Future[Response]:
        func = dispatcher.get_request_processor(request, self.model_id)
        future = self.executor.submit(func, request)
        future.add_done_callback(self._log_response)
        return future

    def encode(self, *args, **kwargs):
        return dispatcher.encode(self.model_id, *args, **kwargs)

    def decode(self, *args, **kwargs):
        return dispatcher.decode(self.model_id, *args, **kwargs)
```

It additionally defines two convenience methods:

```python
    def get_text(self, ...) -> Future[GetTextResponse]:
        request = GetTextRequest(...)
        return self.execute(request)
    
    def get_probs(self, ...) -> Future[GetProbsResponse]:
        request = GetProbsRequest(...)
        return self.execute(request)
```

In [8]:
# Model to query; per the dispatcher sketch above, the ID is routed to whichever
# provider reports that it provides this model.
model_id = 'gpt-4o-mini'

# note: also allows `log_file_name`
api = evalugator.api.Api(model_id=model_id)

# `get_text` hands back a future rather than the response itself, so this
# line does not wait for the model's reply.
response_future = api.get_text(prompt='Hi!')

# actual waiting: `.result()` blocks until the request has finished
response = response_future.result()

# Bare last expression so the notebook displays the response object.
response

GetTextResponse(model_id='gpt-4o-mini', request=GetTextRequest(context=None, prompt=[Message(role='user', content='Hi!')], temperature=1, max_tokens=512), raw_responses=[ChatCompletion(id='chatcmpl-9y7CeaNPkZ7jesyuUlNBAOdyhKy1B', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hello! How can I assist you today?', refusal=None, role='assistant', function_call=None, tool_calls=None))], created=1724114396, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_48196bc67a', usage=CompletionUsage(completion_tokens=9, prompt_tokens=9, total_tokens=18))], context=None, txt='Hello! How can I assist you today?')

In [10]:
# Plain-dict view of the response; note that in the output below the
# `raw_responses` field shown in the repr above is not included.
response.as_dict()

{'model_id': 'gpt-4o-mini',
 'request': {'context': None,
  'prompt': [{'role': 'user', 'content': 'Hi!'}],
  'temperature': 1,
  'max_tokens': 512},
 'txt': 'Hello! How can I assist you today?',
 'context': None}

In [None]:
# we'll make a new provider with tool use

# note: task-standard/workbench/example-agents/fncall-baseline/commands.py
#       uses a `return` tool, which seems like an interesting technique