Skip to content

Commit

Permalink
Add estimate cost feature for enrichment tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Apr 26, 2024
1 parent 1353a46 commit aaa00ec
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 3 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ Once installed, this plugin will allow users to select rows to enrich and run th

The plugin also provides `gpt-4-turbo vision`, which can run prompts against an image identified by a URL.

### Estimate Cost Feature

The plugin includes an "Estimate cost" feature that lets users see an estimate for an enrichment task before running it. Click the "Estimate cost" button on the enrichment form to send the details of the task to the `/-/enrichments-gpt/estimate` API endpoint. The endpoint calculates an estimated token count for the task and returns it, and the form displays the result. Note that the estimate is reported as a number of tokens, not as a monetary amount.

The plugin's enrichment form template adds the "Estimate cost" button automatically, and the plugin registers the `/-/enrichments-gpt/estimate` endpoint itself, so no additional server configuration is needed to use this feature.

## Development

To set up this plugin locally, first checkout the code. Then create a new virtual environment:
Expand Down
30 changes: 28 additions & 2 deletions datasette_enrichments_gpt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations
from datasette_enrichments import Enrichment
from datasette import hookimpl
from datasette import hookimpl, Response
from datasette.database import Database
import httpx
from typing import List, Optional
Expand All @@ -15,12 +15,38 @@
from wtforms.validators import ValidationError, DataRequired
import secrets
import sqlite_utils

import threading
import tiktoken

@hookimpl
def register_enrichments():
    """Return the enrichment instances this plugin contributes."""
    enrichments = [GptEnrichment()]
    return enrichments

@hookimpl
def register_routes():
    """Return the (regex, view) route pairs this plugin adds."""
    # Single route: the token-estimate endpoint used by the enrichment form.
    estimate_route = (r"^/-/enrichments-gpt/estimate$", estimate_endpoint)
    return [estimate_route]

async def estimate_endpoint(request):
    """Handle POST /-/enrichments-gpt/estimate.

    Reads the ``template``, ``system_prompt``, ``filter_querystring`` and
    ``model`` form variables from the request body, passes them to
    ``estimate_tokens`` and returns ``{"estimated_tokens": <int>}`` as JSON.

    Returns a 405 JSON error for non-POST requests and a 400 JSON error
    naming any form fields that are missing, instead of failing with an
    unhandled ``KeyError``.
    """
    if request.method != "POST":
        return Response.json({"error": "POST required"}, status=405)

    post_form_data = await request.post_vars()

    required_fields = ("template", "system_prompt", "filter_querystring", "model")
    missing = [name for name in required_fields if name not in post_form_data]
    if missing:
        return Response.json(
            {"error": "Missing required fields: " + ", ".join(missing)},
            status=400,
        )

    estimated_tokens = await estimate_tokens(
        post_form_data["template"],
        post_form_data["system_prompt"],
        post_form_data["filter_querystring"],
        post_form_data["model"],
    )

    return Response.json({"estimated_tokens": estimated_tokens})

async def estimate_tokens(template, system_prompt, filter_querystring, model):
    """Return the number of tokens in the combined enrichment inputs.

    The template, system prompt and filter querystring are concatenated and
    encoded with the tiktoken encoding for ``model``; the length of the
    resulting token list is returned as an ``int``.

    NOTE(review): only the raw form inputs are counted. The template is not
    expanded against the rows selected by ``filter_querystring`` - confirm
    whether a per-row estimate is intended.
    """
    # Treat a missing (None) part as empty rather than failing on str + None.
    text = "".join(
        part or "" for part in (template, system_prompt, filter_querystring)
    )
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to an
        # encoding; fall back to a general-purpose encoding so an estimate
        # is still produced.
        encoding = tiktoken.get_encoding("cl100k_base")
    # disallowed_special=() makes user text that happens to contain a
    # special-token string (e.g. "<|endoftext|>") encode as ordinary text
    # instead of raising ValueError.
    return len(encoding.encode(text, disallowed_special=()))

class GptEnrichment(Enrichment):
name = "AI analysis with OpenAI GPT"
Expand Down
23 changes: 23 additions & 0 deletions datasette_enrichments_gpt/templates/enrichment-gpt.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,28 @@
}
modelSelect.addEventListener('change', handleModelChange);
handleModelChange();

// Add an "Estimate cost" button and a result area to the form.
// NOTE(review): querySelector('form') selects the first <form> on the page -
// confirm that this is always the enrichment form.
const estimateForm = document.querySelector('form');

const estimateButton = document.createElement('button');
estimateButton.type = 'button';
estimateButton.textContent = 'Estimate cost';
estimateButton.id = 'estimate-cost';
estimateForm.appendChild(estimateButton);

// Display area for the estimate (or an error message).
const estimateDisplay = document.createElement('div');
estimateDisplay.id = 'estimate-display';
estimateForm.appendChild(estimateDisplay);

// On click, POST the current form values to the estimate endpoint.
estimateButton.addEventListener('click', async () => {
  // A raw FormData body is sent as multipart/form-data; wrapping it in
  // URLSearchParams sends application/x-www-form-urlencoded instead, which
  // is what the server reads via request.post_vars().
  const formBody = new URLSearchParams(new FormData(estimateForm));
  try {
    const response = await fetch('/-/enrichments-gpt/estimate', {
      method: 'POST',
      body: formBody
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const data = await response.json();
    estimateDisplay.textContent = `Estimated cost: ${data.estimated_tokens} tokens`;
  } catch (error) {
    // Show the failure instead of leaving the display blank or stale.
    estimateDisplay.textContent = `Could not estimate cost: ${error.message}`;
  }
});
</script>
{% endblock %}
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ classifiers=[
requires-python = ">=3.8"
dependencies = [
"datasette-enrichments>=0.2",
"sqlite-utils"
"sqlite-utils",
"tiktoken"
]

[project.urls]
Expand Down
16 changes: 16 additions & 0 deletions tests/test_enrichments_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,19 @@ async def test_plugin_is_installed():
assert response.status_code == 200
installed_plugins = {p["name"] for p in response.json()}
assert "datasette-enrichments-gpt" in installed_plugins

@pytest.mark.asyncio
async def test_estimate_endpoint():
    """A form POST to the estimate endpoint returns an integer token count."""
    datasette = Datasette(memory=True)
    # The endpoint reads its input with request.post_vars(), so the fields
    # must be sent form-encoded (data=) rather than as a JSON body (json=).
    response = await datasette.client.post(
        "/-/enrichments-gpt/estimate",
        data={
            "template": "Test template",
            "system_prompt": "Test system prompt",
            "filter_querystring": "id=1",
            "model": "gpt-4",
        },
    )
    assert response.status_code == 200
    payload = response.json()
    assert "estimated_tokens" in payload
    assert isinstance(payload["estimated_tokens"], int)

0 comments on commit aaa00ec

Please sign in to comment.