Skip to content

Commit

Permalink
Ability to set multiple columns, closes #5
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Apr 5, 2024
1 parent 2a5d64f commit 7110451
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 8 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ function enrich(row) {
return row["title"] + "!";
}
```
The return value of your function will be stored in the output column of your choice.

Instead of picking an output column, you can have your function return an object with keys and values.

This example takes a `point` column with values like `37.7749,-122.4194` and splits it into `latitude` and `longitude` columns:

```javascript
function enrich(row) {
  // "lat,lon" text -> one numeric value per output column
  const [lat, lon] = row.point.split(",");
  return {
    latitude: parseFloat(lat),
    longitude: parseFloat(lon)
  };
}
```
The enrichment will then create new columns in the table for each key in the object returned by that function.

## Development

Expand Down
49 changes: 41 additions & 8 deletions datasette_enrichments_quickjs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datasette_enrichments import Enrichment
from datasette import hookimpl
import json
import markupsafe
from quickjs import Function
import sqlite_utils
Expand All @@ -19,6 +20,9 @@ class QuickJsEnrichment(Enrichment):

async def initialize(self, datasette, db, table, config):
# Ensure column exists
if config["mode"] == "multi":
# No need to create columns for multi mode
return
output_column = config["output_column"]
column_type = config["output_column_type"].upper()

Expand All @@ -45,6 +49,17 @@ class ConfigForm(Form):
validators=[DataRequired(message="JavaScript function is required.")],
default='function enrich(row) {\n return JSON.stringify(row) + " enriched";\n}',
)
mode = SelectField(
"Output mode",
choices=[
("single", "Store the function result in a single column"),
(
"multi",
"Return an object and store each key in a separate column",
),
],
validators=[DataRequired(message="A mode is required.")],
)
output_column = StringField(
"Output column name",
description="The column to store the output in - will be created if it does not exist.",
Expand Down Expand Up @@ -85,11 +100,29 @@ async def enrich_batch(
output_column = config["output_column"]
for row in rows:
output = function(row)
await db.execute_write(
"update [{table}] set [{output_column}] = ? where {wheres}".format(
table=table,
output_column=output_column,
wheres=" and ".join('"{}" = ?'.format(pk) for pk in pks),
),
[output] + list(row[pk] for pk in pks),
)
if config["mode"] == "multi":
if isinstance(output, str) and not isinstance(output, dict):
try:
output = json.loads(output)
except json.JSONDecodeError:
output = {"javascript_output": output}
if len(pks) == 1:
pk_value = row[pks[0]]
else:
pk_value = (row[pk] for pk in pks)

def _update(conn):
sqlite_utils.Database(conn)[table].update(
pk_value, output, alter=True
)

await db.execute_write_fn(_update)
else:
await db.execute_write(
"update [{table}] set [{output_column}] = ? where {wheres}".format(
table=table,
output_column=output_column,
wheres=" and ".join('"{}" = ?'.format(pk) for pk in pks),
),
[output] + list(row[pk] for pk in pks),
)
27 changes: 27 additions & 0 deletions datasette_enrichments_quickjs/templates/enrichment-quickjs.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{% extends "enrichment.html" %}

{% block below_form %}
<script>
const modeSelect = document.querySelector('#mode');
const outputColumn = document.querySelector('#output_column').closest('.field');
const outputColumnLabel = document.querySelector('label[for=output_column]').closest('div');
const outputColumnType = document.querySelector('#output_column_type').closest('.field');
const outputColumnTypeLabel = document.querySelector('label[for=output_column_type]').closest('div');
function handleModeChange() {
  // The output column name/type inputs (and their labels) only apply when
  // a single column is being written, so hide them in multi mode.
  const display = modeSelect.value === 'multi' ? 'none' : '';
  const toggled = [
    outputColumn,
    outputColumnType,
    outputColumnLabel,
    outputColumnTypeLabel,
  ];
  for (const element of toggled) {
    element.style.display = display;
  }
}
modeSelect.addEventListener('change', handleModeChange);
handleModeChange();
</script>
{% endblock %}
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ test = ["pytest", "pytest-asyncio", "pytest-timeout"]

[tool.pytest.ini_options]
asyncio_mode = "strict"

[tool.setuptools.package-data]
datasette_enrichments_quickjs = ["templates/*"]
69 changes: 69 additions & 0 deletions tests/test_enrichments_quickjs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
from datasette.app import Datasette
import json
import pytest
import sqlite_utils

Expand Down Expand Up @@ -28,6 +29,7 @@ async def test_enrichment(tmpdir):
cookies = await _cookies(datasette)
post = {
"javascript": "function enrich(row) { return row.description.length }",
"mode": "single",
"output_column": "description_length",
"output_column_type": "integer",
}
Expand Down Expand Up @@ -64,6 +66,72 @@ async def test_enrichment(tmpdir):
]


@pytest.mark.asyncio
async def test_enrichment_multi(tmpdir):
    """Multi mode: each key of the object returned by enrich() becomes a column."""
    db_path = str(tmpdir / "data.db")
    datasette = Datasette([db_path], memory=True)
    sqlite_utils.Database(db_path)["items"].insert_all(
        [
            {"id": 1, "name": "NYC", "point": "40.71,-74.0"},
            {"id": 2, "name": "SF", "point": "37.77,-122.41"},
        ],
        pk="id",
    )

    cookies = await _cookies(datasette)
    response = await datasette.client.post(
        "/-/enrich/data/items/quickjs",
        data={
            "javascript": """
function enrich(row) {
const bits = row.point.split(",");
return {
"latitude": parseFloat(bits[0]),
"longitude": parseFloat(bits[1])
}
}
""",
            "mode": "multi",
            "csrftoken": cookies["ds_csrftoken"],
        },
        cookies=cookies,
    )
    assert response.status_code == 302

    # Give the enrichment job time to complete before inspecting the results.
    await asyncio.sleep(0.3)

    db = datasette.get_database("data")
    job_rows = await db.execute("select * from _enrichment_jobs")
    job = dict(job_rows.first())
    assert job["status"] == "finished"
    assert job["enrichment"] == "quickjs"
    assert job["done_count"] == 2

    # The original columns are untouched and the two new ones were added.
    item_rows = await db.execute("select * from items order by id")
    assert [dict(item) for item in item_rows.rows] == [
        {
            "id": 1,
            "name": "NYC",
            "point": "40.71,-74.0",
            "latitude": 40.71,
            "longitude": -74.0,
        },
        {
            "id": 2,
            "name": "SF",
            "point": "37.77,-122.41",
            "latitude": 37.77,
            "longitude": -122.41,
        },
    ]


@pytest.mark.asyncio
@pytest.mark.timeout(5)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -124,6 +192,7 @@ async def test_time_and_memory_limit(javascript, expected_error):
]
post = {
"javascript": javascript,
"mode": "single",
"output_column": "description_length",
"output_column_type": "integer",
}
Expand Down

0 comments on commit 7110451

Please sign in to comment.