From 60806fe560d9b745220da6691ed7ded3cc3d9e41 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Mon, 18 May 2026 20:22:59 -0400 Subject: [PATCH] fixed bug the envelope not working with cli.py --- .github/workflows/container.yml | 2 +- clams/app/__init__.py | 15 +++- clams/envelop/__init__.py | 46 +++++++++- clams/restify/__init__.py | 46 +++------- tests/test_envelope.py | 151 ++++++++++++++++++++++++++++---- 5 files changed, 205 insertions(+), 55 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index 2296743..ad925a7 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -59,7 +59,7 @@ jobs: echo "slug=${slug}" >> $GITHUB_OUTPUT - name: "🛍️ Checkout repository" - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: ref: ${{ inputs.ref || inputs.version }} diff --git a/clams/app/__init__.py b/clams/app/__init__.py index 11a9894..1d3f0a2 100644 --- a/clams/app/__init__.py +++ b/clams/app/__init__.py @@ -1,3 +1,4 @@ +import json import logging import os import pathlib @@ -19,6 +20,7 @@ ) from mmif.utils.workflow_helper import generate_param_hash # pytype: disable=import-error from clams.appmetadata import AppMetadata, real_valued_primitives, python_type, map_param_kv_delimiter +from clams.envelop import unwrap_if_envelope logging.basicConfig( level=getattr(logging, os.environ.get('CLAMS_LOGLEVEL', 'WARNING').upper(), logging.WARNING), @@ -159,11 +161,19 @@ def annotate(self, mmif: Union[str, dict, Mmif], **runtime_params: List[str]) -> wrapper around :meth:`~clams.app.ClamsApp._annotate` method where some common operations (that are invoked by keyword arguments) are implemented. - :param mmif: An input MMIF object to annotate + The input may be a raw MMIF (str, dict, or :class:`~mmif.serialize.mmif.Mmif`) + or a JSON envelope wrapping both ``"parameters"`` and ``"mmif"``. + Envelope detection and unwrapping happen here so every execution + path (HTTP, CLI, direct Python API) is envelope-aware. When an + envelope is given, its parameters are merged under ``runtime_params`` + (explicitly-passed parameters take priority on key collision). + + :param mmif: An input MMIF object, or a JSON envelope, to annotate :param runtime_params: An arbitrary set of k-v pairs to configure the app at runtime :return: Serialized JSON string of the output of the app """ if not isinstance(mmif, Mmif): + mmif, runtime_params = unwrap_if_envelope(mmif, runtime_params) mmif = Mmif(mmif) existing_view_ids = {view.id for view in mmif.views} issued_warnings = [] @@ -329,7 +339,8 @@ def set_error_view(self, mmif: Union[str, dict, Mmif], **runtime_conf: List[str] :return: An output MMIF with a new view with the error encoded in the view metadata """ import traceback - if isinstance(mmif, bytes) or isinstance(mmif, str) or isinstance(mmif, dict): + if isinstance(mmif, (bytes, str, dict)): + mmif, runtime_conf = unwrap_if_envelope(mmif, runtime_conf) mmif = Mmif(mmif) error_view: Optional[View] = None for view in reversed(mmif.views): diff --git a/clams/envelop/__init__.py b/clams/envelop/__init__.py index 6be34e1..c925bb8 100644 --- a/clams/envelop/__init__.py +++ b/clams/envelop/__init__.py @@ -11,6 +11,16 @@ MMIF_KEY = 'mmif' +class EnvelopeError(ValueError): + """ + Raised when an input body is detected as an envelope (has a + ``"parameters"`` key) but is otherwise malformed. Subclasses + ``ValueError`` so existing ``except ValueError`` handlers keep + working, while still being distinguishable from unrelated + ``ValueError``\\ s raised by app code. + """ + + def normalize_params(params: dict) -> Dict[str, List[str]]: """ Normalize JSON-native parameter values to the @@ -56,23 +66,53 @@ def unwrap_envelope(body: dict) -> Tuple[str, Dict[str, List[str]]]: :param body: parsed JSON dict with ``"parameters"`` and ``"mmif"`` :returns: tuple of (mmif_json_string, normalized_params) :rtype: Tuple[str, Dict[str, List[str]]] - :raises ValueError: if ``"mmif"`` key is missing or + :raises EnvelopeError: if ``"mmif"`` key is missing or ``"parameters"`` is not a dict """ params = body.get(ENVELOPE_KEY) if not isinstance(params, dict): - raise ValueError( + raise EnvelopeError( f'"{ENVELOPE_KEY}" must be a JSON object, ' f'got {type(params).__name__}' ) if MMIF_KEY not in body: - raise ValueError( + raise EnvelopeError( f'Envelope is missing required "{MMIF_KEY}" key' ) mmif_str = json.dumps(body[MMIF_KEY]) return mmif_str, normalize_params(params) +def unwrap_if_envelope(data, runtime_params): + """ + If ``data`` is (or decodes to) an envelope, return the inner MMIF + together with envelope parameters merged under the explicitly-passed + ``runtime_params`` (so query-string / CLI flags take priority). If + ``data`` is not an envelope, return it unchanged. + + This is the single entry point used by every execution path + (HTTP, CLI, direct Python API) so envelope handling is uniform + regardless of how the app is invoked. + + :param data: raw input -- ``bytes``, ``str``, or ``dict`` + :param runtime_params: explicitly-passed parameters that override + envelope parameters on key collision + :returns: tuple of (mmif_or_original_data, effective_params) + :raises EnvelopeError: if ``data`` is a malformed envelope + """ + raw = data.decode('utf-8') if isinstance(data, bytes) else data + body = raw + if isinstance(raw, str): + try: + body = json.loads(raw) + except (json.JSONDecodeError, ValueError): + return data, runtime_params + if is_envelope(body): + inner_mmif, envelope_params = unwrap_envelope(body) + return inner_mmif, {**envelope_params, **runtime_params} + return data, runtime_params + + def create_envelope( mmif: Union[str, dict, Mmif], parameters: Optional[dict] = None, diff --git a/clams/restify/__init__.py b/clams/restify/__init__.py index a059312..5f99045 100644 --- a/clams/restify/__init__.py +++ b/clams/restify/__init__.py @@ -3,10 +3,9 @@ import jsonschema from flask import Flask, request, Response from flask_restful import Resource, Api -from mmif import Mmif from clams.app import ClamsApp -from clams.envelop import is_envelope, unwrap_envelope +from clams.envelop import EnvelopeError class Restifier(object): @@ -191,50 +190,33 @@ def post(self) -> Response: Maps HTTP POST verb to :meth:`~clams.app.ClamsApp.annotate`. Note that for now HTTP PUT verbs is also mapped to :meth:`~clams.app.ClamsApp.annotate`. - The request body can be either raw MMIF JSON or a JSON envelope - containing both ``"parameters"`` and ``"mmif"`` keys. When an - envelope is detected, parameters are normalized and merged with - any query-string parameters (query string takes priority). - :return: Returns MMIF output from a ClamsApp in a HTTP response. """ raw_data = request.get_data().decode('utf-8') # this will catch duplicate arguments with different values into a list under the key raw_params = request.args.to_dict(flat=False) try: - body = json.loads(raw_data) - except (json.JSONDecodeError, ValueError): - return Response( - response="Invalid JSON in request body.", - status=500, mimetype='text/plain') - if is_envelope(body): - try: - mmif_data, envelope_params = unwrap_envelope(body) - except ValueError as e: - return Response( - response=f"Invalid envelope format: {e}", - status=500, mimetype='text/plain') - # query string overrides envelope parameters - params = {**envelope_params, **raw_params} - else: - mmif_data = raw_data - params = raw_params - try: - _ = Mmif(mmif_data) - except jsonschema.exceptions.ValidationError as e: + return self.json_to_response( + self.cla.annotate(raw_data, **raw_params)) + except (jsonschema.exceptions.ValidationError, + json.JSONDecodeError, EnvelopeError) as e: + # jsonschema's str(e) dumps the entire MMIF schema; use its + # concise .message instead so envelope and MMIF input + # errors share the same compact payload format. + detail = ( + e.message + if isinstance(e, jsonschema.exceptions.ValidationError) + else str(e)) return Response( response="Invalid input data. " "See below for validation error.\n\n" - + str(e), + + detail, status=500, mimetype='text/plain') - try: - return self.json_to_response( - self.cla.annotate(mmif_data, **params)) except Exception: self.cla.logger.exception("Error in annotation") return self.json_to_response( self.cla.record_error( - mmif_data, **params + raw_data, **raw_params ).serialize(pretty=True), status=500) diff --git a/tests/test_envelope.py b/tests/test_envelope.py index 833042f..b295475 100644 --- a/tests/test_envelope.py +++ b/tests/test_envelope.py @@ -12,8 +12,9 @@ import clams.app from clams.appmetadata import AppMetadata from clams.envelop import ( - create_envelope, is_envelope, main as envelope_cli_main, - normalize_params, prep_argparser, unwrap_envelope, + EnvelopeError, create_envelope, is_envelope, + main as envelope_cli_main, normalize_params, prep_argparser, + unwrap_envelope, unwrap_if_envelope, ) from tests.test_clamsapp import ExampleInputMMIF @@ -38,6 +39,8 @@ def _annotate(self, mmif, **kwargs): self.sign_view(new_view, kwargs) new_view.new_contain(AnnotationTypes.TimeFrame, producer='envelope-test') + if kwargs.get('raise_error'): + raise ValueError('boom') return mmif @@ -108,14 +111,16 @@ def test_is_envelope_false(self): def test_is_envelope_non_dict(self): self.assertFalse(is_envelope('not a dict')) + self.assertFalse(is_envelope(None)) + self.assertFalse(is_envelope([])) def test_unwrap_missing_mmif(self): - with self.assertRaises(ValueError) as ctx: + with self.assertRaises(EnvelopeError) as ctx: unwrap_envelope({'parameters': {}}) self.assertIn('mmif', str(ctx.exception).lower()) def test_unwrap_non_dict_parameters(self): - with self.assertRaises(ValueError) as ctx: + with self.assertRaises(EnvelopeError) as ctx: unwrap_envelope({'parameters': 'bad', 'mmif': {}}) self.assertIn('object', str(ctx.exception).lower()) @@ -141,6 +146,14 @@ def test_from_mmif_object(self): self.assertIn('parameters', result) self.assertIn('mmif', result) + def test_from_dict(self): + mmif_dict = json.loads(self.mmif_str) + result = json.loads( + create_envelope(mmif_dict, {'pretty': True}) + ) + self.assertEqual(result['mmif'], mmif_dict) + self.assertEqual(result['parameters']['pretty'], True) + def test_no_params(self): result = json.loads(create_envelope(self.mmif_str)) self.assertEqual(result['parameters'], {}) @@ -158,6 +171,49 @@ def test_roundtrip(self): self.assertEqual(normalized['labels'], ['a', 'b']) +class TestUnwrapIfEnvelope(unittest.TestCase): + """ + Direct tests for the shared helper used by every entry point. + """ + + def setUp(self): + self.mmif_str = ExampleInputMMIF.get_mmif() + + def test_str_envelope(self): + env = create_envelope(self.mmif_str, {'prompt': 'x'}) + data, params = unwrap_if_envelope(env, {}) + Mmif(data) # inner MMIF extracted and valid + self.assertEqual(params, {'prompt': ['x']}) + + def test_bytes_envelope(self): + env = create_envelope( + self.mmif_str, {'prompt': 'x'}).encode('utf-8') + data, params = unwrap_if_envelope(env, {}) + Mmif(data) + self.assertEqual(params, {'prompt': ['x']}) + + def test_dict_envelope(self): + env = json.loads(create_envelope(self.mmif_str, {'prompt': 'x'})) + data, params = unwrap_if_envelope(env, {}) + Mmif(data) + self.assertEqual(params, {'prompt': ['x']}) + + def test_explicit_params_win(self): + env = create_envelope(self.mmif_str, {'prompt': 'env'}) + data, params = unwrap_if_envelope(env, {'prompt': ['cli']}) + self.assertEqual(params['prompt'], ['cli']) + + def test_non_envelope_str_passthrough(self): + data, params = unwrap_if_envelope(self.mmif_str, {'a': ['1']}) + self.assertEqual(data, self.mmif_str) + self.assertEqual(params, {'a': ['1']}) + + def test_valid_json_non_dict_passthrough(self): + data, params = unwrap_if_envelope('123', {}) + self.assertEqual(data, '123') + self.assertEqual(params, {}) + + class TestRestifierEnvelope(unittest.TestCase): def setUp(self): @@ -180,25 +236,18 @@ def test_put_envelope(self): self.assertEqual(res.status_code, 200) Mmif(res.get_data(as_text=True)) - def test_query_string_overrides_envelope(self): - envelope_str = create_envelope( - self.mmif_str, {'pretty': False} - ) - # query string says pretty=true, should override envelope - res = self.client.post( - '/', data=envelope_str, - query_string={'pretty': 'true'}, - ) - self.assertEqual(res.status_code, 200) - # indented JSON indicates pretty=true was honored - output = res.get_data(as_text=True) - self.assertIn('\n', output) + PREFIX = "Invalid input data. See below for validation error." def test_envelope_missing_mmif(self): bad = json.dumps({'parameters': {'pretty': True}}) res = self.client.post('/', data=bad) self.assertEqual(res.status_code, 500) self.assertEqual(res.mimetype, 'text/plain') + body = res.get_data(as_text=True) + # EnvelopeError 500 uses the same payload format as the + # MMIF-validation 500 + self.assertTrue(body.startswith(self.PREFIX)) + self.assertIn('mmif', body) def test_envelope_invalid_mmif(self): bad = json.dumps({ @@ -208,6 +257,11 @@ def test_envelope_invalid_mmif(self): res = self.client.post('/', data=bad) self.assertEqual(res.status_code, 500) self.assertEqual(res.mimetype, 'text/plain') + body = res.get_data(as_text=True) + # trimmed: concise jsonschema message, NOT the full schema dump + self.assertTrue(body.startswith(self.PREFIX)) + self.assertNotIn('$schema', body) + self.assertLess(len(body), 500) def test_raw_mmif_still_works(self): res = self.client.post('/', data=self.mmif_str) @@ -219,6 +273,23 @@ def test_invalid_json(self): self.assertEqual(res.status_code, 500) self.assertEqual(res.mimetype, 'text/plain') + def test_envelope_app_error_returns_error_view(self): + # app raises during _annotate with envelope input: the error + # must come back as an error-view MMIF (application/json), + # NOT a text/plain input error, and set_error_view must have + # unwrapped the envelope to record the params. + envelope_str = create_envelope( + self.mmif_str, {'prompt': 'p', 'raise_error': True} + ) + res = self.client.post('/', data=envelope_str) + self.assertEqual(res.status_code, 500) + self.assertEqual(res.mimetype, 'application/json') + out = Mmif(res.get_data(as_text=True)) + err_meta = json.loads(list(out.views)[-1].metadata.serialize()) + self.assertIn('error', err_meta) + self.assertEqual( + err_meta.get('parameters', {}).get('prompt'), 'p') + class TestEnvelopeReproducibility(unittest.TestCase): """ @@ -320,6 +391,52 @@ def test_cli_envelope_can_be_consumed_by_restifier(self): Mmif(res.get_data(as_text=True)) +class TestEnvelopeNonHTTPEntrypoints(unittest.TestCase): + """ + Envelope handling lives in ClamsApp.annotate(), so it must work + when annotate() is called directly -- i.e. the path used by the + cli.py entry point and any programmatic use -- not just over HTTP. + """ + + def setUp(self): + self.app = EnvelopeTestApp() + self.mmif_str = ExampleInputMMIF.get_mmif() + + def _signed_params(self, out_mmif_str): + out = Mmif(out_mmif_str) + signed = list(out.views)[-1] + return json.loads(signed.metadata.serialize()).get( + 'parameters', {}) + + def test_annotate_accepts_envelope_string(self): + # exactly what cli.py does: clamsapp.annotate(in_data, **params) + envelope_str = create_envelope( + self.mmif_str, {'prompt': 'from cli'} + ) + out = self.app.annotate(envelope_str) + params = self._signed_params(out) + self.assertEqual(params.get('prompt'), 'from cli') + + def test_annotate_raw_mmif_still_works(self): + # raw MMIF (no envelope) passes through; app adds one view + out = Mmif(self.app.annotate(self.mmif_str)) + self.assertEqual(len(list(out.views)), 1) + + def test_explicit_params_override_envelope(self): + envelope_str = create_envelope( + self.mmif_str, {'prompt': 'envelope value'} + ) + # CLI flags / kwargs arrive as lists, like argparse produces + out = self.app.annotate(envelope_str, prompt=['kwarg value']) + params = self._signed_params(out) + self.assertEqual(params.get('prompt'), 'kwarg value') + + def test_malformed_envelope_raises_envelope_error(self): + bad = json.dumps({'parameters': {'p': 1}}) # missing "mmif" + with self.assertRaises(EnvelopeError): + self.app.annotate(bad) + + class TestEnvelopePythonAPI(unittest.TestCase): def test_create_envelope_at_package_root(self):