Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clams/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@

import mmif
from clams import develop
from clams import envelop
from clams.app import *
from clams.app import __all__ as app_all
from clams.appmetadata import AppMetadata
from clams.envelop import create_envelope
from clams.restify import Restifier
from clams.ver import __version__

Expand All @@ -32,6 +34,7 @@ def cli():
to_register = list(mmif.find_all_modules('mmif.utils.cli'))
# then add my own subcommands
to_register.append(develop)
to_register.append(envelop)
for cli_module in to_register:
cli_module_name = cli_module.__name__.rsplit('.')[-1]
cli_modules[cli_module_name] = cli_module
Expand Down
161 changes: 161 additions & 0 deletions clams/envelop/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import argparse
import json
import sys
from typing import Dict, List, Optional, Tuple, Union

from mmif import Mmif

from clams.appmetadata import map_param_kv_delimiter

# Top-level key of an envelope that holds the runtime parameters object;
# its presence is also what marks a JSON body as an envelope.
ENVELOPE_KEY = 'parameters'
# Top-level key of an envelope that holds the wrapped MMIF document.
MMIF_KEY = 'mmif'


def normalize_params(params: dict) -> Dict[str, List[str]]:
    """
    Turn JSON-native parameter values into lists of strings.

    The output follows the ``Dict[str, List[str]]`` layout expected by
    :class:`~clams.app.ParameterCaster`. Each value is converted as
    follows:

    * a list yields ``str()`` of every element, in order;
    * a dict yields one ``<key><delimiter><value>`` string per item,
      joined with ``map_param_kv_delimiter``;
    * any other value yields a single-element list with ``str(value)``.

    :param params: parameter dict with JSON-native values
    :returns: new dict with the same keys, every value a list of strings
    :rtype: Dict[str, List[str]]
    """
    def _stringify(value) -> List[str]:
        # Lists are tested before dicts, mirroring the dispatch order
        # callers have always seen.
        if isinstance(value, list):
            return [str(item) for item in value]
        if isinstance(value, dict):
            return [
                f"{key}{map_param_kv_delimiter}{val}"
                for key, val in value.items()
            ]
        return [str(value)]

    return {name: _stringify(value) for name, value in params.items()}


def is_envelope(body: dict) -> bool:
    """
    Tell whether a parsed JSON body looks like an envelope.

    A body counts as an envelope when it is a dict carrying a top-level
    ``"parameters"`` key, which is never part of the MMIF schema.
    Anything that is not a dict is never an envelope.

    :param body: parsed JSON value (normally a dict)
    :returns: True if the body appears to be an envelope
    :rtype: bool
    """
    if not isinstance(body, dict):
        return False
    return ENVELOPE_KEY in body


def unwrap_envelope(body: dict) -> Tuple[str, Dict[str, List[str]]]:
    """
    Split an envelope into its MMIF payload and normalized parameters.

    The ``"parameters"`` entry is validated first, then the presence of
    the ``"mmif"`` entry. The MMIF value is re-serialized to a JSON
    string and the parameters are passed through
    :func:`normalize_params`.

    :param body: parsed JSON dict with ``"parameters"`` and ``"mmif"``
    :returns: tuple of (mmif_json_string, normalized_params)
    :rtype: Tuple[str, Dict[str, List[str]]]
    :raises ValueError: if ``"parameters"`` is not a dict or the
        ``"mmif"`` key is missing
    """
    raw_params = body.get(ENVELOPE_KEY)
    if isinstance(raw_params, dict):
        if MMIF_KEY in body:
            serialized_mmif = json.dumps(body[MMIF_KEY])
            return serialized_mmif, normalize_params(raw_params)
        raise ValueError(f'Envelope is missing required "{MMIF_KEY}" key')
    raise ValueError(
        f'"{ENVELOPE_KEY}" must be a JSON object, got {type(raw_params).__name__}'
    )


def create_envelope(
    mmif: Union[str, dict, Mmif],
    parameters: Optional[dict] = None,
) -> str:
    """
    Build the JSON envelope string that bundles MMIF with parameters.

    The MMIF input is first brought to a JSON-native object: a
    :class:`~mmif.serialize.mmif.Mmif` instance is serialized and
    parsed, a string is parsed, and any other value is used as given.
    The envelope lists the parameters entry before the MMIF entry.

    :param mmif: MMIF as a string, dict, or
        :class:`~mmif.serialize.mmif.Mmif` object
    :param parameters: parameter dict with JSON-native values; ``None``
        is written as an empty object
    :returns: JSON string of the envelope
    :rtype: str
    """
    if isinstance(mmif, (Mmif, str)):
        # Both Mmif objects and raw strings go through json.loads so the
        # envelope embeds a JSON object rather than an escaped string.
        text = mmif.serialize() if isinstance(mmif, Mmif) else mmif
        payload = json.loads(text)
    else:
        payload = mmif

    wrapped = {}
    wrapped[ENVELOPE_KEY] = {} if parameters is None else parameters
    wrapped[MMIF_KEY] = payload
    return json.dumps(wrapped)


# -- CLI interface ---------------------------------------------------

def describe_argparser():
    """
    Provide the help texts for the envelop subcommand.

    :returns: tuple of (one-line help, detailed description)
    """
    summary = 'create a JSON envelope wrapping MMIF and runtime parameters'
    description = ''.join([
        'Reads a JSON parameter file and an MMIF file (or stdin), ',
        'combines them into a JSON envelope, and writes the result ',
        'to stdout. The envelope can be POSTed directly to a CLAMS ',
        'app HTTP endpoint.',
    ])
    return summary, description


def prep_argparser(**kwargs):
    """
    Build the argument parser for the envelop subcommand.

    Two positionals are registered: ``PARAMS_FILE`` (required) and
    ``MMIF_FILE`` (optional). When ``MMIF_FILE`` is omitted it defaults
    to ``sys.stdin`` if stdin is not a TTY, and to ``None`` otherwise.

    :param kwargs: extra keyword arguments forwarded to
        :class:`argparse.ArgumentParser`
    :returns: argparse.ArgumentParser for the envelop subcommand
    """
    _, description = describe_argparser()
    # Only fall back to stdin when something is actually piped in.
    piped_stdin = sys.stdin if not sys.stdin.isatty() else None

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        **kwargs,
    )

    params_help = 'Path to a JSON file containing runtime parameters.'
    parser.add_argument(
        'PARAMS_FILE',
        type=argparse.FileType('r'),
        help=params_help,
    )

    mmif_help = (
        'Path to the input MMIF file, or stdin if omitted. '
        'Use "-" to explicitly read from stdin.'
    )
    parser.add_argument(
        'MMIF_FILE',
        nargs='?',
        type=argparse.FileType('r'),
        default=piped_stdin,
        help=mmif_help,
    )
    return parser


def main(args):
    """
    CLI entry point. Reads params JSON and MMIF, writes envelope
    JSON to stdout.

    Exits with status 1 (message on stderr) when no MMIF input is
    available, or when the parameter file holds something other than a
    JSON object. A JSON ``null`` parameter file is still accepted and
    results in an empty parameters object.

    :param args: parsed arguments exposing ``PARAMS_FILE`` and
        ``MMIF_FILE`` as open text file objects (``MMIF_FILE`` may be
        ``None`` when nothing was given and stdin is a TTY)
    """
    if args.MMIF_FILE is None:
        print(
            'Error: no MMIF input provided '
            '(pass a file path or pipe to stdin)',
            file=sys.stderr,
        )
        sys.exit(1)
    try:
        params = json.load(args.PARAMS_FILE)
        # Fail early: an envelope whose "parameters" is not an object
        # would only be rejected later by the receiving side.
        if params is not None and not isinstance(params, dict):
            print(
                'Error: parameter file must contain a JSON object, '
                f'got {type(params).__name__}',
                file=sys.stderr,
            )
            sys.exit(1)
        mmif_str = args.MMIF_FILE.read()
    finally:
        # The handles were opened by the argument parser; release them
        # here, but leave the process-wide stdin stream alone.
        for handle in (args.PARAMS_FILE, args.MMIF_FILE):
            if handle is not sys.stdin:
                handle.close()
    print(create_envelope(mmif_str, parameters=params))
43 changes: 39 additions & 4 deletions clams/restify/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import json

import jsonschema
from flask import Flask, request, Response
from flask_restful import Resource, Api
from mmif import Mmif

from clams.app import ClamsApp
from clams.envelop import is_envelope, unwrap_envelope


class Restifier(object):
Expand Down Expand Up @@ -188,19 +191,51 @@ def post(self) -> Response:
Maps HTTP POST verb to :meth:`~clams.app.ClamsApp.annotate`.
Note that for now HTTP PUT verbs is also mapped to :meth:`~clams.app.ClamsApp.annotate`.

The request body can be either raw MMIF JSON or a JSON envelope
containing both ``"parameters"`` and ``"mmif"`` keys. When an
envelope is detected, parameters are normalized and merged with
any query-string parameters (query string takes priority).

:return: Returns MMIF output from a ClamsApp in a HTTP response.
"""
raw_data = request.get_data().decode('utf-8')
# this will catch duplicate arguments with different values into a list under the key
raw_params = request.args.to_dict(flat=False)
try:
_ = Mmif(raw_data)
body = json.loads(raw_data)
except (json.JSONDecodeError, ValueError):
return Response(
response="Invalid JSON in request body.",
status=500, mimetype='text/plain')
if is_envelope(body):
try:
mmif_data, envelope_params = unwrap_envelope(body)
except ValueError as e:
return Response(
response=f"Invalid envelope format: {e}",
status=500, mimetype='text/plain')
# query string overrides envelope parameters
params = {**envelope_params, **raw_params}
else:
mmif_data = raw_data
params = raw_params
try:
_ = Mmif(mmif_data)
except jsonschema.exceptions.ValidationError as e:
return Response(response="Invalid input data. See below for validation error.\n\n" + str(e), status=500, mimetype='text/plain')
return Response(
response="Invalid input data. "
"See below for validation error.\n\n"
+ str(e),
status=500, mimetype='text/plain')
try:
return self.json_to_response(self.cla.annotate(raw_data, **raw_params))
return self.json_to_response(
self.cla.annotate(mmif_data, **params))
except Exception:
self.cla.logger.exception("Error in annotation")
return self.json_to_response(self.cla.record_error(raw_data, **raw_params).serialize(pretty=True), status=500)
return self.json_to_response(
self.cla.record_error(
mmif_data, **params
).serialize(pretty=True),
status=500)

put = post
Loading
Loading