src/memex/storage.py

# -*- coding: utf-8 -*-
"""
Annotation storage API.

This module provides the core API with access to basic persistence functions
for storing and retrieving annotations. Data passed to these functions is
assumed to be validated.
"""

from datetime import datetime

from pyramid import i18n

from memex import schemas
from memex import models
from memex.db import types


# Factory for translatable, user-facing message strings; the enclosing
# package's name is passed as the translation domain.
_ = i18n.TranslationStringFactory(__package__)


def fetch_annotation(session, id_):
    """
    Look up a single annotation by its id.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param id_: the annotation ID
    :type id_: str

    :returns: the matching annotation, or None when the lookup finds nothing
        or the id is rejected as an invalid UUID
    :rtype: memex.models.Annotation, NoneType
    """
    try:
        found = session.query(models.Annotation).get(id_)
    except types.InvalidUUID:
        # An id that is not a valid UUID is reported the same way as an id
        # with no matching row, instead of surfacing the error to the caller.
        found = None
    return found


def fetch_ordered_annotations(session, ids, query_processor=None):
    """
    Fetch the annotations with the given ids, ordered like the list of ids.

    A single query selects every annotation whose id is in `ids`; the results
    are then sorted in Python by the position of each annotation's id in
    `ids`. Ids without a matching annotation are simply absent from the
    result. If an id occurs more than once in `ids`, its last position is the
    one used for sorting.

    The optional `query_processor` is called with the query before it is run
    and must return a query again. This is mostly useful for eager-loading
    related data.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param ids: the list of annotation ids
    :type ids: list

    :param query_processor: an optional function that takes the query and
                            returns an updated query
    :type query_processor: callable

    :returns: the annotations that were found, in the order given by `ids`;
        an empty list when `ids` is empty
    :rtype: list of memex.models.Annotation
    """
    if not ids:
        return []

    # Remember where each id sits in the requested order.
    position_of = dict((ann_id, index) for index, ann_id in enumerate(ids))

    annotation_cls = models.Annotation
    query = session.query(annotation_cls)
    query = query.filter(annotation_cls.id.in_(ids))
    if query_processor:
        query = query_processor(query)

    # Running the query happens here; the rows are then re-ordered to follow
    # the caller's id order.
    annotations = list(query)
    annotations.sort(key=lambda annotation: position_of.get(annotation.id))
    return annotations


def create_annotation(request, data):
    """
    Create an annotation from passed data.

    The passed dictionary is not modified: the function works on a shallow
    copy, so the caller can keep using `data` afterwards.

    For replies (non-empty `references`) the group is taken from the
    annotation named by the first reference, overriding any `groupid` in
    `data`.

    :param request: the request object
    :type request: pyramid.request.Request

    :param data: a dictionary of annotation properties; it must contain the
        keys `document` (with `document_uri_dicts` and
        `document_meta_dicts`), `references` and `groupid`
    :type data: dict

    :returns: the created and flushed annotation
    :rtype: memex.models.Annotation

    :raises memex.schemas.ValidationError: if the annotation named by the
        first reference does not exist, or if the annotation's group is not
        `__world__` and the matching `group:<groupid>` principal is missing
        from the request's effective principals
    """
    created = updated = datetime.utcnow()

    # Work on a shallow copy so that removing 'document' and overriding
    # 'groupid' below does not leak into the caller's dictionary.
    data = dict(data)

    # The document metadata is stored separately and must not be passed to
    # the Annotation constructor.
    document = data.pop('document')
    document_uri_dicts = document['document_uri_dicts']
    document_meta_dicts = document['document_meta_dicts']

    # Replies must have the same group as their parent.
    if data['references']:
        top_level_annotation_id = data['references'][0]
        top_level_annotation = fetch_annotation(request.db,
                                                top_level_annotation_id)
        if top_level_annotation:
            data['groupid'] = top_level_annotation.groupid
        else:
            raise schemas.ValidationError(
                'references.0: ' +
                _('Annotation {id} does not exist').format(
                    id=top_level_annotation_id)
            )

    # The user must have permission to create an annotation in the group
    # they've asked to create one in.
    if data['groupid'] != '__world__':
        group_principal = 'group:{}'.format(data['groupid'])
        if group_principal not in request.effective_principals:
            raise schemas.ValidationError('group: ' +
                                          _('You may not create annotations '
                                            'in groups you are not a member '
                                            'of!'))

    annotation = models.Annotation(**data)
    annotation.created = created
    annotation.updated = updated

    # Create or update the document records for the annotation's target,
    # using the same timestamps as the annotation itself.
    document = models.update_document_metadata(
        request.db,
        annotation.target_uri,
        document_meta_dicts,
        document_uri_dicts,
        created=created,
        updated=updated)
    annotation.document = document

    request.db.add(annotation)
    request.db.flush()

    return annotation


def update_annotation(session, id_, data):
    """
    Update an existing annotation and its associated document metadata.

    Update the annotation identified by id_ with the given
    data. Create, delete and update document metadata as appropriate.

    The passed dictionary is not modified: the function works on a shallow
    copy, so the caller can keep using `data` afterwards.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param id_: the ID of the annotation to be updated, this is assumed to be a
        validated ID of an annotation that does already exist in the database
    :type id_: string

    :param data: the validated data with which to update the annotation. The
        optional `extra` entry is merged into the annotation's existing
        `extra` dict, the optional `document` entry is used to update the
        document metadata, and every other entry is set as an attribute on
        the annotation
    :type data: dict

    :returns: the updated annotation
    :rtype: memex.models.Annotation

    """
    updated = datetime.utcnow()

    # Work on a shallow copy so that taking 'document' and 'extra' out below
    # does not leak into the caller's dictionary.
    data = dict(data)

    # Remove any 'document' field first so that we don't try to save it on the
    # annotation object.
    document = data.pop('document', None)

    # 'extra' is merged into the stored dict rather than replacing it, so it
    # must not go through the generic setattr loop either.
    extra = data.pop('extra', {})

    annotation = session.query(models.Annotation).get(id_)
    annotation.updated = updated

    annotation.extra.update(extra)

    for key, value in data.items():
        setattr(annotation, key, value)

    # Document metadata is only touched when the update carries a document;
    # this runs after the attribute loop so a changed target_uri is used.
    if document:
        document_uri_dicts = document['document_uri_dicts']
        document_meta_dicts = document['document_meta_dicts']
        document = models.update_document_metadata(session,
                                                   annotation.target_uri,
                                                   document_meta_dicts,
                                                   document_uri_dicts,
                                                   updated=updated)
        annotation.document = document

    return annotation


def delete_annotation(session, id_):
    """
    Delete the annotation that has the given id.

    The deletion is issued through a query filtered on the id; nothing is
    returned to the caller.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param id_: the annotation ID
    :type id_: str
    """
    matching = session.query(models.Annotation).filter_by(id=id_)
    matching.delete()


def expand_uri(session, uri):
    """
    Return all URIs which refer to the same underlying document as `uri`.

    If a "document" record is found for the passed URI, the result is the
    list of all URIs recorded for that document. Two cases return just
    `[uri]` instead: no document is found, or the passed URI is recorded on
    the document as a "rel-canonical" link.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param uri: a URI associated with the document
    :type uri: str

    :returns: a list of equivalent URIs
    :rtype: list
    """
    document = models.Document.find_by_uris(session, [uri]).one_or_none()
    if document is None:
        return [uri]

    document_uris = document.document_uris

    # We check if the match was a "canonical" link. If so, all annotations
    # created on that page are guaranteed to have that as their target.source
    # field, so we don't need to expand to other URIs and risk false positives.
    matched_canonical = any(
        entry.uri == uri and entry.type == 'rel-canonical'
        for entry in document_uris)
    if matched_canonical:
        return [uri]

    return [entry.uri for entry in document_uris]