-
Notifications
You must be signed in to change notification settings - Fork 420
/
storage.py
230 lines (170 loc) · 6.99 KB
/
storage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# -*- coding: utf-8 -*-
"""
Annotation storage API.
This module provides the core API with access to basic persistence functions
for storing and retrieving annotations. Data passed to these functions is
assumed to be validated.
"""
from datetime import datetime
from pyramid import i18n
from memex import schemas
from memex import models
from memex.db import types
# Translation string factory keyed on this package's name; used in this module
# to wrap the user-facing validation error messages.
_ = i18n.TranslationStringFactory(__package__)
def fetch_annotation(session, id_):
    """
    Look up a single annotation by its id.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param id_: the annotation ID
    :type id_: str

    :returns: the matching annotation, or None when nothing matches or the
        id is rejected with ``types.InvalidUUID``
    :rtype: memex.models.Annotation, NoneType
    """
    try:
        annotation = session.query(models.Annotation).get(id_)
    except types.InvalidUUID:
        # An id that is not a valid UUID is reported as "not found" rather
        # than as an error.
        annotation = None
    return annotation
def fetch_ordered_annotations(session, ids, query_processor=None):
    """
    Fetch the annotations with the given ids, ordered like the list of ids.

    `query_processor`, when given, is called with the query before it is
    run and must return a query object again. This is mainly useful for
    eager-loading related data.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param ids: the list of annotation ids
    :type ids: list

    :param query_processor: an optional function that takes the query and
        returns an updated query
    :type query_processor: callable

    :returns: the annotations returned by the query, sorted by the position
        of their id in `ids`; an empty list when `ids` is empty
    :rtype: list of memex.models.Annotation
    """
    if not ids:
        return []

    # Map each id to its index in `ids`. If an id occurs more than once the
    # index of its last occurrence wins.
    position_of = {}
    for position, annotation_id in enumerate(ids):
        position_of[annotation_id] = position

    query = session.query(models.Annotation)
    query = query.filter(models.Annotation.id.in_(ids))
    if query_processor:
        query = query_processor(query)

    annotations = list(query)
    annotations.sort(key=lambda annotation: position_of.get(annotation.id))
    return annotations
def create_annotation(request, data):
    """
    Create, persist and flush a new annotation built from `data`.

    Note that `data` is modified in place: its 'document' entry is removed
    and, for replies, its 'groupid' is overwritten with the group of the
    top-level annotation being replied to.

    :param request: the request object
    :type request: pyramid.request.Request

    :param data: a dictionary of annotation properties
    :type data: dict

    :returns: the created and flushed annotation
    :rtype: memex.models.Annotation

    :raises memex.schemas.ValidationError: if the first referenced
        annotation does not exist, or if the target group is not
        '__world__' and its principal is not among the request's effective
        principals
    """
    now = datetime.utcnow()

    # The document metadata is stored separately, so take it out of `data`
    # before the remaining keys are passed to the Annotation constructor.
    document_data = data['document']
    uri_dicts = document_data['document_uri_dicts']
    meta_dicts = document_data['document_meta_dicts']
    del data['document']

    # Replies must have the same group as their parent.
    references = data['references']
    if references:
        parent_id = references[0]
        parent = fetch_annotation(request.db, parent_id)
        if not parent:
            message = _('Annotation {id} does not exist').format(id=parent_id)
            raise schemas.ValidationError('references.0: ' + message)
        data['groupid'] = parent.groupid

    # The user must have permission to create an annotation in the group
    # they've asked to create one in.
    groupid = data['groupid']
    if (groupid != '__world__' and
            'group:{}'.format(groupid) not in request.effective_principals):
        raise schemas.ValidationError(
            'group: ' +
            _('You may not create annotations in groups '
              'you are not a member of!'))

    annotation = models.Annotation(**data)
    annotation.created = now
    annotation.updated = now

    annotation.document = models.update_document_metadata(
        request.db,
        annotation.target_uri,
        meta_dicts,
        uri_dicts,
        created=now,
        updated=now)

    db = request.db
    db.add(annotation)
    db.flush()

    return annotation
def update_annotation(session, id_, data):
    """
    Apply `data` to an existing annotation and refresh its document metadata.

    The annotation identified by `id_` gets a new `updated` timestamp, its
    `extra` dict is merged with any 'extra' entry in `data`, and every
    remaining key in `data` is set as an attribute on the annotation. If
    `data` carries a non-empty 'document' entry, the document metadata is
    updated as well and the resulting document is attached to the
    annotation.

    Note that `data` is modified in place: its 'document' and 'extra'
    entries are removed.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param id_: the ID of the annotation to be updated, this is assumed to be
        a validated ID of an annotation that does already exist in the
        database
    :type id_: string

    :param data: the validated data with which to update the annotation
    :type data: dict

    :returns: the updated annotation
    :rtype: memex.models.Annotation
    """
    now = datetime.utcnow()

    # Take 'document' out first so that the attribute loop below doesn't try
    # to save it on the annotation object.
    document_data = data.pop('document', None)

    annotation = session.query(models.Annotation).get(id_)
    annotation.updated = now

    # 'extra' is merged into the existing dict rather than replacing it.
    annotation.extra.update(data.pop('extra', {}))

    for field in data:
        setattr(annotation, field, data[field])

    if not document_data:
        return annotation

    uri_dicts = document_data['document_uri_dicts']
    meta_dicts = document_data['document_meta_dicts']
    annotation.document = models.update_document_metadata(
        session,
        annotation.target_uri,
        meta_dicts,
        uri_dicts,
        updated=now)

    return annotation
def delete_annotation(session, id_):
    """
    Delete the annotation that has the given id.

    The delete is issued directly on a query filtered by id. Nothing is
    returned.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param id_: the annotation ID
    :type id_: str
    """
    matching = session.query(models.Annotation).filter_by(id=id_)
    matching.delete()
def expand_uri(session, uri):
    """
    Return every URI believed to refer to the same document as `uri`.

    If no "document" record is found for `uri`, the result is just `[uri]`.
    Otherwise the URIs of all document URI records attached to that
    document are returned, unless `uri` itself is recorded as a
    'rel-canonical' link, in which case the result is again just `[uri]`.

    :param session: the database session
    :type session: sqlalchemy.orm.session.Session

    :param uri: a URI associated with the document
    :type uri: str

    :returns: a list of equivalent URIs
    :rtype: list
    """
    document = models.Document.find_by_uris(session, [uri]).one_or_none()
    if document is None:
        return [uri]

    document_uris = document.document_uris

    # If the match was a "canonical" link, all annotations created on that
    # page are guaranteed to have it as their target.source field, so there
    # is no need to expand to other URIs and risk false positives.
    matched_canonical = any(
        entry.uri == uri and entry.type == 'rel-canonical'
        for entry in document_uris)
    if matched_canonical:
        return [uri]

    return [entry.uri for entry in document_uris]