-
Notifications
You must be signed in to change notification settings - Fork 11
/
ActorRdfMetadataExtractShapetrees.ts
274 lines (245 loc) · 10.2 KB
/
ActorRdfMetadataExtractShapetrees.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import type { ActorInitQueryBase } from '@comunica/actor-init-query';
import { QueryEngineBase } from '@comunica/actor-init-query';
import type { MediatorDereferenceRdf } from '@comunica/bus-dereference-rdf';
import type { MediatorHttp } from '@comunica/bus-http';
import type { IActionRdfMetadataExtract, IActorRdfMetadataExtractOutput } from '@comunica/bus-rdf-metadata-extract';
import { ActorRdfMetadataExtract } from '@comunica/bus-rdf-metadata-extract';
import { KeysInitQuery, KeysQueryOperation } from '@comunica/context-entries';
import type { IActorArgs, IActorTest } from '@comunica/core';
import { ActionContext } from '@comunica/core';
import type { IActionContext } from '@comunica/types';
import type * as RDF from '@rdfjs/types';
import parseLink from 'parse-link-header';
import { storeStream } from 'rdf-store-stream';
import { resolve } from 'relative-to-absolute-iri';
import type * as ShEx from 'shexj';
import { Algebra, Util as AlgebraUtil } from 'sparqlalgebrajs';
import { ShapeTree } from './ShapeTree';
// ShEx parser module; `shexParser.construct(baseIri)` is used below to build a parser for fetched shape documents.
// eslint-disable-next-line ts/no-require-imports,ts/no-var-requires
const shexParser = require('@shexjs/parser');
// ShEx visitor factory; called below to walk a shape and collect the predicates of its triple constraints.
// eslint-disable-next-line ts/no-require-imports,ts/no-var-requires
const shexVisitor = require('@shexjs/visitor').Visitor;
/**
 * A comunica Shapetrees RDF Metadata Extract Actor.
 *
 * For a fetched document, this actor looks for a shape tree locator in the HTTP `Link` header,
 * dereferences the shape trees and ShEx shapes it points to,
 * and reports which shape trees are applicable to the pattern that is currently being evaluated.
 */
export class ActorRdfMetadataExtractShapetrees extends ActorRdfMetadataExtract {
  public static readonly IRI_SHAPETREE = 'http://www.w3.org/ns/shapetrees#ShapeTreeLocator';
  public static readonly IRI_SHAPETREE_OLD = 'http://shapetrees.org/#ShapeTree';

  /**
   * Matches any character that the SPARQL grammar does not allow inside an `<IRIREF>` token:
   * control characters and space (U+0000-U+0020), and `<`, `>`, `"`, `{`, `}`, `|`, `^`, a backtick, or a backslash.
   */
  // eslint-disable-next-line no-control-regex
  private static readonly INVALID_IRIREF_CHARACTERS = /[\u0000-\u0020<>"{}|^`\\]/u;

  public readonly mediatorDereferenceRdf: MediatorDereferenceRdf;
  public readonly mediatorHttp: MediatorHttp;
  public readonly queryEngine: QueryEngineBase;

  /**
   * @param args The actor arguments; `args.actorInitQuery` is wrapped in the query engine
   *             that is used to query shape tree documents.
   */
  public constructor(args: IActorRdfMetadataExtractShapetreesArgs) {
    // NOTE(review): the mediators are not assigned explicitly here;
    // presumably the base constructor copies all args onto the instance — confirm.
    super(args);
    this.queryEngine = new QueryEngineBase(args.actorInitQuery);
  }

  /**
   * Serialize an IRI as a SPARQL `<IRIREF>` token.
   *
   * The IRIs that are embedded in queries by this actor originate from remote servers
   * (HTTP Link headers and RDF documents), so they must not be able to escape the `<...>` delimiters
   * and inject arbitrary query text.
   * @param iri The IRI to embed in a SPARQL query string.
   * @returns The IRI wrapped in angle brackets.
   * @throws Error if the IRI contains a character that is not allowed inside an IRIREF.
   */
  private static toSparqlIriRef(iri: string): string {
    if (ActorRdfMetadataExtractShapetrees.INVALID_IRIREF_CHARACTERS.test(iri)) {
      throw new Error(`Cannot query for <${iri}> because it contains characters that are not allowed in a SPARQL IRI.`);
    }
    return `<${iri}>`;
  }

  /**
   * Check that the action is executed within a query and a query operation.
   * @param action The metadata extraction action.
   * @throws Error if the context lacks the query or the current query operation.
   */
  public async test(action: IActionRdfMetadataExtract): Promise<IActorTest> {
    if (!action.context.get(KeysInitQuery.query)) {
      throw new Error(`Actor ${this.name} can only work in the context of a query.`);
    }
    if (!action.context.get(KeysQueryOperation.operation)) {
      throw new Error(`Actor ${this.name} can only work in the context of a query operation.`);
    }
    return true;
  }

  /**
   * Discover all shape trees that are linked from the response headers,
   * and split them into those that match the current query pattern and those that do not.
   * @param action The metadata extraction action.
   * @returns Metadata with a `shapetrees` entry holding the `applicable` and `nonApplicable` shape trees.
   *          Both arrays are empty when no shape tree locator is advertised.
   */
  public async run(action: IActionRdfMetadataExtract): Promise<IActorRdfMetadataExtractOutput> {
    const applicable: ShapeTree[] = [];
    const nonApplicable: ShapeTree[] = [];

    const shapeTreeLocatorUrl = this.discoverShapeTreeLocator(action.headers);
    if (shapeTreeLocatorUrl) {
      // These context entries do not change while iterating, so look them up once.
      // test() rejects actions for which they are missing.
      const query = action.context.get(KeysInitQuery.query)!;
      const pattern = action.context.get(KeysQueryOperation.operation)!;

      const shapeTreeLocators = await this.fetchShapeTreesLocatorShapeTrees(shapeTreeLocatorUrl, action.context);
      for (const shapeTreeLocator of shapeTreeLocators) {
        const shapeTrees = await this.dereferenceShapeTrees(shapeTreeLocator, action.url, action.context);
        for (const shapeTree of shapeTrees) {
          const target = this.shapeTreeMatchesQuery(shapeTree, query, pattern) ? applicable : nonApplicable;
          target.push(shapeTree);
        }
      }
    }

    return {
      metadata: {
        shapetrees: {
          applicable,
          nonApplicable,
        },
      },
    };
  }

  /**
   * Extracts the shape tree locator URL from the headers
   * @param headers A headers record object
   * @returns The URL of the link with a shape tree rel type, or undefined if there are no headers,
   *          no parseable Link header, or no link with such a rel type.
   */
  public discoverShapeTreeLocator(headers?: Headers): string | undefined {
    if (!headers) {
      return undefined;
    }
    const links = parseLink(headers.get('link'));
    if (!links) {
      return undefined;
    }
    // TODO: remove old rel type
    const shapeTree = links[ActorRdfMetadataExtractShapetrees.IRI_SHAPETREE] ??
      links[ActorRdfMetadataExtractShapetrees.IRI_SHAPETREE_OLD];
    return shapeTree ? shapeTree.url : undefined;
  }

  /**
   * Fetch all shapetrees identified by the given shape tree locator.
   * @param shapeTreeLocatorUrl A shape tree locator URL.
   * @param context An action context.
   * @returns The IRIs of all shape trees that are referenced from the locator document.
   * @throws Error if the locator URL can not be safely embedded in a SPARQL query.
   */
  public async fetchShapeTreesLocatorShapeTrees(
    shapeTreeLocatorUrl: string,
    context: IActionContext,
  ): Promise<string[]> {
    // The URL comes from a response header, so validate it before it is embedded in the query below
    const locatorIriRef = ActorRdfMetadataExtractShapetrees.toSparqlIriRef(shapeTreeLocatorUrl);

    // Parse the Shape Tree locator document
    const response = await this.mediatorDereferenceRdf.mediate({ url: shapeTreeLocatorUrl, context });
    const store = await storeStream(response.data);

    // Query the document to extract all Shape Trees
    // TODO: is this query correct? Data doesn't correspond to spec.
    const bindingsArray = await (await this.queryEngine
      .queryBindings(`
        PREFIX st: <http://www.w3.org/ns/shapetree#>
        SELECT ?shapeTree WHERE {
          ${locatorIriRef} st:hasShapeTreeLocator/st:hasShapeTree ?shapeTree.
        }`, { sources: [ store ]})).toArray();
    return bindingsArray
      .map(bindings => bindings.get('shapeTree')!.value);
  }

  /**
   * Dereference the given shape tree.
   * @param shapeTreeReference A shape tree URL.
   * @param baseUrl The base URL for URI templates.
   * @param context An action context.
   * @returns One ShapeTree per shape tree that is contained in the referenced shape tree document.
   * @throws Error if the shape tree URL can not be safely embedded in a SPARQL query,
   *         or if one of the referenced shapes can not be found.
   */
  public async dereferenceShapeTrees(
    shapeTreeReference: string,
    baseUrl: string,
    context: IActionContext,
  ): Promise<ShapeTree[]> {
    // The reference comes from a remote document, so validate it before it is embedded in the query below
    const shapeTreeIriRef = ActorRdfMetadataExtractShapetrees.toSparqlIriRef(shapeTreeReference);

    // Parse the Shape Tree document
    const response = await this.mediatorDereferenceRdf.mediate({
      url: shapeTreeReference,
      // TODO: this is just to cope with the problem that demo servers expose use text/plain
      mediaType: 'text/turtle',
      // TODO: pass dummy context because the demo servers reject anything with DPoP auth
      context: new ActionContext(),
    });
    const store = await storeStream(response.data);

    // Query the document to extract all Shapes
    const bindingsArray = await (await this.queryEngine
      .queryBindings(`
        PREFIX st: <http://www.w3.org/ns/shapetree#>
        SELECT ?shapeTree ?shape ?uriTemplate WHERE {
          ${shapeTreeIriRef} st:contains ?shapeTree.
          ?shapeTree st:validatedBy ?shape;
                     st:matchesUriTemplate ?uriTemplate.
        }`, { sources: [ store ]})).toArray();

    // The shapes of all contained shape trees are fetched concurrently
    return await Promise.all(bindingsArray
      .map(async(bindings) => {
        let shapeIri = bindings.get('shape')!.value;
        // TODO: workaround for incorrect prefix use on https://shapetrees.pub/ts/medical-record/shapetree
        if (shapeIri === 'medshape:MedicalRecordShape') {
          shapeIri = 'http://shapes.pub/ns/medical-record/shex#MedicalRecordShape';
        }
        const shapeExpression = await this.dereferenceShape(shapeIri, context);
        // TODO: what is the correct base URL for relative URI templates?
        const uriTemplate = resolve(bindings.get('uriTemplate')!.value, baseUrl);
        return new ShapeTree(
          bindings.get('shapeTree')!.value,
          shapeExpression,
          uriTemplate,
        );
      }));
  }

  /**
   * Dereference a shape
   * @param shapeIri The URL of a shape definition.
   * @param _context An action context. Currently unused, because the request is sent with an empty context.
   * @returns The shape expression of the shape declaration whose id equals the given IRI.
   * @throws Error if the fetched schema contains no shape declaration with the given IRI.
   */
  public async dereferenceShape(shapeIri: string, _context: IActionContext): Promise<ShEx.Shape> {
    // Fetch the shape
    const response = await this.mediatorHttp.mediate({
      input: shapeIri,
      // TODO: pass dummy context because servers may reject anything with DPoP auth
      context: new ActionContext(),
    });
    let data = await response.text();
    // TODO: temp workaround because the test dataset uses the wrong BASE
    data = data.replace(
      'PREFIX med: <http://shapes.pub/ns/medical-record/terms#>',
      'PREFIX med: <https://shapes.pub/ns/medical-record/terms#MedicalRecord>',
    );

    // Parse as ShEx shape, using the shape IRI as base IRI
    const parser = shexParser.construct(shapeIri);
    const schema: ShEx.Schema = parser.parse(data);
    for (const shapeDeclaration of schema.shapes ?? []) {
      let declarationId = shapeDeclaration.id;
      // TODO: workaround for https://github.com/shexjs/shex.js/issues/93
      if (declarationId === 'https://shapes.pub/ns/medical-record/MedicalRecordShape') {
        declarationId = 'http://shapes.pub/ns/medical-record/shex#MedicalRecordShape';
      }
      if (declarationId === shapeIri) {
        return <ShEx.Shape> shapeDeclaration.shapeExpr;
      }
    }
    throw new Error(`Could not find a shape at ${shapeIri}`);
  }

  /**
   * Check if the given shape tree matches with the current pattern in the global query.
   *
   * A shape tree matches if the subject of the current pattern also occurs in the original query
   * as the subject of a triple pattern whose predicate is constrained by the shape.
   * @param shapeTree A shape tree to match with the query and pattern.
   * @param query The original query that is being executed.
   * @param pattern The current pattern that is being evaluated and traversed in.
   * @returns True if the shape tree is applicable to the current pattern.
   */
  public shapeTreeMatchesQuery(
    shapeTree: ShapeTree,
    query: Algebra.Operation,
    pattern: Algebra.Operation,
  ): boolean {
    // Collect all predicates in the shape
    // TODO: improve shape-query matching, by e.g. also matching rdf:type
    const visitor = shexVisitor();
    const shapePredicates = new Set<string>();
    visitor.visitTripleConstraint = (tripleConstraint: ShEx.TripleConstraint): void => {
      shapePredicates.add(tripleConstraint.predicate);
    };
    visitor.visitShape(shapeTree.shape);

    // Collect all subjects in the original query that match with any of the predicates
    // TODO: we can probably re-organize some things to achieve better performance
    const subjects: RDF.Term[] = [];
    AlgebraUtil.recurseOperation(query, {
      [Algebra.types.PATTERN](queryPattern) {
        if (shapePredicates.has(queryPattern.predicate.value)) {
          subjects.push(queryPattern.subject);
        }
        // Same return value as before: false for every visited pattern
        return false;
      },
    });

    // Check if the current pattern has any of the allowed subjects.
    return subjects.some(subject => subject.equals(pattern.subject));
  }
}
/**
* Constructor arguments of the Shapetrees RDF Metadata Extract Actor.
*/
export interface IActorRdfMetadataExtractShapetreesArgs
extends IActorArgs<IActionRdfMetadataExtract, IActorTest, IActorRdfMetadataExtractOutput> {
/**
* An init query actor that is used to query shapes.
* The actor constructs its internal query engine from it.
* @default {<urn:comunica:default:init/actors#query>}
*/
actorInitQuery: ActorInitQueryBase;
/**
* The Dereference RDF mediator,
* used to fetch and parse shape tree locator documents and shape tree documents.
*/
mediatorDereferenceRdf: MediatorDereferenceRdf;
/**
* The HTTP mediator,
* used to fetch the raw text of ShEx shape documents.
*/
mediatorHttp: MediatorHttp;
}