-
Notifications
You must be signed in to change notification settings - Fork 0
/
common.ts
304 lines (261 loc) · 9.13 KB
/
common.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/**
* Common types and minor utilities.
*
* @copyright Ivan Herman 2023
*
* @packageDocumentation
*/
import * as rdf from '@rdfjs/types';
import * as n3 from 'n3';
import { IDIssuer } from './issueIdentifier';
import { Logger } from './logging';
import { promisifyEventEmitter } from 'event-emitter-promisify';
import { AVAILABLE_HASH_ALGORITHMS } from './config';
/**
 * The prefix used for all generated canonical blank node identifiers.
 */
export const BNODE_PREFIX = "c14n";
/**
 * According to the RDF semantics, the correct representation of a dataset is a set of quads. That is
 * the structure used internally in the algorithm.
 */
export type Quads = rdf.DatasetCore;
/**
 * This is the external, "input" view of the dataset: any iterable of quads.
 */
export type InputQuads = Iterable<rdf.Quad>;
/**
 * Per spec, the input can be an abstract dataset (i.e., quads, either as a set or an array)
 * or an N-Quads document (i.e., a string).
 */
export type InputDataset = InputQuads | string;
/** Blank node identifier (label), represented as a string. */
export type BNodeId = string;
/** Hash value, represented as a string. */
export type Hash = string;
// export type QuadToNquad = (quad: rdf.Quad) => string;
/**
 * Mapping from blank node labels to quads. Used in the canonicalization state as the
 * "blank node to quads map"; see the [specification](https://www.w3.org/TR/rdf-canon/#canon-state).
 */
export interface BNodeToQuads {
// Key: a blank node identifier; value: the array of quads stored under that identifier.
[index: BNodeId]: rdf.Quad[];
}
/**
 * Mapping from hash values to blank node labels. Used in the canonicalization state as the
 * "hash to blank nodes map"; see the [specification](https://www.w3.org/TR/rdf-canon/#canon-state).
 */
export interface HashToBNodes {
// Key: a hash value; value: the array of blank node identifiers stored under that hash.
[index: Hash]: BNodeId[];
}
/**
 * Canonicalization result, i.e., the structure returned by the algorithm.
 * See the [specification](https://www.w3.org/TR/rdf-canon/#ca.7).
 */
export interface C14nResult {
/** N-Quads serialization of the dataset. */
canonical_form: string;
/** The dataset as a `DatasetCore` instance. */
canonicalized_dataset: Quads;
/** Mapping of a blank node to its identifier. */
bnode_identifier_map: ReadonlyMap<rdf.BlankNode, BNodeId>;
/** Mapping of an (original) blank node id to its canonical equivalent. */
issued_identifier_map: ReadonlyMap<BNodeId, BNodeId>;
}
/**
 * Canonicalization state. See
 * the [specification](https://www.w3.org/TR/rdf-canon/#canon-state). (The "hash algorithm" field has been
 * added to the state because it can be parametrized.)
 */
export interface C14nState {
/** The "blank node to quads map" of the canonicalization state. */
bnode_to_quads: BNodeToQuads;
/** The "hash to blank nodes map" of the canonicalization state. */
hash_to_bnodes: HashToBNodes;
/** Identifier issuer of the state (presumably the one issuing canonical identifiers; see `IDIssuer`). */
canonical_issuer: IDIssuer;
/** Name of the hash algorithm; used by `computeHash` as a key into `AVAILABLE_HASH_ALGORITHMS`. */
hash_algorithm: string;
}
/**
 * Extensions to the canonicalization state. These extensions are not defined by the specification,
 * but are necessary to run the code.
 */
export interface GlobalState extends C14nState {
/**
 * [RDF data factory instance](http://rdf.js.org/data-model-spec/#datafactory-interface), to be used
 * to create new terms and quads.
 */
dataFactory: rdf.DataFactory;
/** A logger instance. */
logger: Logger;
/** The logger instance's identifier name. */
logger_id: string;
/**
 * Complexity number: the multiplicative factor that
 * sets the value of {@link maximum_n_degree_call} by
 * multiplying it with the number of blank nodes.
 */
complexity_number: number;
/**
 * Maximal number of recursions allowed.
 * This value may be modified by the caller.
 */
maximum_n_degree_call: number;
/**
 * Current recursion level. Initialized to zero, increased every time a recursion occurs.
 */
current_n_degree_call: number;
}
/**
 * Return structure of the N-degree quads hash computation, see
 * [the specification](https://www.w3.org/TR/rdf-canon/#hash-nd-quads-algorithm).
 */
export interface NDegreeHashResult {
/** The computed hash value. */
hash: Hash;
/** The identifier issuer returned alongside the hash. */
issuer: IDIssuer;
}
/***********************************************************
Various utility functions used by the rest of the code.
***********************************************************/
/**
 * Return the hash of a string.
 *
 * This is the core of the various hashing functions. The input is encoded with `TextEncoder`,
 * digested through the Web Crypto API (`crypto.subtle.digest`), and the resulting bytes are
 * rendered as a lowercase hexadecimal string, two digits per byte.
 *
 * @param state - canonicalization state; its `hash_algorithm` value is used as a key into `AVAILABLE_HASH_ALGORITHMS`
 * @param input - the string to be hashed
 * @returns - hash value, as a hexadecimal string
 *
 * @async
 */
export async function computeHash(state: C14nState, input: string): Promise<Hash> {
    const bytes = new TextEncoder().encode(input);
    const algorithm = AVAILABLE_HASH_ALGORITHMS[state.hash_algorithm];
    const digest = new Uint8Array(await crypto.subtle.digest(algorithm, bytes));
    // Render every byte of the digest as exactly two hexadecimal digits.
    return Array.from(digest, (byte: number): string => byte.toString(16).padStart(2, '0')).join('');
}
/**
 * Convert an array of nquad statements into a single N-Quads document by concatenating
 * them into one long string. Every statement that does not already end with a `\n` character
 * gets one appended (see the
 * [Canonical N-Quads specification](https://www.w3.org/TR/rdf12-n-quads/#canonical-quads)).
 *
 * @param nquads - the individual nquad statements
 * @returns - N-Quads document as a string (empty string for an empty array)
 */
export function concatNquads(nquads: string[]): string {
    return nquads.reduce(
        (document: string, nquad: string): string => document + (nquad.endsWith('\n') ? nquad : nquad + '\n'),
        '',
    );
}
/**
 * Return the hash of an array of N-Quads statements; per specification, this means
 * concatenating all nquads into one long string before hashing.
 *
 * @param state - canonicalization state, forwarded to `computeHash`
 * @param nquads - the individual nquad statements
 * @returns - hash value
 * @async
 */
export async function hashNquads(state: C14nState, nquads: string[]): Promise<Hash> {
    // `concatNquads` takes care of terminating every quad with `\n`
    // before the statements are joined into the document to be hashed.
    const document = concatNquads(nquads);
    return computeHash(state, document);
}
/**
 * Shared N3 writer instance, used to serialize individual quads.
 */
const n3Writer = new n3.Writer();

/**
 * Serialize an `rdf.Quad` object into a single nquad.
 *
 * The serialization itself is delegated to the N3 writer. As a post-processing step, any run of
 * spaces or tabs in front of the terminating `.` is collapsed into a single space, so that the
 * statement always ends with ` .` (optionally followed by the newline the writer may have emitted).
 *
 * @param quad - the quad to serialize
 * @returns - N-Quad string
 */
export function quadToNquad(quad: rdf.Quad): string {
    const serialized = n3Writer.quadToString(quad.subject, quad.predicate, quad.object, quad.graph);
    // The dot is escaped and the whitespace run is matched explicitly; an optional trailing
    // newline is captured and put back unchanged. Strings that already end with a single
    // space before the dot are returned as they are.
    return serialized.replace(/[ \t]+\.(\n?)$/, ' .$1');
}
/**
 * Return the nquad serialization of a dataset, one string per quad. This is a utility that
 * external users can rely on; the library itself does not use it.
 *
 * When sorting is requested, the default string ordering of `Array.prototype.sort` is used.
 *
 * @param quads - the quads to serialize
 * @param sort - whether the resulting nquads must be sorted. Defaults to `true`.
 * @returns - array of nquads
 */
export function quadsToNquads(quads: InputQuads, sort: boolean = true): string[] {
    const nquads = [...quads].map((quad: rdf.Quad): string => quadToNquad(quad));
    return sort ? nquads.sort() : nquads;
}
/**
 * Hash a dataset. Each quad is turned into an nquad, the nquads are optionally sorted,
 * then concatenated into a single document, and that document is hashed.
 *
 * @param state - canonicalization state, forwarded to the hash computation
 * @param quads - the quads of the dataset
 * @param sort - whether the nquads must be sorted before hashing. Defaults to `true`.
 * @returns - hash value
 *
 * @async
 */
export async function hashDataset(state: C14nState, quads: InputQuads, sort: boolean = true): Promise<Hash> {
    return hashNquads(state, quadsToNquads(quads, sort));
}
/**
 * Parse an N-Quads document into a set of quads, collected in an N3 store.
 *
 * The implementation relies on the N3 streaming parser, as suggested by
 * Jesse Wright (`@jeswr` on GitHub).
 *
 * @param nquads - the N-Quads document
 * @returns parsed dataset
 */
export async function parseNquads(nquads: string): Promise<InputQuads> {
    const dataset = new n3.Store();
    const nquadParser = new n3.StreamParser({ blankNodePrefix: '' });
    // Connect the parser to the store before any data is pushed through it.
    const importEvents = dataset.import(nquadParser);
    nquadParser.write(nquads);
    nquadParser.end();
    // Presumably resolves once the import has finished; only then is the store handed back.
    await promisifyEventEmitter(importEvents);
    return dataset;
}
/**
 * Type guard to see if an object implements the `rdf.DatasetCore` interface (a.k.a. `Quads`). If that is
 * indeed the case, then the object is considered as "safe": there are no repeated terms, and it is not
 * a generator, i.e., it can be iterated on several times.
 *
 * Used at the very beginning of the algorithm, as part of a function that stores the quads in a local (n3)
 * data store. By checking this, unnecessary duplication of a dataset can be avoided.
 *
 * Values that are not objects (strings, numbers, `null`, `undefined`, ...) are never datasets and
 * yield `false`.
 *
 * @param obj - the value to be checked
 * @returns - `true` if `obj` exposes the `has`, `match`, `add`, `delete`, and `size` members
 */
export function isQuads(obj: unknown): obj is Quads {
    // The `in` operator throws a TypeError when its right-hand side is not an object,
    // so primitives (e.g., an N-Quads document passed as a string) and null are rejected first.
    if (obj === null || (typeof obj !== 'object' && typeof obj !== 'function')) {
        return false;
    }
    // Having `match` is important, because all the other members are also present on a Set...
    return 'has' in obj && 'match' in obj && 'add' in obj && 'delete' in obj && 'size' in obj;
}
/**
 * Replacement for a `Set<rdf.BlankNode>` object: the built-in Set structure does not compare RDF terms,
 * and therefore does not filter out duplicate blank node instances. Here, terms are identified by
 * their `value`; the first instance added for a given value is the one that is kept.
 *
 * (Inspired by the TermSet class from @rdfjs/term-set, which could not be used directly due to some
 * node.js+typescript issues. This version is stripped down to the strict minimum.)
 */
export class BnodeSet {
    /** The stored terms, keyed by their `value`. */
    private index: Map<string, rdf.BlankNode> = new Map();

    /** Number of distinct blank nodes stored. */
    get size(): number {
        return this.index.size;
    }

    /**
     * Store a blank node, unless a term with the same `value` is already present.
     *
     * @param term - the blank node to add
     * @returns - this set, to allow chaining
     */
    add(term: rdf.BlankNode): this {
        const label = term.value;
        if (this.index.has(label)) {
            return this;
        }
        this.index.set(label, term);
        return this;
    }

    /** Return the stored blank nodes as a freshly created (built-in) Set. */
    values(): Set<rdf.BlankNode> {
        const terms = this.index.values();
        return new Set(terms);
    }

    /** Same as {@link values}. */
    keys(): Set<rdf.BlankNode> {
        return this.values();
    }

    /** Iterate over the stored blank nodes. */
    [Symbol.iterator]() {
        return this.index.values();
    }
}