-
Notifications
You must be signed in to change notification settings - Fork 7
/
cmarkit.mli
1875 lines (1440 loc) · 67.3 KB
/
cmarkit.mli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)
(** CommonMark parser and abstract syntax tree.
See {{!page-index.quick}examples}.
{b References.}
{ul
{- John MacFarlane.
{e {{:https://spec.commonmark.org/0.30/}
CommonMark Spec}}. Version 0.30, 2021}} *)
(** {1:ast Abstract syntax tree} *)
(** Text locations.
A text location identifies a text span in a given UTF-8 encoded file
by an inclusive range of absolute {{!Textloc.type-byte_pos}byte} positions
and the {{!Textloc.type-line_pos}line positions} on which those occur. *)
module Textloc : sig
(** {1:fpath File paths} *)
type fpath = string
(** The type for file paths. *)
val file_none : fpath
(** [file_none] is ["-"]. A file path to use when there is none. *)
(** {1:pos Positions} *)
(** {2:byte_pos Byte positions} *)
type byte_pos = int
(** The type for zero-based, absolute, byte positions in text. If
the text has [n] bytes, [0] is the first position and [n-1] is
the last position. *)
val byte_pos_none : byte_pos
(** [byte_pos_none] is [-1]. A position to use when there is none. *)
(** {2:lines Lines} *)
type line_num = int
(** The type for one-based, line numbers in the text. Lines
increment after a {e newline} which is either a line feed ['\n']
(U+000A), a carriage return ['\r'] (U+000D) or a carriage return
and a line feed ["\r\n"] (<U+000D,U+000A>). *)
val line_num_none : line_num
(** [line_num_none] is [-1]. A line number to use when there is none. *)
(** {2:line_pos Line positions} *)
type line_pos = line_num * byte_pos
(** The type for line positions. This identifies a line by its line
number and the absolute byte position following its newline
(or the start of text for the first line). That byte position:
{ul
{- Indexes the first byte of text of the line if the line is non-empty.}
{- Indexes the first byte of the next newline if the line is empty.}
{- Is out of bounds and equal to the text's length for a last empty
line (this includes when the text is empty).}} *)
val line_pos_first : line_pos
(** [line_pos_first] is [1, 0]. Note that this is the only line position
of the empty text. *)
val line_pos_none : line_pos
(** [line_pos_none] is [(line_num_none, byte_pos_none)]. *)
(** {1:tloc Text locations} *)
type t
(** The type for text locations. A text location identifies a text
span in a UTF-8 encoded file by an inclusive range of absolute
{{!type-byte_pos}byte positions} and the {{!type-line_pos}line positions}
on which they occur.
If the first byte equals the last byte the range contains
exactly that byte. If the first byte is greater than the last
byte this represents an insertion point before the first byte. In
this case information about the last position should be ignored:
it can contain anything. *)
val none : t
(** [none] is a text location to use when there is none. *)
val v :
file:fpath -> first_byte:byte_pos -> last_byte:byte_pos ->
first_line:line_pos -> last_line:line_pos -> t
(** [v ~file ~first_byte ~last_byte ~first_line ~last_line] is a text
location with the given arguments, see corresponding accessors for
the semantics. If you don't have a file use {!file_none}. *)
val file : t -> fpath
(** [file l] is [l]'s file. *)
val first_byte : t -> byte_pos
(** [first_byte l] is [l]'s first byte. Irrelevant if {!is_none} is
[true]. *)
val last_byte : t -> byte_pos
(** [last_byte l] is [l]'s last byte. Irrelevant if {!is_none} or {!is_empty}
is [true]. *)
val first_line : t -> line_pos
(** [first_line l] is the line position on which [first_byte l] lies.
Irrelevant if {!is_none} is [true]. *)
val last_line : t -> line_pos
(** [last_line l] is the line position on which [last_byte l] lies.
Irrelevant if {!is_none} or {!is_empty} is [true]. *)
(** {2:preds Predicates and comparisons} *)
val is_none : t -> bool
(** [is_none t] is [true] iff [first_byte t < 0]. *)
val is_empty : t -> bool
(** [is_empty t] is [true] iff [first_byte t > last_byte t]. *)
val equal : t -> t -> bool
(** [equal t0 t1] is [true] iff [t0] and [t1] are equal. This checks
that {!file}, {!first_byte} and {!last_byte} are equal. Line information
is ignored. *)
val compare : t -> t -> int
(** [compare t0 t1] orders [t0] and [t1]. The order is compatible
with {!equal}. Comparison starts with {!file}, follows with {!first_byte}
and ends, if needed, with {!last_byte}. Line information is ignored. *)
(** {2:shrink_and_stretch Shrink and stretch} *)
val set_first : t -> first_byte:byte_pos -> first_line:line_pos -> t
(** [set_first l ~first_byte ~first_line] sets the first position of
[l] to the given values. *)
val set_last : t -> last_byte:byte_pos -> last_line:line_pos -> t
(** [set_last l ~last_byte ~last_line] sets the last position of [l]
to the given values. *)
val to_first : t -> t
(** [to_first l] has both first and last positions set to [l]'s first
position. The range spans {!first_byte}. See also {!before}. *)
val to_last : t -> t
(** [to_last l] has both first and last positions set to [l]'s last
position. The range spans {!last_byte}. See also {!after}. *)
val before : t -> t
(** [before t] is the {{!is_empty}empty} text location starting at
{!first_byte}. *)
val after : t -> t
(** [after t] is the {{!is_empty}empty} text location starting at
[last_byte t + 1]; note that at the end of input this may be an
invalid byte {e index}. The {!first_line} and {!last_line} of the
result are [last_line t]. *)
val span : t -> t -> t
(** [span l0 l1] is the span from the smallest byte position of [l0] and
[l1] to the largest byte position of [l0] and [l1]. The file path is
taken from the greatest byte position. *)
val reloc : first:t -> last:t -> t
(** [reloc ~first ~last] uses the first position of [first], the
last position of [last] and the file of [last]. *)
(** {2:fmt Formatting} *)
val pp_ocaml : Format.formatter -> t -> unit
(** [pp_ocaml] formats text locations like the OCaml compiler. *)
val pp_gnu : Format.formatter -> t -> unit
(** [pp_gnu] formats text locations according to the
{{:https://www.gnu.org/prep/standards/standards.html#Errors}GNU
convention}. *)
val pp : Format.formatter -> t -> unit
(** [pp] is {!pp_gnu}. *)
val pp_dump : Format.formatter -> t -> unit
(** [pp_dump] formats raw data for debugging. *)
end
(** Node metadata.
Holds text locations and custom, client-defined metadata. *)
module Meta : sig
type id = int
(** The type for non-negative metadata identifiers. *)
type t
(** The type for abstract syntax tree node metadata. *)
val none : t
(** [none] is metadata for when there is none, its {!textloc} is
{!Textloc.none}. *)
val make : ?textloc:Textloc.t -> unit -> t
(** [make ~textloc ()] is metadata with text location [textloc] (defaults
to {!Textloc.none}) and a fresh identifier (see {!val-id}). *)
val id : t -> id
(** [id m] is an identifier for the metadata. Depending on how you
process the abstract syntax tree this may become non-unique but
the metadata values in an abstract syntax tree returned by
{!Doc.of_string} with [locs:true] have distinct identifiers. *)
val textloc : t -> Textloc.t
(** [textloc m] is the source location of the syntactic construct [m]
is attached to. *)
val with_textloc : keep_id:bool -> t -> Textloc.t -> t
(** [with_textloc ~keep_id m textloc] is metadata [m] with text location
[textloc] and a fresh id, unless [keep_id] is [true]. *)
(** {1:preds Predicates and comparisons} *)
val equal : t -> t -> bool
(** [equal m0 m1] is [true] if [m0] and [m1] have the same {!val-id}.
Note that they may have different {{!custom}metadata.} *)
val compare : t -> t -> int
(** [compare m0 m1] is a total order on metadata {!val-id}s compatible with
{!equal}. *)
val is_none : t -> bool
(** [is_none m] is [equal none m]. *)
(** {1:custom Custom metadata}
{b Warning.} Operating on custom metadata never changes
{!val-id}. It is possible for two meta values to have the same
id and different metadata. *)
type 'a key
(** The type for custom metadata keys. *)
val key : unit -> 'a key
(** [key ()] is a new metadata key. *)
val mem : 'a key -> t -> bool
(** [mem k m] is [true] iff [k] is bound in [m]. *)
val add : 'a key -> 'a -> t -> t
(** [add k v m] is [m] with key [k] bound to [v]. *)
val tag : unit key -> t -> t
(** [tag k m] is [add k () m]. *)
val remove : 'a key -> t -> t
(** [remove k m] is [m] with key [k] unbound. *)
val find : 'a key -> t -> 'a option
(** [find k m] is the value of [k] in [m], if any. *)
end
type 'a node = 'a * Meta.t
(** The type for abstract syntax tree nodes: a pair made of the data of
type ['a] and its metadata. *)
(** Types for layout information.
Values of these types do not represent document data. They are
used to recover document source layout information when the
abstract syntax tree cannot represent it.
See {{!Cmarkit_commonmark.layout}source layout preservation}
for more information.
For programmatically generated nodes, values of these types can be
left empty or filled with a desired layout. Except for the
{{!Cmarkit_commonmark}CommonMark renderer} these values are usually
ignored. *)
module Layout : sig
type blanks = string
(** The type for blanks layout. This is only made of spaces and tabs. *)
type nonrec string = string
(** The type for string layout. For example the art of thematic breaks
or code fences. *)
type nonrec char = char
(** The type for character layout. For example the character used for
an emphasis or an unordered list marker. *)
type count = int
(** The type for some kind of layout count. Usually a character
count. *)
type indent = int
(** The type for block indentation. Mostly between 0-3. *)
val string : ?meta:Meta.t -> string -> string node
(** [string ~meta s] is a layout string with metadata [meta]
(defaults to {!Meta.none}). *)
val empty : string node
(** [empty] is [string ""]. *)
end
(** Block lines.
In CommonMark blocks, a "line" does not necessarily correspond to
a line in the source plain text. For example the lines of a
paragraph in a block quote are the lines stripped from the block
quote markers. We call the line resulting from stripping the
block structure preceding a given block a {e block line}. *)
module Block_line : sig
(** {1:lines Lines} *)
type t = string node
(** The type for block lines. *)
val to_string : t -> string
(** [to_string l] is [fst l]. *)
val list_textloc : t list -> Textloc.t
(** [list_textloc ls] is a text location spanning the lines [ls].
This is {!Textloc.none} on [[]]. *)
val list_of_string : ?meta:Meta.t -> string -> t list
(** [list_of_string s] cuts [s] on newlines. [meta] is used for
all nodes, defaults to [Meta.none]. *)
(** {1:tight_lines Tight lines} *)
type tight = Layout.blanks * t
(** The type for tight block lines. A block line with its
initial blanks trimmed but kept for layout. *)
val tight_to_string : tight -> string
(** [tight_to_string l] is [(fst (snd l))]. *)
val tight_list_textloc : tight list -> Textloc.t
(** [tight_list_textloc ls] is a text location spanning the lines [ls].
This is {!Textloc.none} on [[]]. *)
val tight_list_of_string : ?meta:Meta.t -> string -> tight list
(** [tight_list_of_string s] cuts [s] on newlines and computes the blanks
(except on the first line where they are part of the
data). [meta] is used for all nodes, defaults to [Meta.none]. *)
(** {1:blank_lines Blank lines} *)
type blank = Layout.blanks node
(** The type for blank block lines. *)
end
(** Labels.
Labels are used by
{{:https://spec.commonmark.org/0.30/#reference-link}reference links} to
refer to the {{!Label.definitions}definitions} of
{{:https://spec.commonmark.org/0.30/#link-reference-definitions}
link reference definitions},
{{!Cmarkit.ext_footnote_def}footnote definitions} and your own
{{!Label.resolvers}interpretations}. *)
module Label : sig
(** {1:label Labels} *)
type key = string
(** The type for label keys. These are
{{:https://spec.commonmark.org/0.30/#link-label}link labels}
normalized for {{:https://spec.commonmark.org/0.30/#matches}matching}. *)
type t
(** The type for {{:https://spec.commonmark.org/0.30/#link-label}link
labels}. *)
val make : ?meta:Meta.t -> key:string -> Block_line.tight list -> t
(** [make ~key text] is a label with key [key] and unnormalized text
[text]. *)
val with_meta : Meta.t -> t -> t
(** [with_meta m l] is [l] with meta [m]. *)
val meta : t -> Meta.t
(** [meta l] is the metadata of [l]. *)
val key : t -> key
(** [key l] is the label's key. If [l] comes out of a parse this is
[l]'s normalized {!text}. *)
val text : t -> Block_line.tight list
(** [text l] is the text of [l]. *)
val text_to_string : t -> string
(** [text_to_string l] is the lines of {!text} separated
by spaces. In contrast to {!val-key} this has not gone
through {{:https://spec.commonmark.org/0.30/#matches}normalization}.
*)
val compare : t -> t -> int
(** [compare l0 l1] is [String.compare (key l0) (key l1)]. *)
(** {1:definitions Definitions}
A label definition is the content referenced by its {!val-key}.
Labels are defined in documents via footnotes and link reference
definitions. Additional label definitions can be added before
parsing starts by using the [defs] argument of
{!Doc.of_string}. They can also be manipulated and
created on the fly during parsing by using a
{{!resolvers}resolver}. *)
type def = ..
(** The type for label definitions.
See for example {!Link_definition.extension-Def} or
{!Block.Footnote.extension-Def}. *)
(** Label key maps. *)
module Map : Map.S with type key := key
type defs = def Map.t
(** The type for label definitions. Maps label keys to their definition. *)
(** {1:resolvers Resolvers}
To have more control over the label definitions used in a
document, the [defs] argument of {!Doc.of_string} can be
specified to pre-populate the label definitions used during parsing;
for example with those of a previously parsed document.
In addition the [resolver] argument can be specified to:
{ol
{- Alter or suppress label definitions made by link reference definitions
and footnote definitions. It can also be used to warn, by
side effect, on multiple label definitions.}
{- Alter, or suppress label references on reference links and images –
which happen after all label definitions have been made. You can
define the actual label that will be used for resolving
the reference to its definition.}}
In particular 2. can be used to create synthetic label definitions
on undefined label references. This provides the ability to treat
the very liberal
{{:https://spec.commonmark.org/0.30/#link-label}link label}
syntax as a domain specific language of yours (e.g. for data binding).
Note that parsing is not finished when resolvers are invoked;
this is the reason why you don't get access to the definition's
data during resolution.
See {{!resolver_example}an example}. *)
type context =
[ `Def of t option * t (** Label definitions *)
| `Ref of [ `Link | `Image ] * t * t option (** Label references *) ]
(** The type for resolver contexts. See {!type-resolver}. *)
type resolver = context -> t option
(** The type for resolvers. [context] is:
{ul
{- [`Def (prev, current)] when we just hit a
{{:https://spec.commonmark.org/0.30/#link-reference-definitions}
link reference definition} or
{{!Cmarkit.ext_footnote_def}footnote definition} that defines
the label [current]. If there is already a definition for
[current]'s {!val-key} it is provided in [prev] (whose {!meta} has
the location of the definition if you parse with locations).
If [None] is returned the [current] definition is ignored,
and definition [prev] (if any) is kept for the document. If
[Some l] is returned [l]'s key will be bound to the parsed
definition for [current] in {!Doc.defs} at the end of parsing.
The result of the resolver is stored in the abstract syntax tree and
available via {!Link_definition.defined_label} and
{!Block.Footnote.defined_label}.}
{- [`Ref (kind, ref, def)] when we just hit a link or image
referencing label [ref]. [def] is the label defining [ref]'s {!val-key}
in the document (if any). The result of the resolver is the label
stored for resolving the reference to its definition in the resulting
{!Inline.module-Link} node;
[None] means that [ref] is undefined and the inline becomes
{!Inline.extension-Text} like in CommonMark.}}
See {{!resolver_example}an example} and the {!default_resolver}. *)
val default_resolver : resolver
(** [default_resolver] is the default resolver.
This resolves according to the CommonMark specification.
The first label definition always takes over subsequent
ones and resolution is left untouched (i.e. a label has to be
defined in the document to be used):
{[
let default_resolver = function
| `Def (None, l) -> Some l
| `Def (Some _, _) -> None (* Previous takes over *)
| `Ref (_, _, def) -> def
]} *)
(** {1:resolver_example Resolver example}
In this example we assume references to undefined labels denote
links to pages or media in our wiki and want to process
them later via a {{!Mapper}tree transformation} or in a
{{!Cmarkit_renderer.example}renderer extension}.
We devise a resolver to create synthetic labels on any undefined
label so that the CommonMark parser does not turn them into text.
{[
let wikilink = Cmarkit.Meta.key () (* A meta key to recognize them *)
let make_wikilink label = (* Just a placeholder label definition *)
let meta = Cmarkit.Meta.tag wikilink (Cmarkit.Label.meta label) in
Cmarkit.Label.with_meta meta label
let with_wikilinks = function
| `Def _ as ctx -> Cmarkit.Label.default_resolver ctx
| `Ref (_, _, (Some _ as def)) -> def (* As per doc definition *)
| `Ref (_, ref, None) -> Some (make_wikilink ref)
]}
*)
end
(** Link definitions. *)
module Link_definition : sig
(** {1:layout Layout} *)
type layout =
{ indent : Layout.indent; (** Amount of indentation, [0] on inline links. *)
angled_dest : bool; (** [true] if destination is between [<…>]. *)
before_dest : Block_line.blank list; (** Blanks to destination. *)
after_dest : Block_line.blank list; (** Blanks after destination. *)
title_open_delim : Layout.char;
(** Title open delimiter (['\"'], ['('], …) *)
after_title : Block_line.blank list;
(** Blanks after title (inline links). *) }
(** The type for link reference layout. *)
val layout_for_dest : string -> layout
(** [layout_for_dest d] computes a layout value for destination [d]. This
just determines if [angled_dest] needs to be [true]. *)
(** {1:link_defs Link definitions} *)
type t
(** The type for representing
{{:https://spec.commonmark.org/0.30/#link-reference-definitions}
link reference definitions} and
{{:https://spec.commonmark.org/0.30/#inline-link}inline links}. *)
val make :
?layout:layout -> ?defined_label:Label.t option -> ?label:Label.t ->
?dest:string node -> ?title:Block_line.tight list -> unit -> t
(** [make ()] is a link reference with given parameters. If [dest] is
given and [layout] is not, the latter is computed with
{!layout_for_dest}. [label] is a label if the link is defined
via a link reference definition. [defined_label] defaults to
[label]. *)
val layout : t -> layout
(** [layout ld] is the layout of [ld]. *)
val label : t -> Label.t option
(** [label ld] is [None] if this is a link definition for an inline
link. It is [Some l], if [ld] is a link reference
definition. [l] is the label as found in the text. The result
of the resolver is in {!defined_label}. *)
val defined_label : t -> Label.t option
(** [defined_label ld] is the label determined by the {!Label.type-resolver}
for the link reference definition. The label as found
in the source text is in {!label}. If this is [None] either
it's a link definition for an inline link or the resolver deleted
the label definition. *)
val dest : t -> string node option
(** [dest ld] is the link destination of [ld]. [None] means
there was no destination. CommonMark renders that as an empty
[href] in HTML. *)
val title : t -> Block_line.tight list option
(** [title ld] is the title of the reference, if any. *)
(** {1:labeldef As label definitions} *)
type Label.def += Def of t node (** *)
(** A label definition for links. *)
end
(** Inlines.
{b Note.} Document data in inline nodes is always stored
{{:https://spec.commonmark.org/0.30/#backslash-escapes}unescaped} and
with {{:https://spec.commonmark.org/0.30/#entity-and-numeric-character-references}entity and character references} resolved. *)
module Inline : sig
(** {1:inlines Inlines} *)
type t = ..
(** The type for inlines. *)
(** Autolinks. *)
module Autolink : sig
type t
(** The type for
{{:https://spec.commonmark.org/0.30/#autolink}autolinks}. *)
val make : string node -> t
(** [make link] is an autolink for [link]
which must be a CommonMark
{{:https://spec.commonmark.org/0.30/#absolute-uri}absolute URI}
or a CommonMark
{{:https://spec.commonmark.org/0.30/#email-address}email
address}. *)
val is_email : t -> bool
(** [is_email a] is [true] iff {!link}[ a] is
a CommonMark
{{:https://spec.commonmark.org/0.30/#email-address}email
address}. *)
val link : t -> string node
(** [link a] is the CommonMark
{{:https://spec.commonmark.org/0.30/#absolute-uri}absolute URI} or
{{:https://spec.commonmark.org/0.30/#email-address}email address}
of [a]. *)
end
(** Hard and soft breaks. *)
module Break : sig
type type' =
[ `Hard (** {{:https://spec.commonmark.org/0.30/#hard-line-breaks}
Hard line break.} *)
| `Soft (** {{:https://spec.commonmark.org/0.30/#soft-line-breaks}
Soft line break.} *) ]
(** The type for types of line breaks. *)
type t
(** The type for
{{:https://spec.commonmark.org/0.30/#hard-line-breaks}hard}
and
{{:https://spec.commonmark.org/0.30/#soft-line-breaks}soft}
line breaks. *)
val make :
?layout_before:Layout.string node -> ?layout_after:Layout.blanks node ->
type' -> t
(** [make type'] is a new break of type [type']. Layout values default
to {!Layout.empty}. *)
val type' : t -> type'
(** [type' b] is the type of [b]. *)
val layout_before : t -> Layout.string node
(** [layout_before b] is the layout before the newline, spaces
or possibly ['\'] for hard breaks. *)
val layout_after : t -> Layout.blanks node
(** [layout_after b] are the blanks on the new {e block line}. *)
end
(** Code spans. *)
module Code_span : sig
type t
(** The type for
{{:https://spec.commonmark.org/0.30/#code-spans}code spans}. *)
val make : backtick_count:Layout.count -> Block_line.tight list -> t
(** [make ~backtick_count code_layout] is a code span with given
parameters.
{b Warning.} Nothing is done to ensure correctness of the
data, use {!of_string} to compute the right amount of
backticks. *)
val of_string : ?meta:Meta.t -> string -> t
(** [of_string s] is a code span for [s]. [s] can start with or
include backticks; the appropriate minimal backtick count and
possibly needed leading and trailing space are computed
accordingly. If [s] contains newlines, blanks after newlines
are treated as layout like during parsing. [meta] is used for
the lines of the resulting code layout (see {!code_layout}). *)
val backtick_count : t -> Layout.count
(** [backtick_count cs] is the number of delimiting backticks. *)
val code : t -> string
(** [code cs] computes from {!code_layout} the code in the span [cs]. *)
val code_layout : t -> Block_line.tight list
(** [code_layout cs] is the code data in a form that allows layout
preservation.
The actual code data is the tight block lines concatenated and
separated by space and if the result starts and ends with a
space and is not only made of spaces, these should be
dropped. The {!code} function does all that for you. *)
end
(** Emphasis and strong emphasis. *)
module Emphasis : sig
type inline := t
type t
(** The type for
{{:https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis}
emphasis and strong emphasis}. *)
val make : ?delim:Layout.char -> inline -> t
(** [make i] is an emphasis on [i]. [delim] is the delimiter
used; it should be either ['*'] or ['_']. *)
val inline : t -> inline
(** [inline e] is the emphasised inline. *)
val delim : t -> Layout.char
(** [delim e] is the delimiter used for emphasis; it should be
either ['*'] or ['_']. *)
end
(** Links. *)
module Link : sig
type inline := t
type reference_layout =
[ `Collapsed
(** {{:https://spec.commonmark.org/0.30/#collapsed-reference-link}
Collapsed reference link} *)
| `Full
(** {{:https://spec.commonmark.org/0.30/#full-reference-link}
Full reference link} *)
| `Shortcut
(** {{:https://spec.commonmark.org/0.30/#shortcut-reference-link}
Shortcut reference link} *) ]
(** The type for reference link layouts. *)
type reference =
[ `Inline of Link_definition.t node
(** {{:https://spec.commonmark.org/0.30/#inline-link}Inline link} *)
| `Ref of reference_layout * Label.t * Label.t
(** {{:https://spec.commonmark.org/0.30/#reference-link}Reference
links}. First label is the label of the reference, second
label is the label of the referenced definition. *) ]
(** The type for references. *)
type t
(** The type for {{:https://spec.commonmark.org/0.30/#links}links}
and {{:https://spec.commonmark.org/0.30/#images}images}. *)
val make : inline -> reference -> t
(** [make i ref] is a link for text [i] and link reference [ref].
If you plan to render to CommonMark and this is not an inline
reference you should include a
{!Block.extension-Link_reference_definition} (or
{!Block.extension-Ext_footnote_definition}) for [ref]
somewhere in the document, otherwise the reference will not
parse back. *)
val text : t -> inline
(** [text l] is the text of the link. *)
val reference : t -> reference
(** [reference l] is the reference of the link. *)
val referenced_label : t -> Label.t option
(** [referenced_label l] is the label referenced by the label of [l].
This is the second label of [`Ref _] or [None] on inline
references. *)
val reference_definition : Label.defs -> t -> Label.def option
(** [reference_definition defs l] is the definition of [l]'s
reference. If [l] is an [`Inline] reference this returns its
link definition wrapped in a {!Link_definition.extension-Def}. If [l] is
[`Ref] this looks up the {!referenced_label} in [defs]. *)
val is_unsafe : string -> bool
(** [is_unsafe url] is [true] if [url] is deemed unsafe. This is
the case if [url] starts with a caseless match of
[javascript:], [vbscript:], [file:] or [data:] except if
[data:image/{gif,png,jpeg,webp}]. These rules were taken from
{{:https://github.com/commonmark/cmark}[cmark]}, the C
reference implementation of CommonMark and are likely
incomplete. If you are trying to prevent XSS you should
post-process rendering outputs with a dedicated HTML sanitizer. *)
end
(** Raw HTML. *)
module Raw_html : sig
type t = Block_line.tight list
(** The type for {{:https://spec.commonmark.org/0.30/#raw-html}inline raw
HTML} (can span multiple lines).
{b Warning.} If you create HTML blocks using
{!Block_line.tight_list_of_string} you should make sure the
resulting lines satisfy the constraints of CommonMark raw HTML
(one way is to parse them instead). *)
end
(** Text. *)
module Text : sig
type t = string
(** The type for
{{:https://spec.commonmark.org/0.30/#textual-content}textual content}.
Normally these strings should not contain newlines. This can,
however, happen if the source had newlines as
{{:https://spec.commonmark.org/0.30/#entity-and-numeric-character-references}character references}. *)
end
type t +=
| Autolink of Autolink.t node
| Break of Break.t node
| Code_span of Code_span.t node
| Emphasis of Emphasis.t node
| Image of Link.t node
| Inlines of t list node (** Splicing *)
| Link of Link.t node
| Raw_html of Raw_html.t node
| Strong_emphasis of Emphasis.t node
| Text of Text.t node (** *)
(** The CommonMark
{{:https://spec.commonmark.org/0.30/#inlines}inlines}. *)
val empty : t
(** [empty] is [Inlines ([], Meta.none)]. *)
(** {1:exts Extensions}
See the description of {{!Cmarkit.extensions}extensions}. *)
(** Strikethrough. *)
module Strikethrough : sig
type inline := t
type t
(** The type for {{!Cmarkit.ext_strikethrough}strikethrough}. *)
val make : inline -> t
(** [make i] is the inline [i] with a strikethrough. *)
val inline : t -> inline
(** [inline s] is the inline of [s], the one with a strikethrough. *)
end
(** Math span. *)
module Math_span : sig
type t
(** The type for {{!Cmarkit.ext_math_inline}math spans}. *)
val make : display:bool -> Block_line.tight list -> t
(** [make ~display tex_layout] is an inline or display math span with
given T{_E}X code. *)
val display : t -> bool
(** [display ms] is [true] if the span should be on its own line. *)
val tex : t -> string
(** [tex ms] is the inline math T{_E}X code of [ms]. *)
val tex_layout : t -> Block_line.tight list
(** [tex_layout ms] is inline math T{_E}X code in a form that
allows layout preservation.
The actual code data is the tight block lines concatenated and
separated by space. The {!tex} function does that for you. *)
end
type t +=
| Ext_strikethrough of Strikethrough.t node
| Ext_math_span of Math_span.t node (** *)
(** The supported inline extensions.

These inlines are only parsed when {!Doc.of_string} is called with
[strict:false]. *)
(** {1:funs Functions} *)
val is_empty : t -> bool
(** [is_empty i] is [true] if [i] is [Inlines ([], _)] or [Text ("", _)]. *)
val meta : ?ext:(t -> Meta.t) -> t -> Meta.t
(** [meta ~ext i] is the metadata of [i].

[ext] is called on cases not defined in this module. The default
raises [Invalid_argument]. *)
val normalize : ?ext:(t -> t) -> t -> t
(** [normalize ~ext i] has the same content as [i] but is such that for any
occurrence of [Inlines (is, _)] in [i] the list of inlines [is]:
{ol
{- Is not a singleton list.}
{- Has no two consecutive [Text _] cases. If that occurs the texts are
concatenated, the meta of the first one is kept and its text
location extended to include the second one.}
{- Has no [Inlines _] case. The meta is dropped and the nested
inlines are spliced in [is] where the case occurs.}}

[ext] is called on cases not defined in this module. The default
raises [Invalid_argument]. *)
val to_plain_text :
?ext:(break_on_soft:bool -> t -> t) -> break_on_soft:bool ->
t -> string list list
(** [to_plain_text ~ext ~break_on_soft i] has the plain text of [i]
as a sequence of lines, each represented by a list of strings to be
concatenated. If [break_on_soft] is [true] soft line breaks
are turned into hard line breaks.

To turn the result [r] into a single string, apply:
{[ String.concat "\n" (List.map (String.concat "") r) ]}

[ext] is called on cases not defined in this module, it should
compile extensions to one of these cases. The default raises
[Invalid_argument]. *)
val id : ?buf:Buffer.t -> ?ext:(break_on_soft:bool -> t -> t) -> t -> string
(** [id ?buf ?ext i] derives an identifier for inline [i] using [buf] as
scratch space (one is created if unspecified).

This converts [i] to plain text using {!Inline.to_plain_text},
then applies the same
{{:https://spec.commonmark.org/0.30/#matches}normalization}
performed on labels, maps spaces to character [-] (U+002D) and
drops {{:https://spec.commonmark.org/0.30/#unicode-punctuation-character}
Unicode punctuation characters} except [-] (U+002D) and [_] (U+005F).

[ext] is given to {!Inline.to_plain_text}. *)
end
(** Blocks. *)
module Block : sig
(** {1:blocks Blocks} *)
type t = ..
(** The type for blocks. This is an extensible variant type. *)
(** Blank lines. *)
module Blank_line : sig
type t = Layout.blanks
(** The type for
{{:https://spec.commonmark.org/0.30/#blank-lines}blank lines}.

These can be ignored during rendering; they are kept for layout. *)
end
(** Block quotes. *)
module Block_quote : sig
(* [block] names the enclosing block type, since [t] is redefined
just below. *)
type block := t
type t
(** The type for {{:https://spec.commonmark.org/0.30/#block-quotes}
block quotes}. *)
val make : ?indent:Layout.indent -> block -> t
(** [make ?indent b] quotes block [b]. *)
val indent : t -> Layout.indent
(** [indent bq] is the indentation to the block quote
marker found on the first line. *)
val block : t -> block
(** [block bq] is the block quoted by [bq]. *)
end
(** Code blocks. *)
module Code_block : sig
type fenced_layout =
{ indent : Layout.indent; (** Indent to opening fence *)
opening_fence : Layout.string node;
(** Opening fence (before info string). *)
closing_fence : Layout.string node option;
(** Closing fence (if any). *) }
(** The type for fenced code block layouts. *)
type layout = [ `Indented | `Fenced of fenced_layout ]
(** The type for code block layouts. *)
type t
(** The type for
{{:https://spec.commonmark.org/0.30/#indented-code-block}
indented} and
{{:https://spec.commonmark.org/0.30/#fenced-code-blocks}fenced}
code blocks. *)
val make :
?layout:layout -> ?info_string:string node -> Block_line.t list -> t
(** [make ?layout ?info_string code] is a code block with given
parameters. [layout] defaults to a fenced layout. If [layout]
is [`Indented] and an [info_string] is provided, the layout is
switched to [`Fenced]. *)
val layout : t -> layout
(** [layout cb] is the layout of [cb]. *)