mirrored from git://git.sv.gnu.org/emacs.git
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
treesit.el
3712 lines (3264 loc) · 152 KB
/
treesit.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; treesit.el --- tree-sitter utilities -*- lexical-binding: t -*-
;; Copyright (C) 2021-2023 Free Software Foundation, Inc.
;; Maintainer: 付禹安 (Yuan Fu) <casouri@gmail.com>
;; Keywords: treesit, tree-sitter, languages
;; Package: emacs
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; This file is the Lisp counterpart of treesit.c. Together they
;; provide tree-sitter integration for Emacs. This file contains
;; convenient functions that are more idiomatic and flexible than the
;; exposed C API of tree-sitter. It also contains frameworks for
;; integrating tree-sitter with font-lock, indentation, activating and
;; deactivating tree-sitter, debugging tree-sitter, etc.
;;; Code:
(eval-when-compile (require 'subr-x)) ; For `string-join'.
(require 'cl-lib)
(require 'font-lock)
(require 'seq)
;;; Function declarations
(declare-function treesit-language-available-p "treesit.c")
(declare-function treesit-language-version "treesit.c")
(declare-function treesit-parser-p "treesit.c")
(declare-function treesit-node-p "treesit.c")
(declare-function treesit-compiled-query-p "treesit.c")
(declare-function treesit-query-p "treesit.c")
(declare-function treesit-query-language "treesit.c")
(declare-function treesit-node-parser "treesit.c")
(declare-function treesit-parser-create "treesit.c")
(declare-function treesit-parser-delete "treesit.c")
(declare-function treesit-parser-list "treesit.c")
(declare-function treesit-parser-buffer "treesit.c")
(declare-function treesit-parser-language "treesit.c")
(declare-function treesit-parser-tag "treesit.c")
(declare-function treesit-parser-root-node "treesit.c")
(declare-function treesit-parser-set-included-ranges "treesit.c")
(declare-function treesit-parser-included-ranges "treesit.c")
(declare-function treesit-parser-add-notifier "treesit.c")
(declare-function treesit-node-type "treesit.c")
(declare-function treesit-node-start "treesit.c")
(declare-function treesit-node-end "treesit.c")
(declare-function treesit-node-string "treesit.c")
(declare-function treesit-node-parent "treesit.c")
(declare-function treesit-node-child "treesit.c")
(declare-function treesit-node-check "treesit.c")
(declare-function treesit-node-field-name-for-child "treesit.c")
(declare-function treesit-node-child-count "treesit.c")
(declare-function treesit-node-child-by-field-name "treesit.c")
(declare-function treesit-node-next-sibling "treesit.c")
(declare-function treesit-node-prev-sibling "treesit.c")
(declare-function treesit-node-first-child-for-pos "treesit.c")
(declare-function treesit-node-descendant-for-range "treesit.c")
(declare-function treesit-node-eq "treesit.c")
(declare-function treesit-pattern-expand "treesit.c")
(declare-function treesit-query-expand "treesit.c")
(declare-function treesit-query-compile "treesit.c")
(declare-function treesit-query-capture "treesit.c")
(declare-function treesit-search-subtree "treesit.c")
(declare-function treesit-search-forward "treesit.c")
(declare-function treesit-induce-sparse-tree "treesit.c")
(declare-function treesit-subtree-stat "treesit.c")
(declare-function treesit-node-match-p "treesit.c")
(declare-function treesit-available-p "treesit.c")
(defvar treesit-thing-settings)
;;; Custom options
;; Tree-sitter always appears as treesit in symbols.
(defgroup treesit nil
  "Incremental parser.
Tree-sitter is used to enhance major mode features such as
font-lock, indentation, imenu, etc."
  :version "29.1"
  :group 'tools)
(defcustom treesit-max-buffer-size
  (let* ((mib (* 1024 1024))
         ;; Fixnums narrower than 2GB mean a 32-bit build.
         (narrow-fixnum-p (< most-positive-fixnum (* 2.0 1024 mib)))
         ;; A 32-bit build configured with wide ints has large fixnums
         ;; but is still a 32-bit system.
         (wide-int-p (string-search "--with-wide-int"
                                    system-configuration-options)))
    ;; 15MB for 32-bit systems, 40MB for 64-bit ones.
    (if (or narrow-fixnum-p wide-int-p)
        (* 15 mib)
      (* 40 mib)))
  "Maximum buffer size (in bytes) for enabling tree-sitter parsing.

A typical tree-sitter parser needs about 10 times as much memory
as the buffer it parses.  In addition, the tree-sitter library
cannot address byte offsets into buffer text beyond the maximum
unsigned 32-bit value."
  :version "29.1"
  :type 'integer)
;;; Parser API supplement
(defun treesit-parse-string (string language)
  "Parse STRING using a parser for LANGUAGE.
STRING is inserted into a temporary buffer, a parser for LANGUAGE
is created there, and the root node of the resulting syntax tree
is returned."
  (with-temp-buffer
    (insert string)
    (let ((parser (treesit-parser-create language)))
      (treesit-parser-root-node parser))))
(defvar-local treesit-language-at-point-function nil
  "A function that returns the language at point.
`treesit-language-at' calls this function, and various other
functions rely on `treesit-language-at' to decide which parser to
use at point.

The function is called with one argument, the position of point.

In general, this function should call `treesit-node-at' with an
explicit language (usually the host language), and determine the
language at point from the type of the returned node.

DO NOT derive the language at point from parser ranges.  Doing so
is cumbersome and cannot handle some edge cases.")
(defun treesit-language-at (position)
  "Return the language at POSITION.

This function assumes that parser ranges are up-to-date.  If
`treesit-language-at-point-function' is non-nil, return whatever
that function returns for POSITION.  Otherwise return the
language of the first parser in `treesit-parser-list', or nil if
there is no parser.

In a multi-language buffer, make sure
`treesit-language-at-point-function' is implemented!  Otherwise
`treesit-language-at' wouldn't return the correct result."
  (cond
   (treesit-language-at-point-function
    (funcall treesit-language-at-point-function position))
   (t
    ;; No language function: fall back to the first parser, if any.
    (let ((first-parser (car (treesit-parser-list))))
      (and first-parser
           (treesit-parser-language first-parser))))))
;;; Node API supplement
;; Error condition, child of `treesit-error', signaled by
;; `treesit-buffer-root-node' when no language is given and the
;; current buffer's parser list is empty.
(define-error 'treesit-no-parser "No available parser for this buffer"
'treesit-error)
(defun treesit-node-buffer (node)
  "Return the buffer in which NODE belongs.
This is the buffer of the parser that produced NODE."
  (let ((parser (treesit-node-parser node)))
    (treesit-parser-buffer parser)))
(defun treesit-node-language (node)
  "Return the language symbol that NODE's parser uses."
  (let ((parser (treesit-node-parser node)))
    (treesit-parser-language parser)))
(defun treesit-node-at (pos &optional parser-or-lang named)
  "Return the leaf node at position POS.

A leaf node is a node that doesn't have any child nodes.

The returned node's span covers POS: the node's beginning is before
or at POS, and the node's end is after POS.

If no such node exists, but there's a leaf node which ends at POS,
return that node.

Otherwise (e.g., when POS is on whitespace between two leaf
nodes), return the first leaf node after POS.

If there is no leaf node after POS, return the first leaf node
before POS.

Return nil if no leaf node can be returned.  If NAMED is non-nil,
only look for named nodes.

If PARSER-OR-LANG is a parser, use that parser; if PARSER-OR-LANG
is a language, find the first parser for that language in the
current buffer, or create one if none exists; if PARSER-OR-LANG
is nil, try to guess the language at POS using `treesit-language-at'.

If there's a local parser at POS, the local parser takes priority
unless PARSER-OR-LANG is a parser, or PARSER-OR-LANG is a
language and doesn't match the language of the local parser."
  (let* ((root
          (cond
           ((treesit-parser-p parser-or-lang)
            (treesit-parser-root-node parser-or-lang))
           (t
            ;; Prefer a local parser at POS; otherwise fall back to
            ;; the buffer-wide parser for the requested or guessed
            ;; language.
            (let ((local-parser
                   (car (treesit-local-parsers-at pos parser-or-lang))))
              (or (and local-parser
                       (treesit-parser-root-node local-parser))
                  (treesit-buffer-root-node
                   (or parser-or-lang
                       (treesit-language-at pos))))))))
         (leaf root)
         (leaf-before root)
         (prev-pos (max (1- pos) (point-min)))
         child)
    (when leaf
      ;; Descend towards POS.  This is very fast so no need for a C
      ;; implementation.
      (while (setq child (treesit-node-first-child-for-pos
                          leaf pos named))
        (setq leaf child))
      (cond
       ;; If POS is at the end of buffer, after all the text, the
       ;; descent leaves us at the root node.  Instead of returning
       ;; nil, return the last leaf node in the tree for convenience.
       ((treesit-node-eq leaf root)
        (while (setq child (treesit-node-child leaf -1 named))
          (setq leaf child))
        leaf)
       ;; Normal case: the leaf we found starts at or before POS.
       ((<= (treesit-node-start leaf) pos)
        leaf)
       ;; The leaf we found is completely after POS; look for a leaf
       ;; whose end equals POS and prefer it if there is one.
       (t
        (while (setq child (treesit-node-first-child-for-pos
                            leaf-before prev-pos named))
          (setq leaf-before child))
        (if (eq (treesit-node-end leaf-before) pos)
            leaf-before
          leaf))))))
(defun treesit-node-on (beg end &optional parser-or-lang named)
  "Return the smallest node covering BEG to END.

BEWARE!  Calling this function on an empty line that is not
inside any top-level construct (function definition, etc.) most
probably will give you the root node, because the root node is
the smallest node that covers that empty line.  You probably want
to use `treesit-node-at' instead.

Return nil if none was found.  If NAMED is non-nil, only look for
named node.

If PARSER-OR-LANG is a parser, use that parser; if PARSER-OR-LANG
is a language, find the first parser for that language in the
current buffer, or create one if none exists; if PARSER-OR-LANG
is nil, try to guess the language at BEG using `treesit-language-at'.

If there's a local parser between BEG and END, try to use that
parser first."
  (let* ((guessed-lang (treesit-language-at beg))
         (root
          (cond
           ((treesit-parser-p parser-or-lang)
            (treesit-parser-root-node parser-or-lang))
           (t
            (let ((local-parser
                   (car (treesit-local-parsers-on
                         beg end (or parser-or-lang guessed-lang)))))
              ;; A local parser wins; otherwise use the buffer-wide
              ;; parser for the requested or guessed language.
              (or (and local-parser
                       (treesit-parser-root-node local-parser))
                  (treesit-buffer-root-node
                   (or parser-or-lang guessed-lang))))))))
    (treesit-node-descendant-for-range root beg (or end beg) named)))
(defun treesit-node-top-level (node &optional pred include-node)
  "Return the top-level equivalent of NODE.

Specifically, return the highest parent of NODE that has the same
type as it.  If no such parent exists, return nil.

If PRED is non-nil, match each parent's type with PRED rather
than using NODE's type.  PRED can also be a predicate function,
and more.  See `treesit-thing-settings' for details.

If INCLUDE-NODE is non-nil, return NODE if it satisfies PRED."
  (let ((pred (or pred (rx bos (literal (treesit-node-type node)) eos)))
        (cursor (if include-node node (treesit-node-parent node)))
        (topmost nil))
    ;; Walk all the way up, remembering the last (i.e. highest)
    ;; ancestor that matches.
    (while cursor
      (when (treesit-node-match-p cursor pred t)
        (setq topmost cursor))
      (setq cursor (treesit-node-parent cursor)))
    topmost))
(defun treesit-buffer-root-node (&optional language tag)
  "Return the root node of the current buffer.

Use the first parser in the parser list if LANGUAGE is omitted;
signal `treesit-no-parser' if the parser list is empty.

If LANGUAGE is non-nil, use the first parser for LANGUAGE with
TAG in the parser list, or create one if none exists.  TAG
defaults to nil."
  (let ((parser
         (if language
             (treesit-parser-create language nil nil tag)
           (or (car (treesit-parser-list))
               (signal 'treesit-no-parser (list (current-buffer)))))))
    (and parser
         (treesit-parser-root-node parser))))
(defun treesit-filter-child (node pred &optional named)
  "Return children of NODE that satisfy predicate PRED.
PRED is a function that takes one argument, the child node.
If optional argument NAMED is non-nil, only search for named
nodes.  The children are returned in document order."
  (cl-loop for child = (treesit-node-child node 0 named)
           then (treesit-node-next-sibling child named)
           while child
           when (funcall pred child)
           collect child))
(defun treesit-node-text (node &optional no-property)
  "Return the buffer (or string) content corresponding to NODE.
If optional argument NO-PROPERTY is non-nil, remove text
properties.  Return nil if NODE is nil."
  (when node
    (let ((extract (if no-property
                       #'buffer-substring-no-properties
                     #'buffer-substring)))
      (with-current-buffer (treesit-node-buffer node)
        (funcall extract
                 (treesit-node-start node)
                 (treesit-node-end node))))))
(defun treesit-parent-until (node pred &optional include-node)
  "Return the closest parent of NODE that satisfies PRED.

This function successively examines the parent of NODE, then
the parent of the parent, etc., until it finds the first
ancestor node which satisfies the predicate PRED; then it
returns that ancestor node.  It returns nil if no ancestor
node was found that satisfies PRED.

PRED should be a function that takes one argument, the node to
examine, and returns a boolean value indicating whether that
node is a match.

If INCLUDE-NODE is non-nil, return NODE if it satisfies PRED."
  (let ((candidate (if include-node
                       node
                     (treesit-node-parent node))))
    ;; Climb until we run out of ancestors or find a match.
    (while (not (or (null candidate)
                    (funcall pred candidate)))
      (setq candidate (treesit-node-parent candidate)))
    candidate))
(defun treesit-parent-while (node pred)
  "Return the furthest parent of NODE (including NODE) that satisfies PRED.

This function successively examines NODE, the parent of NODE,
then the parent of the parent, etc., until it finds a node which
no longer satisfies the predicate PRED; it returns the last
examined node that satisfies PRED.  If no node satisfies PRED, it
returns nil.

PRED should be a function that takes one argument, the node to
examine, and returns a boolean value indicating whether that
node is a match."
  (cl-loop with furthest = nil
           for cursor = node then (treesit-node-parent cursor)
           while (and cursor (funcall pred cursor))
           do (setq furthest cursor)
           finally return furthest))
(defun treesit-node-children (node &optional named)
  "Return a list of NODE's children.
If NAMED is non-nil, collect named children only."
  (cl-loop for idx from 0 below (treesit-node-child-count node named)
           collect (treesit-node-child node idx named)))
(defun treesit-node-index (node &optional named)
  "Return the index of NODE in its parent.
If NAMED is non-nil, count named children only."
  ;; The index equals the number of previous siblings.
  (let ((index 0)
        (sibling (treesit-node-prev-sibling node named)))
    (while sibling
      (setq index (1+ index)
            sibling (treesit-node-prev-sibling sibling named)))
    index))
(defun treesit-node-field-name (node)
  "Return the field name of NODE as a child of its parent.
Return nil if NODE has no parent."
  (let ((parent (treesit-node-parent node)))
    (when parent
      (treesit-node-field-name-for-child
       parent (treesit-node-index node)))))
(defun treesit-node-get (node instructions)
  "Get things from NODE by INSTRUCTIONS.

This is a convenience function that chains together multiple node
accessor functions.  For example, to get NODE's parent's next
sibling's second child's text, call

  (treesit-node-get node
    \\='((parent 1)
      (sibling 1 nil)
      (child 1 nil)
      (text nil)))

INSTRUCTIONS is a list of INSTRUCTIONs of the form (FN ARG...).
The following FN's are supported:

\(child IDX NAMED)         Get the IDX'th child
\(parent N)                Go to parent N times
\(field-name)              Get the field name of the current node
\(type)                    Get the type of the current node
\(text NO-PROPERTY)        Get the text of the current node
\(children NAMED)          Get a list of children
\(sibling STEP NAMED)      Get the nth prev/next sibling, negative STEP
                          means prev sibling, positive means next

Note that arguments like NAMED and NO-PROPERTY can't be omitted,
unlike in their original functions.

Processing stops as soon as the current value becomes nil.
Instructions of any other shape are ignored."
  (declare (indent 1))
  (cl-loop
   for instruction in instructions
   while node
   do (setq node
            (pcase instruction
              ('(field-name) (treesit-node-field-name node))
              ('(type) (treesit-node-type node))
              (`(child ,idx ,named)
               (treesit-node-child node idx named))
              (`(parent ,n)
               (let ((ancestor node))
                 (dotimes (_ n)
                   (setq ancestor (treesit-node-parent ancestor)))
                 ancestor))
              (`(text ,no-property)
               (treesit-node-text node no-property))
              (`(children ,named)
               (treesit-node-children node named))
              (`(sibling ,step ,named)
               (let ((sibling node))
                 (dotimes (_ (abs step))
                   (setq sibling
                         (if (> step 0)
                             (treesit-node-next-sibling sibling named)
                           (treesit-node-prev-sibling sibling named))))
                 sibling))
              ;; Unrecognized instruction: leave the value untouched.
              (_ node))))
  node)
;;; Query API supplement
(defun treesit-query-string (string query language)
  "Query STRING with QUERY in LANGUAGE.
STRING is parsed in a temporary buffer and QUERY is run against
the root node of the resulting tree.
See `treesit-query-capture' for QUERY."
  (with-temp-buffer
    (insert string)
    (treesit-query-capture
     (treesit-parser-root-node (treesit-parser-create language))
     query)))
(defun treesit-query-range (node query &optional beg end offset)
  "Query the current buffer and return ranges of captured nodes.

QUERY, NODE, BEG, END are the same as in `treesit-query-capture'.
This function returns a list of (START . END), where START and
END specify the range of each captured node.  OFFSET is an
optional pair of numbers (START-OFFSET . END-OFFSET).  The
respective offset values are added to each (START . END) range
being returned.  Capture names generally don't matter, but names
that start with an underscore are ignored."
  (let ((start-shift (or (car offset) 0))
        (end-shift (or (cdr offset) 0))
        (ranges nil))
    (dolist (capture (treesit-query-capture node query beg end))
      (let ((capture-name (symbol-name (car capture)))
            (captured (cdr capture)))
        ;; Captures named "_..." are helpers, not ranges.
        (unless (string-prefix-p "_" capture-name)
          (push (cons (+ (treesit-node-start captured) start-shift)
                      (+ (treesit-node-end captured) end-shift))
                ranges))))
    (nreverse ranges)))
;;; Range API supplement
(defvar-local treesit-range-settings nil
  "A list of range settings.

Each element of the list is of the form (QUERY LANGUAGE LOCAL-P
OFFSET).  When updating the range of each parser in the buffer,
`treesit-update-ranges' queries each QUERY, and sets LANGUAGE's
range to the range spanned by captured nodes.  QUERY must be a
compiled query.  If LOCAL-P is t, give each range a separate
local parser rather than using a single parser for all the
ranges.  If OFFSET is non-nil, it should be a cons of
numbers (START-OFFSET . END-OFFSET), where the start and end
offset are added to each queried range to get the result ranges.

Capture names generally don't matter, but names that start with
an underscore are ignored.

QUERY can also be a function, in which case it is called with 2
arguments, START and END.  It should ensure parsers' ranges are
correct in the region between START and END.

The exact form of each setting is considered internal and subject
to change.  Use `treesit-range-rules' to set this variable.")
(defun treesit-range-rules (&rest query-specs)
  "Produce settings for `treesit-range-settings'.

QUERY-SPECS are a series of QUERY-SPECs, where each QUERY-SPEC is
a QUERY preceded by zero or more pairs of :KEYWORD and VALUE,
like this:

    :KEYWORD VALUE... QUERY

Each QUERY is a tree-sitter query in either the string,
s-expression or compiled form.

Capture names generally don't matter, but names that start with
an underscore are ignored.

For each QUERY, :KEYWORD and VALUE pairs add meta information to
it.  For example,

    (treesit-range-rules
     :embed \\='javascript
     :host \\='html
     :offset \\='(1 . -1)
     \\='((script_element (raw_text) @cap)))

The `:embed' keyword specifies the embedded language, and the
`:host' keyword specifies the host language.  They are used in
this way: Emacs queries QUERY in the host language's parser,
computes the ranges spanned by the captured nodes, and applies
these ranges to parsers for the embedded language.

If there's a `:local' keyword with value t, the range computed by
this QUERY is given a dedicated local parser.  Otherwise, the
range shares the same parser with other ranges.

If there's an `:offset' keyword with a pair of numbers, each
captured range is offset by those numbers.  For example, an
offset of (1 . -1) will update a captured range of (2 . 8) to
be (3 . 7).  This can be used to exclude things like surrounding
delimiters from being included in the range covered by an
embedded parser.

All keywords apply only to the QUERY that immediately follows
them; they are reset before the next QUERY-SPEC is processed.

QUERY can also be a function that takes two arguments, START and
END.  If QUERY is a function, it doesn't need the :KEYWORD VALUE
pair preceding it.  This function should set the ranges for
parsers in the current buffer in the region between START and
END.  It is OK for this function to set ranges in a larger region
that encompasses the region between START and END.

Signal `treesit-error' if a keyword value has the wrong type, or
if `:embed' or `:host' is missing for a non-function QUERY."
  (let (host embed offset local result)
    (while query-specs
      (pcase (pop query-specs)
        (:local (when (eq t (pop query-specs))
                  (setq local t)))
        (:host (let ((host-lang (pop query-specs)))
                 (unless (symbolp host-lang)
                   (signal 'treesit-error (list "Value of :host option should be a symbol" host-lang)))
                 (setq host host-lang)))
        (:embed (let ((embed-lang (pop query-specs)))
                  (unless (symbolp embed-lang)
                    (signal 'treesit-error (list "Value of :embed option should be a symbol" embed-lang)))
                  (setq embed embed-lang)))
        (:offset (let ((range-offset (pop query-specs)))
                   (unless (and (consp range-offset)
                                (numberp (car range-offset))
                                (numberp (cdr range-offset)))
                     (signal 'treesit-error (list "Value of :offset option should be a pair of numbers" range-offset)))
                   (setq offset range-offset)))
        ;; Anything that is not a known keyword terminates the
        ;; current QUERY-SPEC.
        (query (if (functionp query)
                   (push (list query nil nil) result)
                 (when (null embed)
                   (signal 'treesit-error (list "Value of :embed option cannot be omitted")))
                 (when (null host)
                   (signal 'treesit-error (list "Value of :host option cannot be omitted")))
                 (push (list (treesit-query-compile host query)
                             embed local offset)
                       result))
               ;; Reset every per-query option, including `local';
               ;; otherwise a single `:local t' would silently turn
               ;; all subsequent queries into local ones.
               (setq host nil embed nil offset nil local nil))))
    (nreverse result)))
(defun treesit--merge-ranges (old-ranges new-ranges start end)
  "Merge OLD-RANGES and NEW-RANGES, discarding ranges between START and END.

OLD-RANGES and NEW-RANGES are lists of cons of the form (BEG . END),
each sorted by position.  When merging the two lists, if a range
in OLD-RANGES intersects with another range in NEW-RANGES,
discard the one in OLD-RANGES and keep the one in NEW-RANGES.
Also discard any range in OLD-RANGES that intersects the region
marked by START and END, even when NEW-RANGES is empty or runs
out first.

Return the merged list of ranges."
  (let ((result nil)
        ;; Non-nil if RANGE overlaps the START-END region.
        (in-region-p (lambda (range)
                       (and (< start (cdr range))
                            (< (car range) end)))))
    (while (and old-ranges new-ranges)
      (let ((new-beg (caar new-ranges))
            (new-end (cdar new-ranges))
            (old-beg (caar old-ranges))
            (old-end (cdar old-ranges)))
        (cond
         ;; Old range intersects with START-END, discard.
         ((funcall in-region-p (car old-ranges))
          (setq old-ranges (cdr old-ranges)))
         ;; New range and old range don't intersect, new comes
         ;; before, push new.
         ((<= new-end old-beg)
          (push (car new-ranges) result)
          (setq new-ranges (cdr new-ranges)))
         ;; New range and old range don't intersect, old comes
         ;; before, push old.
         ((<= old-end new-beg)
          (push (car old-ranges) result)
          (setq old-ranges (cdr old-ranges)))
         (t ;; New and old range intersect, discard old.
          (setq old-ranges (cdr old-ranges))))))
    ;; At this point at most one of the two lists has left-overs.
    ;; Left-over old ranges must still be filtered against START-END;
    ;; pushing them unconditionally would keep stale ranges alive
    ;; (e.g. when the embedded code was deleted and NEW-RANGES is
    ;; empty).
    (dolist (range old-ranges)
      (unless (funcall in-region-p range)
        (push range result)))
    ;; Left-over new ranges are always kept.
    (dolist (range new-ranges)
      (push range result))
    (nreverse result)))
(defun treesit--clip-ranges (ranges start end)
  "Clip RANGES in between START and END.
RANGES is a list of ranges of the form (BEG . END).  Only ranges
that lie entirely within the region between START and END are
kept; every other range is thrown away as a whole."
  (seq-filter (lambda (range)
                (<= start (car range) (cdr range) end))
              ranges))
(defun treesit-local-parsers-at (&optional pos language)
  "Return all the local parsers at POS.
POS defaults to point.

Local parsers are those which only parse a limited region marked
by an overlay with non-nil `treesit-parser' property.
If LANGUAGE is non-nil, only return parsers for LANGUAGE."
  (cl-loop for ov in (overlays-at (or pos (point)))
           for parser = (overlay-get ov 'treesit-parser)
           when (and parser
                     (or (null language)
                         (eq (treesit-parser-language parser)
                             language)))
           collect parser))
(defun treesit-local-parsers-on (&optional beg end language)
  "Return all the local parsers between BEG and END.
BEG and END default to the beginning and end of the buffer's
accessible portion.

Local parsers are those which have an `embedded' tag, and only parse
a limited region marked by an overlay with a non-nil `treesit-parser'
property.  If LANGUAGE is non-nil, only return parsers for LANGUAGE."
  (cl-loop for ov in (overlays-in (or beg (point-min))
                                  (or end (point-max)))
           for parser = (overlay-get ov 'treesit-parser)
           when (and parser
                     (or (null language)
                         (eq (treesit-parser-language parser)
                             language)))
           collect parser))
(defun treesit--update-ranges-local
    (query embedded-lang &optional beg end)
  "Update range for local parsers between BEG and END.
Use QUERY to get the ranges, and make sure each range has a local
parser for EMBEDDED-LANG.  BEG and END default to the beginning
and end of the buffer's accessible portion when looking for
overlays to clean up."
  ;; Clean up: drop parser overlays that have collapsed to zero
  ;; length, together with their parsers.
  (dolist (ov (overlays-in (or beg (point-min)) (or end (point-max))))
    (let ((stale-parser (overlay-get ov 'treesit-parser)))
      (when (and stale-parser
                 (eq (overlay-start ov) (overlay-end ov)))
        (delete-overlay ov)
        (treesit-parser-delete stale-parser))))
  ;; Update range.
  (let ((host-lang (treesit-query-language query)))
    (dolist (range (treesit-query-range host-lang query beg end))
      (let ((range-beg (car range))
            (range-end (cdr range))
            (reused nil))
        ;; Re-point every existing local parser for EMBEDDED-LANG
        ;; found in this range.
        (dolist (ov (overlays-in range-beg range-end))
          (let ((candidate (overlay-get ov 'treesit-parser)))
            (when (and (treesit-parser-p candidate)
                       (eq (treesit-parser-language candidate)
                           embedded-lang))
              (treesit-parser-set-included-ranges
               candidate (list (cons range-beg range-end)))
              (setq reused t))))
        ;; No parser covers this range yet: create the parser and the
        ;; overlay that marks its region.
        (unless reused
          (let ((new-parser (treesit-parser-create
                             embedded-lang nil t 'embedded))
                (new-ov (make-overlay range-beg range-end nil nil t)))
            (overlay-put new-ov 'treesit-parser new-parser)
            (treesit-parser-set-included-ranges
             new-parser (list (cons range-beg range-end)))))))))
(defun treesit-update-ranges (&optional beg end)
  "Update the ranges for each language in the current buffer.
If BEG and END are non-nil, only update parser ranges in that
region.  Otherwise the whole accessible portion of the buffer is
used."
  ;; When updating ranges, we want to avoid querying the whole buffer
  ;; which could be slow in very large buffers.  Instead, we only
  ;; query for nodes that intersect with the region between BEG and
  ;; END.  Also, we only update the ranges intersecting BEG and END;
  ;; outside of that region we inherit old ranges.
  (dolist (setting treesit-range-settings)
    (let ((query (car setting))
          (embedded-lang (nth 1 setting))
          (local-p (nth 2 setting))
          (offset (nth 3 setting))
          (region-beg (or beg (point-min)))
          (region-end (or end (point-max))))
      (if (functionp query)
          ;; Function queries take care of the ranges themselves.
          (funcall query region-beg region-end)
        (if local-p
            (treesit--update-ranges-local
             query embedded-lang region-beg region-end)
          (let* ((host-lang (treesit-query-language query))
                 (primary (treesit-parser-create embedded-lang))
                 (merged (treesit--merge-ranges
                          (treesit-parser-included-ranges primary)
                          (treesit-query-range
                           host-lang query region-beg region-end offset)
                          region-beg region-end))
                 (final-ranges (treesit--clip-ranges
                                merged (point-min) (point-max))))
            (dolist (parser (treesit-parser-list nil embedded-lang))
              (treesit-parser-set-included-ranges
               parser
               (or final-ranges
                   ;; When there's no range for the embedded
                   ;; language, set its range to a dummy empty range
                   ;; at the beginning of the accessible portion,
                   ;; otherwise it would be set to the whole buffer,
                   ;; which is not what we want.
                   (list (cons (point-min) (point-min))))))))))))
(defun treesit-parser-range-on (parser beg &optional end)
  "Check if PARSER's range covers the portion between BEG and END.

If it does, return the range covering that portion in the form
of (RANGE-BEG . RANGE-END), if not, return nil.  If nil or
omitted, default END to BEG.

If PARSER has no included ranges, return a range spanning the
accessible portion of the current buffer."
  (let ((ranges (treesit-parser-included-ranges parser))
        (end (or end beg)))
    (cond
     ((null ranges)
      (cons (point-min) (point-max)))
     (t
      (seq-find (lambda (rng)
                  (<= (car rng) beg end (cdr rng)))
                ranges)))))
;;; Fontification
;; Error condition for tree-sitter font-lock failures; it is declared
;; as a child of `treesit-error'.
(define-error 'treesit-font-lock-error
"Generic tree-sitter font-lock error"
'treesit-error)
(defvar-local treesit-font-lock-settings nil
  "A list of SETTINGs for treesit-based fontification.

The exact format of each SETTING is considered internal.  Use
`treesit-font-lock-rules' to set this variable.

Each SETTING has the form:

    (QUERY ENABLE FEATURE OVERRIDE)

QUERY must be a compiled query.  See Info node `(elisp)Pattern
Matching' for how to write a query and compile it.

For SETTING to be activated for font-lock, ENABLE must be t.  To
disable this SETTING, set ENABLE to nil.

FEATURE is the \"feature name\" of the query.  Users can control
which features are enabled with `treesit-font-lock-level' and
`treesit-font-lock-feature-list'.

OVERRIDE is the override flag for this query.  Its value can be
t, nil, append, prepend, keep.  See more in
`treesit-font-lock-rules'.")
(defun treesit--font-lock-level-setter (sym val)
  "Custom setter for `treesit-font-lock-level'.
Set the default value of SYM to VAL, recompute fontification
features and refontify for every buffer where tree-sitter-based
fontification is enabled, i.e., every buffer whose
`treesit-font-lock-settings' is non-nil.

Return the list of buffers that were refontified, or nil if
tree-sitter is not available."
  (set-default sym val)
  (when (treesit-available-p)
    ;; Collect the target buffers first, then refresh them.  This is
    ;; done iteratively on purpose: a self-call made from inside
    ;; `with-current-buffer' is not in tail position, so a `named-let'
    ;; loop here would recurse once per live buffer and could exceed
    ;; `max-lisp-eval-depth' in sessions with many buffers.
    (let ((targets (seq-filter
                    (lambda (buffer)
                      (buffer-local-value 'treesit-font-lock-settings
                                          buffer))
                    (buffer-list))))
      (dolist (buffer targets)
        (with-current-buffer buffer
          (setq-local treesit-font-lock-level val)
          (treesit-font-lock-recompute-features)
          (treesit-font-lock-fontify-region (point-min) (point-max))))
      targets)))
(defcustom treesit-font-lock-level 3
  "Decoration level to be used by tree-sitter fontifications.

Major modes categorize their fontification features into levels,
from 1 which is the absolute minimum, to 4 that yields the maximum
fontifications.

Level 1 usually contains only comments and definitions.
Level 2 usually adds keywords, strings, data types, etc.
Level 3 usually represents full-blown fontifications, including
assignments, constants, numbers and literals, etc.
Level 4 adds everything else that can be fontified: delimiters,
operators, brackets, punctuation, all functions, properties,
variables, etc.

In addition to the decoration level, individual features can be
turned on/off by calling `treesit-font-lock-recompute-features'.
Changing the decoration level requires calling
`treesit-font-lock-recompute-features' to have an effect, unless
done via `customize-variable'.

To see which syntactical categories are fontified by each level
in a particular major mode, examine the buffer-local value of the
variable `treesit-font-lock-feature-list'."
  :version "29.1"
  :set #'treesit--font-lock-level-setter
  :type 'integer)
;; Internal, buffer-local.  The default (0 . 0) requests no expansion
;; on either side.
(defvar-local treesit--font-lock-query-expand-range (cons 0 0)
"The amount to expand the start and end of the region when fontifying.
This should be a cons cell (START . END). When fontifying a
buffer, Emacs will move the start of the query range backward by
START amount, and the end of the query range by END amount. Both
START and END should be positive integers or 0. This doesn't
affect the fontified range.")
;; Buffer-local and nil by default.
;; NOTE(review): presumably the Nth sublist holds the features that
;; become active at decoration level N -- confirm against
;; `treesit-font-lock-recompute-features'.
(defvar-local treesit-font-lock-feature-list nil
"A list of lists of feature symbols.
Use `treesit-font-lock-recompute-features' and
`treesit-font-lock-level' to configure enabled features.
Each sublist represents a decoration level.
`treesit-font-lock-level' controls which levels are activated.
Inside each sublist are feature symbols, which correspond to the
:feature value of a query defined in `treesit-font-lock-rules'.
Removing a feature symbol from this list disables the
corresponding query during font-lock.
Common feature names (for general programming languages) include
definition, type, assignment, builtin, constant, keyword,
string-interpolation, comment, doc, string, operator, property,
preprocessor, escape-sequence, key (in key-value pairs). Major
modes are free to subdivide or extend on these common features.
See the manual for more explanations on some of the features.
For changes to this variable to take effect, run
`treesit-font-lock-recompute-features'.")
(defun treesit-font-lock-rules (&rest query-specs)
  "Return a value suitable for `treesit-font-lock-settings'.

QUERY-SPECS is a series of QUERY-SPECs.  Each QUERY-SPEC is a
QUERY preceded by multiple pairs of :KEYWORD and VALUE:

   :KEYWORD VALUE... QUERY

QUERY is a tree-sitter query in either the string, s-expression
or compiled form.  For each query, captured nodes are highlighted
with the capture name as its face.

:KEYWORD and VALUE pairs preceding a QUERY add meta information
to QUERY.  For example,

    (treesit-font-lock-rules
     :language \\='javascript
     :override t
     :feature \\='constant
     \\='((true) @font-lock-constant-face
       (false) @font-lock-constant-face)
     :language \\='html
     :feature \\='script
     \"(script_element) @font-lock-builtin-face\")

For each QUERY, a :feature keyword and a language are required.
The language comes from a :language keyword or, when that is
absent, from an earlier :default-language keyword.  Each query's
:feature is a symbol summarizing what the query fontifies.  It is
used to allow users to enable/disable certain features.  See
`treesit-font-lock-feature-list' for more.

The :language, :override and :feature values apply only to the
next QUERY and are reset after it; :default-language stays in
effect for all following queries.

Other keywords include:

  KEYWORD    VALUE      DESCRIPTION
  :override  nil        If the region already has a face,
                        discard the new face.
             t          Always apply the new face.
             `append'   Append the new face to existing ones.
             `prepend'  Prepend the new face to existing ones.
             `keep'     Fill-in regions without an existing face.
  :default-language  LANGUAGE  Every QUERY after this keyword
                               will use LANGUAGE by default.

Capture names in QUERY should be face names like
`font-lock-keyword-face'.  The captured node will be fontified
with that face.

Capture names can also be function names, in which case the
function will be called with the following argument list:

    (NODE OVERRIDE START END &rest _)

where NODE is the tree-sitter node object, OVERRIDE is the
override option of that rule, and START and END specify the region
to be fontified.  This function should accept more arguments as
optional arguments for future extensibility, and it shouldn't
fontify text outside the region given by START and END.

If a capture name is both a face and a function, the face takes
priority.  If a capture name is not a face name nor a function
name, it is ignored.

Signal `treesit-font-lock-error' if a keyword value is invalid,
if a QUERY lacks a language or a feature, or if QUERY-SPECS
contains an unexpected element.  Return nil if tree-sitter is not
available."
  ;; Other tree-sitter functions tend not to be called unless
  ;; tree-sitter is enabled, which means tree-sitter must be compiled
  ;; in.  But this function is usually called in a `defvar', which
  ;; runs regardless of whether tree-sitter is enabled.  So we need
  ;; this guard.
  (when (treesit-available-p)
    (let (;; Track the :language/:override/:feature values that the
          ;; next query will apply to.
          current-language current-override
          current-feature
          ;; DEFAULT-LANGUAGE is used when CURRENT-LANGUAGE is not
          ;; set.
          default-language
          ;; The list this function returns, built in reverse.
          (result nil))
      (while query-specs
        (let ((token (pop query-specs)))
          (pcase token
            ;; (1) Process keywords.
            (:default-language
             (let ((lang (pop query-specs)))
               (when (or (not (symbolp lang)) (null lang))
                 (signal 'treesit-font-lock-error
                         `("Value of :default-language should be a symbol"
                           ,lang)))
               (setq default-language lang)))
            (:language
             (let ((lang (pop query-specs)))
               (when (or (not (symbolp lang)) (null lang))
                 (signal 'treesit-font-lock-error
                         `("Value of :language should be a symbol"
                           ,lang)))
               (setq current-language lang)))
            (:override
             (let ((flag (pop query-specs)))
               (unless (memq flag '(t nil append prepend keep))
                 (signal 'treesit-font-lock-error
                         `("Value of :override should be one of t, nil, append, prepend, keep"
                           ,flag)))
               (setq current-override flag)))
            (:feature
             (let ((var (pop query-specs)))
               (when (or (not (symbolp var))
                         (memq var '(t nil)))
                 (signal 'treesit-font-lock-error
                         `("Value of :feature should be a symbol"
                           ,var)))
               (setq current-feature var)))
            ;; (2) Process query.
            ((pred treesit-query-p)
             ;; An explicit :language wins; :default-language is only
             ;; the fallback.
             (let ((lang (or current-language default-language)))
               (when (null lang)
                 (signal 'treesit-font-lock-error
                         `("Language unspecified, use :language keyword or :default-language to specify a language for this query" ,token)))
               (when (null current-feature)
                 (signal 'treesit-font-lock-error
                         `("Feature unspecified, use :feature keyword to specify the feature name for this query" ,token)))
               ;; Every SETTING has the shape (QUERY ENABLE FEATURE
               ;; OVERRIDE).  An already-compiled query is stored
               ;; as-is; anything else is compiled for LANG first.
               (push `(,(if (treesit-compiled-query-p token)
                            token
                          (treesit-query-compile lang token))
                       t
                       ,current-feature
                       ,current-override)
                     result)
               ;; Clear the per-query configuration.
               (setq current-language nil
                     current-override nil
                     current-feature nil)))
            (_ (signal 'treesit-font-lock-error
                       `("Unexpected value" ,token))))))
      (nreverse result))))
;; `font-lock-fontify-region-function' has the LOUDLY argument, but
;; `jit-lock-functions' doesn't pass that argument.  So even if we set
;; `font-lock-verbose' to t, if jit-lock is enabled (and it almost
;; always is), we don't get debug messages.  So we add our own.
(defvar treesit--font-lock-verbose nil
"If non-nil, print debug messages when fontifying.")
(defun treesit-font-lock-recompute-features
(&optional add-list remove-list language)
"Enable/disable font-lock features.