forked from melton1968/math
-
Notifications
You must be signed in to change notification settings - Fork 0
/
math-tokenize.el
149 lines (120 loc) · 4.83 KB
/
math-tokenize.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
;; Tokenization
;;
;; External names: math-tokenize-*
;; Internal names: math-t--*
(require 'math-token-re)
(require 'math-token)
(defun math-t--goto-start-of-string (&optional backward)
  "Reposition point when it lies inside a string literal.
Point is moved to the position where the enclosing string starts.
When BACKWARD is non-nil, point then continues over the whole string
with `forward-sexp', ending up at the other end of it.  When point
is not inside a string, nothing happens."
  (let ((state (syntax-ppss)))
    ;; Element 3 of the parse state is non-nil only while inside a
    ;; string; element 8 is then the position where that string starts.
    (when (nth 3 state)
      (goto-char (nth 8 state))
      ;; Travelling backward: hop over the string to reach its far end.
      (when backward
        (forward-sexp)))))
(defun math-t--goto-start-of-comment (&optional backward)
  "Reposition point when it lies inside a comment.
Point is moved to the position where the enclosing comment starts.
When BACKWARD is non-nil, point then continues over the whole comment
with `forward-comment', ending up at the other end of it.  When point
is not inside a comment, nothing happens."
  (let ((state (syntax-ppss)))
    ;; Element 4 of the parse state is non-nil only while inside a
    ;; comment; element 8 is then the position where that comment starts.
    (when (nth 4 state)
      (goto-char (nth 8 state))
      ;; Travelling backward: hop over the comment to reach its far end.
      (when backward
        (forward-comment 1)))))
(defun math-t--advance-token (backward)
  "Consume and return the lexical token adjacent to point.
When BACKWARD is nil, the token following point is consumed and point
is left just after it.  When BACKWARD is non-nil, the token preceding
point is consumed and point is left just before it.

If point starts inside a string or a comment, it is first moved out
of that construct by `math-t--goto-start-of-string' and
`math-t--goto-start-of-comment'.  Whitespace and comments are then
skipped in the direction of travel.

The return value is built by `math-token-make-instance'.  At the
boundary of the accessible region in the direction of travel (its
end going forward, its beginning going backward) an :eof token is
returned and point does not move.  A newline is consumed and yields
an :eol token at paren depth zero; at deeper nesting it is skipped
and the following token is returned instead.  A character that none
of the token regexps match is consumed by itself and returned as an
:unknown token."
  ;; If `case-fold-search' is true, upper and lower case are not
  ;; distinguishable in regular expressions.  Bind it to nil locally
  ;; so that case is significant; this has no effect outside of this
  ;; function.
  (let ((case-fold-search nil))
    ;; If point is within a string, move to the edge of the string.
    (math-t--goto-start-of-string backward)
    ;; If point is within a comment, move to the edge of the comment.
    (math-t--goto-start-of-comment backward)
    ;; Move past any white space and comments.  `point-max' is used as
    ;; a count that is certainly large enough to skip all of them.
    (forward-comment (if backward (- (point-max)) (point-max)))
    ;; Point is now adjacent to a token.
    (let ((looking (if backward
                       (lambda (re) (looking-back re (point-min) t))
                     #'looking-at))
          ;; After a match, the far edge of the token in the direction
          ;; of travel is the match beginning (backward) or end (forward).
          (match (if backward #'match-beginning #'match-end))
          ;; Single-character step in the direction of travel.
          (step (if backward -1 1)))
      (cond
       ;; Nothing left in the direction of travel: return the end
       ;; token.  Going backward the boundary is the beginning of the
       ;; buffer, not the end; testing `point-max' in both directions
       ;; made a backward scan from end of buffer return :eof at once.
       ((if backward (bobp) (eobp))
        (math-token-make-instance :eof :eof))
       ;; The adjacent character is a newline.  The character to
       ;; inspect and the step to take both depend on the direction.
       ((eql (if backward (char-before) (char-after)) ?\n)
        (forward-char step)
        ;; If we are at top-level, then return the eol token.
        ;; Otherwise, skip the eol and return the next token.
        (if (= (nth 0 (syntax-ppss)) 0)
            (math-token-make-instance :eol :eol)
          (math-t--advance-token backward)))
       ;; Identifiers
       ((funcall looking math-tok-identifier-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :identifier (match-string-no-properties 0)))
       ;; Symbols
       ((funcall looking math-tok-symbol-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :operator (match-string-no-properties 0)))
       ;; Strings
       ((funcall looking math-tok-string-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :string (match-string-no-properties 0)))
       ;; Numbers
       ((funcall looking math-tok-number-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :number (match-string-no-properties 0)))
       ;; Slots
       ((funcall looking math-tok-slot-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :slot (match-string-no-properties 0)))
       ;; Out
       ((funcall looking math-tok-out-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :out (match-string-no-properties 0)))
       ;; Operators
       ((funcall looking math-tok-operator-re)
        (goto-char (funcall match 0))
        (math-token-make-instance :operator (match-string-no-properties 0)))
       ;; Otherwise, we did not recognize the token.  Step over one
       ;; character in the direction of travel so we do not get stuck,
       ;; then return it as the unrecognized token.  The boundary
       ;; clause above guarantees that this character exists.
       (t
        (let ((ch (if backward (char-before) (char-after))))
          (forward-char step)
          (math-token-make-instance :unknown (string ch))))))))
(defun math-tokenize-next ()
"Consume and return the token following point.
Calls `math-t--advance-token' with a nil BACKWARD argument."
(interactive)
(math-t--advance-token nil))
(defun math-tokenize-prev ()
"Consume and return the token preceding point.
Calls `math-t--advance-token' with a non-nil BACKWARD argument."
(interactive)
(math-t--advance-token t))
(defun math-tokenize-region (begin end)
"Tokenize the text between BEGIN and END.
The buffer is narrowed to that span and `math-tokenize-buffer' is
called on it; point and the previous restriction are restored
afterwards.  Interactively, BEGIN and END are the bounds of the
region."
(interactive "r")
(save-excursion
(save-restriction
(narrow-to-region begin end)
(math-tokenize-buffer))))
(defun math-tokenize-buffer ()
  "Tokenize the accessible portion of the buffer and display the tokens.
Starting from `point-min', tokens are read one after another with
`math-tokenize-next' until point reaches the end of the accessible
region.  The source text of each token is printed, wrapped in single
quotes and followed by a space, to the temporary buffer
\"*math-tokenize-output*\".  A line break is emitted whenever a
token's line number exceeds that of the token before it.  Point in
the current buffer is restored afterwards."
  (interactive)
  (save-excursion
    (goto-char (point-min))
    (with-output-to-temp-buffer "*math-tokenize-output*"
      (let ((prev-line 0)
            tok
            tok-line)
        (while (not (eobp))
          (setq tok (math-tokenize-next)
                tok-line (math-token-line tok))
          ;; Start a new output line when the token's line number
          ;; is greater than the previous token's.
          (when (> tok-line prev-line)
            (terpri))
          (princ (format "'%s' " (math-token-source tok)))
          (setq prev-line tok-line))))))
(provide 'math-tokenize)