Skip to content

Commit

Permalink
Unknown line endings in filepos-to-bufferpos/bufferpos-to-filepos
Browse files Browse the repository at this point in the history
* lisp/international/mule-util.el (filepos-to-bufferpos): Give
better errors on `exact' with unknown line endings, and guess at
Unix if `approximate' (bug#36573).
(bufferpos-to-filepos): Ditto.
  • Loading branch information
larsmagne committed Aug 13, 2021
1 parent 6247540 commit 37dbf10
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 4 deletions.
25 changes: 21 additions & 4 deletions lisp/international/mule-util.el
Expand Up @@ -333,13 +333,20 @@ QUALITY can be:
`approximate', in which case we may cut some corners to avoid
excessive work.
`exact', in which case we may end up re-(en/de)coding a large
part of the file/buffer, this can be expensive and slow.
part of the file/buffer, this can be expensive and slow. (It
is an error to request the `exact' method when the buffer's
EOL format is not yet decided.)
nil, in which case we may return nil rather than an approximation."
(unless coding-system (setq coding-system buffer-file-coding-system))
(let ((eol (coding-system-eol-type coding-system))
(type (coding-system-type coding-system))
(base (coding-system-base coding-system))
(pm (save-restriction (widen) (point-min))))
;; Handle EOL edge cases.
(unless (numberp eol)
(if (eq quality 'exact)
(error "Unknown EOL format in coding system: %s" coding-system)
(setq eol 0)))
(and (eq type 'utf-8)
;; Any post-read/pre-write conversions mean it's not really UTF-8.
(not (null (coding-system-get coding-system :post-read-conversion)))
Expand Down Expand Up @@ -409,14 +416,24 @@ QUALITY can be:
`approximate', in which case we may cut some corners to avoid
excessive work.
`exact', in which case we may end up re-(en/de)coding a large
part of the file/buffer, this can be expensive and slow.
part of the file/buffer, this can be expensive and slow. (It
is an error to request the `exact' method when the buffer's
EOL format is not yet decided.)
nil, in which case we may return nil rather than an approximation."
(unless coding-system (setq coding-system buffer-file-coding-system))
(let* ((eol (coding-system-eol-type coding-system))
(lineno (if (= eol 1) (1- (line-number-at-pos position)) 0))
(type (coding-system-type coding-system))
(base (coding-system-base coding-system))
(point-min 1)) ;Clarify what the `1' means.
(point-min 1) ;Clarify what the `1' means.
lineno)
;; Handle EOL edge cases.
(unless (numberp eol)
(if (eq quality 'exact)
(error "Unknown EOL format in coding system: %s" coding-system)
(setq eol 0)))
(setq lineno (if (= eol 1)
(1- (line-number-at-pos position))
0))
(and (eq type 'utf-8)
;; Any post-read/pre-write conversions mean it's not really UTF-8.
(not (null (coding-system-get coding-system :post-read-conversion)))
Expand Down
2 changes: 2 additions & 0 deletions test/lisp/international/mule-util-resources/utf-8.txt
@@ -0,0 +1,2 @@
Thís is a test line 1.
Line 2.
40 changes: 40 additions & 0 deletions test/lisp/international/mule-util-tests.el
Expand Up @@ -22,6 +22,7 @@
;;; Code:

(require 'ert)
(require 'ert-x)
(require 'mule-util)

(defconst mule-util-test-truncate-data
Expand Down Expand Up @@ -82,4 +83,43 @@
;; Call `mule-util-test-truncate-create' once for every index of
;; `mule-util-test-truncate-data', from 0 up to (but excluding) its length.
;; NOTE(review): presumably each call defines one ERT test for the
;; corresponding data entry; the helper is defined outside this view -- confirm.
(dotimes (i (length mule-util-test-truncate-data))
(mule-util-test-truncate-create i))

(ert-deftest filepos/bufferpos-tests-utf-8 ()
  "Convert file and buffer positions exactly in a `utf-8-unix' buffer."
  (let ((coding-system-for-read 'utf-8-unix))
    (with-temp-buffer
      (insert-file-contents (ert-resource-file "utf-8.txt"))
      (should (eq buffer-file-coding-system 'utf-8-unix))
      ;; The fixture's first line is "Thís is a test line 1.".
      ;; File positions count bytes from 0; buffer positions count
      ;; characters from 1.  Each entry below is (FILEPOS . BUFFERPOS):
      ;; the first lies before the non-ASCII character, the second
      ;; right after it.
      (dolist (pair '((1 . 2) (4 . 4)))
        (let ((file-pos (car pair))
              (buf-pos (cdr pair)))
          (should (= (filepos-to-bufferpos file-pos 'exact) buf-pos))
          (should (= (bufferpos-to-filepos buf-pos 'exact) file-pos)))))))

(ert-deftest filepos/bufferpos-tests-binary ()
  "Convert file and buffer positions exactly when the file is read as binary."
  (let ((coding-system-for-read 'binary))
    (with-temp-buffer
      (insert-file-contents (ert-resource-file "utf-8.txt"))
      (should (eq buffer-file-coding-system 'no-conversion))
      ;; The fixture's first line is "Thís is a test line 1.".
      ;; File positions count bytes from 0; buffer positions count
      ;; characters from 1.  Each entry below is (FILEPOS . BUFFERPOS):
      ;; the first lies before the non-ASCII character, the second
      ;; right after it.
      (dolist (pair '((1 . 2) (4 . 5)))
        (let ((file-pos (car pair))
              (buf-pos (cdr pair)))
          (should (= (filepos-to-bufferpos file-pos 'exact) buf-pos))
          (should (= (bufferpos-to-filepos buf-pos 'exact) file-pos)))))))

(ert-deftest filepos/bufferpos-tests-undecided ()
  "Check position conversion when the buffer's coding system is `undecided'.
The `exact' quality must signal an error; `approximate' must still
return a position."
  (let ((coding-system-for-read 'binary))
    (with-temp-buffer
      (insert-file-contents (ert-resource-file "utf-8.txt"))
      (setq buffer-file-coding-system 'undecided)
      ;; Exact conversion is refused in both directions.
      (should-error (filepos-to-bufferpos 1 'exact))
      (should-error (bufferpos-to-filepos 2 'exact))
      ;; Approximate conversion still answers.  Each entry below is
      ;; (FILEPOS . BUFFERPOS): the first lies before the non-ASCII
      ;; character, the second right after it.
      (dolist (pair '((1 . 2) (4 . 5)))
        (let ((file-pos (car pair))
              (buf-pos (cdr pair)))
          (should (= (filepos-to-bufferpos file-pos 'approximate) buf-pos))
          (should (= (bufferpos-to-filepos buf-pos 'approximate) file-pos)))))))

;;; mule-util-tests.el ends here

0 comments on commit 37dbf10

Please sign in to comment.