# Patch summary (free text ahead of the first `diff --git' header).
#
# lisp/international/mule-util.el -- two hunks, each inside a function whose
#   docstring describes a QUALITY argument.  The defun lines fall outside the
#   hunks; presumably these are `filepos-to-bufferpos' and
#   `bufferpos-to-filepos', the functions the new tests call -- confirm
#   against the full file.
#   * Both hunks extend the `exact' docstring entry to say that requesting it
#     while the EOL format is undecided is an error.
#   * Both hunks add a guard after the bindings: when
#     (coding-system-eol-type coding-system) is not a number, signal
#     "Unknown EOL format in coding system: %s" if QUALITY is `exact',
#     otherwise set `eol' to 0.
#   * The second hunk also moves the `lineno' computation out of the `let*'
#     bindings to after that guard, so `(= eol 1)' is evaluated only once
#     `eol' is known to be a number.
#
# test/lisp/international/mule-util-resources/utf-8.txt -- new two-line
#   fixture; its first line contains one non-ASCII character.
#
# test/lisp/international/mule-util-tests.el -- requires `ert-x' and adds
#   three ERT tests that read the fixture and check both conversion
#   directions: as `utf-8-unix' (exact), as `binary' (exact), and with
#   `buffer-file-coding-system' set to `undecided' (exact must signal an
#   error; approximate must give the same positions as the binary test).
#
# NOTE(review): this copy of the patch appears to have lost its original
#   line breaks (each hunk's lines are joined by single spaces); restore them
#   before trying to apply it.
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el index 55449599fe97..38d29cb2385b 100644 --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el @@ -333,13 +333,20 @@ QUALITY can be: `approximate', in which case we may cut some corners to avoid excessive work. `exact', in which case we may end up re-(en/de)coding a large - part of the file/buffer, this can be expensive and slow. + part of the file/buffer, this can be expensive and slow. (It + is an error to request the `exact' method when the buffer's + EOL format is not yet decided.) nil, in which case we may return nil rather than an approximation." (unless coding-system (setq coding-system buffer-file-coding-system)) (let ((eol (coding-system-eol-type coding-system)) (type (coding-system-type coding-system)) (base (coding-system-base coding-system)) (pm (save-restriction (widen) (point-min)))) + ;; Handle EOL edge cases. + (unless (numberp eol) + (if (eq quality 'exact) + (error "Unknown EOL format in coding system: %s" coding-system) + (setq eol 0))) (and (eq type 'utf-8) ;; Any post-read/pre-write conversions mean it's not really UTF-8. (not (null (coding-system-get coding-system :post-read-conversion))) @@ -409,14 +416,24 @@ QUALITY can be: `approximate', in which case we may cut some corners to avoid excessive work. `exact', in which case we may end up re-(en/de)coding a large - part of the file/buffer, this can be expensive and slow. + part of the file/buffer, this can be expensive and slow. (It + is an error to request the `exact' method when the buffer's + EOL format is not yet decided.) nil, in which case we may return nil rather than an approximation." 
(unless coding-system (setq coding-system buffer-file-coding-system)) (let* ((eol (coding-system-eol-type coding-system)) - (lineno (if (= eol 1) (1- (line-number-at-pos position)) 0)) (type (coding-system-type coding-system)) (base (coding-system-base coding-system)) - (point-min 1)) ;Clarify what the `1' means. + (point-min 1) ;Clarify what the `1' means. + lineno) + ;; Handle EOL edge cases. + (unless (numberp eol) + (if (eq quality 'exact) + (error "Unknown EOL format in coding system: %s" coding-system) + (setq eol 0))) + (setq lineno (if (= eol 1) + (1- (line-number-at-pos position)) + 0)) (and (eq type 'utf-8) ;; Any post-read/pre-write conversions mean it's not really UTF-8. (not (null (coding-system-get coding-system :post-read-conversion))) diff --git a/test/lisp/international/mule-util-resources/utf-8.txt b/test/lisp/international/mule-util-resources/utf-8.txt new file mode 100644 index 000000000000..385bbb4ba806 --- /dev/null +++ b/test/lisp/international/mule-util-resources/utf-8.txt @@ -0,0 +1,2 @@ +Thís is a test line 1. +Line 2. diff --git a/test/lisp/international/mule-util-tests.el b/test/lisp/international/mule-util-tests.el index 6518be66dbe0..0fcff9d02dd0 100644 --- a/test/lisp/international/mule-util-tests.el +++ b/test/lisp/international/mule-util-tests.el @@ -22,6 +22,7 @@ ;;; Code: (require 'ert) +(require 'ert-x) (require 'mule-util) (defconst mule-util-test-truncate-data @@ -82,4 +83,43 @@ (dotimes (i (length mule-util-test-truncate-data)) (mule-util-test-truncate-create i)) +(ert-deftest filepos/bufferpos-tests-utf-8 () + (let ((coding-system-for-read 'utf-8-unix)) + (with-temp-buffer + (insert-file-contents (ert-resource-file "utf-8.txt")) + (should (eq buffer-file-coding-system 'utf-8-unix)) + ;; First line is "Thís is a test line 1.". + ;; Bytes start counting at 0; chars at 1. + (should (= (filepos-to-bufferpos 1 'exact) 2)) + (should (= (bufferpos-to-filepos 2 'exact) 1)) + ;; After non-ASCII. 
+ (should (= (filepos-to-bufferpos 4 'exact) 4)) + (should (= (bufferpos-to-filepos 4 'exact) 4))))) + +(ert-deftest filepos/bufferpos-tests-binary () + (let ((coding-system-for-read 'binary)) + (with-temp-buffer + (insert-file-contents (ert-resource-file "utf-8.txt")) + (should (eq buffer-file-coding-system 'no-conversion)) + ;; First line is "Thís is a test line 1.". + ;; Bytes start counting at 0; chars at 1. + (should (= (filepos-to-bufferpos 1 'exact) 2)) + (should (= (bufferpos-to-filepos 2 'exact) 1)) + ;; After non-ASCII. + (should (= (filepos-to-bufferpos 4 'exact) 5)) + (should (= (bufferpos-to-filepos 5 'exact) 4))))) + +(ert-deftest filepos/bufferpos-tests-undecided () + (let ((coding-system-for-read 'binary)) + (with-temp-buffer + (insert-file-contents (ert-resource-file "utf-8.txt")) + (setq buffer-file-coding-system 'undecided) + (should-error (filepos-to-bufferpos 1 'exact)) + (should-error (bufferpos-to-filepos 2 'exact)) + (should (= (filepos-to-bufferpos 1 'approximate) 2)) + (should (= (bufferpos-to-filepos 2 'approximate) 1)) + ;; After non-ASCII. + (should (= (filepos-to-bufferpos 4 'approximate) 5)) + (should (= (bufferpos-to-filepos 5 'approximate) 4))))) + ;;; mule-util-tests.el ends here