Subject: [PATCH 5/5] Support long text for translate_tts To: hirofumi@mail.parknet.co.jp From: OGAWA Hirofumi Date: Thu, 08 Dec 2016 15:51:50 +0900 Message-ID: <713a9aa83e858490306523829.ps@mail.parknet.co.jp> References: <96897a153e858490306123829.ps@mail.parknet.co.jp> <4f1496243e858490306323829.ps@mail.parknet.co.jp> In-Reply-To: If the text is longer than 200 characters (in my tests, the limit was 200), translate_tts returns a 404 error. To support text longer than 200 characters, this patch tries to split the text at the end of a sentence if possible, falling back to punctuation and then to a word boundary. It then passes all of the resulting URLs to google-translate-listen-program as arguments in a single invocation, like the following: program "http://...&total=2&idx=0..." "http://...&total=2&idx=1..." With this, the program can process long text. --- google-translate-core-ui.el | 12 ++++--- google-translate-core.el | 69 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 9 deletions(-) diff -puN google-translate-core-ui.el~split-long-text google-translate-core-ui.el --- google-translate/google-translate-core-ui.el~split-long-text 2016-12-08 15:51:19.325857192 +0900 +++ google-translate-hirofumi/google-translate-core-ui.el 2016-12-08 15:51:19.333857167 +0900 @@ -629,12 +629,14 @@ clicked." 
(if google-translate-translation-listening-debug (with-current-buffer (get-buffer-create buf) (insert (format "Listen program: %s\r\n" google-translate-listen-program)) - (insert (format "Listen URL: %s\r\n" (google-translate-format-listen-url text language))) - (call-process google-translate-listen-program nil t nil - (format "%s" (google-translate-format-listen-url text language))) + (mapc (lambda (x) + (insert (format "Listen URL: %s\r\n" x))) + (google-translate-format-listen-urls text language)) + (apply 'call-process google-translate-listen-program nil t nil + (google-translate-format-listen-urls text language)) (switch-to-buffer buf)) - (call-process google-translate-listen-program nil nil nil - (format "%s" (google-translate-format-listen-url text language)))))) + (apply 'call-process google-translate-listen-program nil nil nil + (google-translate-format-listen-urls text language))))) (defun google-translate-translate (source-language target-language text &optional output-destination) "Translate TEXT from SOURCE-LANGUAGE to TARGET-LANGUAGE. diff -puN google-translate-core.el~split-long-text google-translate-core.el --- google-translate/google-translate-core.el~split-long-text 2016-12-08 15:51:19.328857183 +0900 +++ google-translate-hirofumi/google-translate-core.el 2016-12-08 15:51:19.330857176 +0900 @@ -1,4 +1,4 @@ -;;; google-translate-core.el --- google-translate core script. +;;; google-translate-core.el --- google-translate core script. -*- coding: utf-8 -*- ;; Copyright (C) 2012 Oleksandr Manzyuk @@ -87,6 +87,54 @@ "en") "Host language to translate.") +(defvar google-translate-punctuation-re "[,、]" + "Regexp describing the punctuation.") + +(defvar google-translate-listen-maxlen 200 + "Split text for tts url to less than this. If 0, disable split.") + +(defun google-translate--split-text (text maxlen) + "Split TEXT to less than MAXLEN at applicable point for translating." 
+ (let (result) + (if (or (null maxlen) (<= maxlen 0)) + (push text result) + ;; split long text? + (with-temp-buffer + (save-excursion (insert text)) + ;; strategy to split at applicable point + ;; 1) fill-region remaining text by maxlen + ;; 2) find end of sentence, end of punctuation, word boundary + ;; 3) consume from remaining text between start and (2) + ;; 4) repeat + (let ((fill-column (* maxlen 3)) + (sentence-end-double-space nil) + (pos (point-min))) + (while (< pos (point-max)) + (save-restriction + (narrow-to-region pos (point-max)) + (fill-region pos (point-max)) + (let ((limit (+ pos maxlen))) + (if (>= limit (point-max)) + (setq limit (point-max)) + (goto-char limit) + ;; try to split at end of sentence + (if (> (backward-sentence) pos) + (setq limit (point)) + ;; try to split at end of punctuation + (goto-char limit) + (if (re-search-backward google-translate-punctuation-re + pos t) + (setq limit (1+ (point))) ; include punctuation + (goto-char limit) + ;; try to split at word boundary + (forward-word-strictly -1) + (when (> (point) pos) + (setq limit (point)))))) + (push (buffer-substring-no-properties pos limit) result) + (goto-char limit) + (setq pos limit))))))) + (reverse result))) + (defun google-translate--format-query-string (query-params) "Format QUERY-PARAMS as a query string. @@ -113,19 +161,32 @@ QUERY-PARAMS must be an alist of field-v "?" (google-translate--format-query-string query-params))) -(defun google-translate-format-listen-url (text language) +(defun google-translate-format-listen-url (text language &optional total idx) "Format listen url for TEXT and TARGET-LANGUAGE." (google-translate--format-listen-url `(("ie" . "UTF-8") ("q" . ,text) ("tl" . ,language) - ("total" . "1") - ("idx" . "0") + ("total" . ,(or total "1")) + ("idx" . ,(or idx "0")) ("textlen" . ,(number-to-string (length text))) ("client" . "t") ("prev" . "input") ("tk" . ,(google-translate--gen-tk text)) ("ttsspeed" . 
,google-translate-ttsspeed)))) +(defun google-translate-format-listen-urls (text language) + "Split TEXT with `google-translate--split-text', then format +listen url for TEXT and TARGET-LANGUAGE." + (let* ((texts (google-translate--split-text + text google-translate-listen-maxlen)) + (total (number-to-string (length texts))) + (idx 0)) + (mapcar (lambda (x) + (prog1 (google-translate-format-listen-url x language total + (number-to-string idx)) + (setq idx (1+ idx)))) + texts))) + (defun google-translate--http-response-body (url &optional for-test-purposes) "Retrieve URL and return the response body as a string." (with-current-buffer (url-retrieve-synchronously url) _