Skip to content

Commit 1dbda69

Browse files
authored
Merge pull request #57 from ginqi7/mac-dictionary
feat: support MacOS dictionary
2 parents c234e05 + 6246ee9 commit 1dbda69

File tree

3 files changed

+104
-70
lines changed

3 files changed

+104
-70
lines changed

README.org

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
- [[https://github.com/jd-boyd/sexpdata][sexpdata]] 用于把 python 对象转换为 sexp
2020
- [[https://pypi.org/project/snowballstemmer/][snowballstemmer]] 用于“词干提取”的算法包
2121
- [[https://git.ookami.one/cgit/google-translate/][google-translate]] 用于网络翻译,非必选,可以用 crow-translate 替换
22+
- [[https://pyobjc.readthedocs.io/en/latest/][pyobjc]] 非必选,macOS 用户想要使用系统词典时需要安装
2223

23-
你可以使用 ~dictionary-overlay-install~ 来安装相关的 python 包(不包括 google-translate)。
24+
你可以使用 ~dictionary-overlay-install~ 来安装相关的 python 包(不包括 google-translate 和 pyobjc)。
2425

2526
** 网络翻译
2627
默认会使用 sdcv 本地词典翻译。当单词在本地词典未找到时,会使用网络翻译,目前支持:
@@ -66,15 +67,16 @@ git clone --depth=1 -b main https://github.com/ginqi7/dictionary-overlay ~/.emac
6667

6768
* 选项
6869

69-
| 选项 | 说明 |
70-
|---------------------------------------------------+---------------------------------------------------------------|
71-
| dictionary-overlay-just-unknown-words | t 时使用“生词本”模式,nil 为“透析阅读”模式,默认为 t |
72-
| dictionary-overlay-user-data-directory | 用户数据存放 目录,默认值为:“~/.emacs.d/dictionary-overlay-data” |
73-
| dictionary-overlay-position | 显示翻译的位置:词后,help-echo, 默认在词后 |
74-
| dictionary-overlay-lookup-with | 查词词典设置:默认系统词典。可自定义第三方包,比如 youdao-dictionary, popweb |
75-
| dictionary-overlay-inihibit-keymap | t 时关闭 keymap, 默认为 nil |
76-
| dictionary-overlay-auto-jump-after | 可选项:标为生词 mark-word-known, 标为熟词 mark-word-unknwon, 刷新 render-buffer |
77-
| dictionary-overlay-translation-format | 翻译展示的形式,默认是:"(%s)" |
70+
| 选项 | 说明 |
71+
|----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
72+
| dictionary-overlay-just-unknown-words | t 时使用“生词本”模式,nil 为“透析阅读”模式,默认为 t |
73+
| dictionary-overlay-user-data-directory | 用户数据存放 目录,默认值为:“~/.emacs.d/dictionary-overlay-data” |
74+
| dictionary-overlay-position | 显示翻译的位置:词后,help-echo, 默认在词后 |
75+
| dictionary-overlay-lookup-with | 查词词典设置:默认系统词典。可自定义第三方包,比如 youdao-dictionary, popweb |
76+
| dictionary-overlay-inhibit-keymap      | t 时关闭 keymap, 默认为 nil                                                                                                                                                                                              |
77+
| dictionary-overlay-auto-jump-after     | 可选项:标为熟词 mark-word-known, 标为生词 mark-word-unknown, 刷新 render-buffer                                                                                                                                         |
78+
| dictionary-overlay-translation-format | 翻译展示的形式,默认是:"(%s)" |
79+
| dictionary-overlay-translators         | 指定使用的翻译引擎及其使用顺序。默认值为 '("local" "sdcv" "darwin" "web"),分别表示本地 dictionary.json 文件、内置的 sdcv 词典、macOS 系统词典以及 web 翻译;你可以自行选择要使用的词典及其顺序。 |
7880

7981

8082

dictionary-overlay.el

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,13 @@
3131
;; `dictionary-overlay-stop'
3232
;; Stop dictionary-overlay.
3333
;; `dictionary-overlay-restart'
34-
;; Restart dictionary-overlay and show process.
34+
;; Restart dictionary-overlay.
3535
;; `dictionary-overlay-render-buffer'
3636
;; Render current buffer.
3737
;; `dictionary-overlay-toggle'
3838
;; Toggle current buffer.
39+
;; `dictionary-overlay-refresh-buffer'
40+
;; Refresh current buffer.
3941
;; `dictionary-overlay-jump-next-unknown-word'
4042
;; Jump to next unknown word.
4143
;; `dictionary-overlay-jump-prev-unknown-word'
@@ -44,22 +46,22 @@
4446
;; Jump to first unknown word.
4547
;; `dictionary-overlay-jump-last-unknown-word'
4648
;; Jump to last unknown word.
49+
;; `dictionary-overlay-jump-out-of-overlay'
50+
;; Jump out of the overlay so that its keymap is no longer active.
4751
;; `dictionary-overlay-mark-word-known'
4852
;; Mark current word known.
4953
;; `dictionary-overlay-mark-word-unknown'
5054
;; Mark current word unknown.
51-
;; `dictionary-overlay-jump-out-of-overlay'
52-
;; Move cursor out of overlay.
5355
;; `dictionary-overlay-mark-word-smart'
54-
;; Smartly mark current word as known or unknow.
56+
;; Smartly mark current word as known or unknown.
5557
;; `dictionary-overlay-mark-word-smart-reversely'
56-
;; Smartly mark current word as known or unknow, inverse version of the above.
58+
;; Smartly mark current word as known or unknown, but in reverse.
5759
;; `dictionary-overlay-mark-buffer'
5860
;; Mark all words as known, except those in `unknownwords' list.
5961
;; `dictionary-overlay-mark-buffer-unknown'
6062
;; Mark all words as unknown, except those in `unknownwords' list.
6163
;; `dictionary-overlay-lookup'
62-
;; Look up word at cursor
64+
;; Look up word.
6365
;; `dictionary-overlay-install'
6466
;; Install all python dependencies.
6567
;; `dictionary-overlay-install-google-translate'
@@ -74,29 +76,33 @@
7476
;; `dictionary-overlay-just-unknown-words'
7577
;; If t, show overlay for words in unknownwords list.
7678
;; default = t
77-
;; `dictionary-overlay-auto-jump-after'
78-
;; Auto jump after commands
79-
;; Options: 'mark-word-known, 'mark-word-unknown, 'render-buffer
80-
;; default = '()
81-
;; `dictionary-overlay-inhibit-keymap'
82-
;; If t, show overlay for words in unknownwords list.
83-
;; default = t
8479
;; `dictionary-overlay-position'
85-
;; If value is 'after, put translation after word
86-
;; If value is 'help-echo, show it when mouse over word
80+
;; Where to show translation.
8781
;; default = 'after
88-
;; `dictonary-overlay-recenter-after-mark-and-jump'
89-
;; If t, recenter after mark or jump.
90-
;; default is nil
9182
;; `dictionary-overlay-user-data-directory'
9283
;; Place user data in Emacs directory.
9384
;; default = (locate-user-emacs-file "dictionary-overlay-data/")
9485
;; `dictionary-overlay-translation-format'
95-
;; Translation format
86+
;; Translation format.
9687
;; default = "(%s)"
9788
;; `dictionary-overlay-crow-engine'
98-
;; Crow translate engine
89+
;; Crow translate engine.
9990
;; default = "google"
91+
;; `dictionary-overlay-inhibit-keymap'
92+
;; When non-nil, don't use `dictionary-overlay-map'.
93+
;; default = nil
94+
;; `dictionary-overlay-auto-jump-after'
95+
;; Auto jump to next unknown word.
96+
;; default = 'nil
97+
;; `dictonary-overlay-recenter-after-mark-and-jump'
98+
;; Recenter after mark or jump.
99+
;; default = nil
100+
;; `dictionary-overlay-lookup-with'
101+
;; Look up word with fn.
102+
;; default = 'dictionary-lookup-definition
103+
;; `dictionary-overlay-translators'
104+
;; The translators to use and their order.
105+
;; default = '("local" "sdcv" "darwin" "web")
100106

101107
;;; Code:
102108

@@ -207,6 +213,10 @@ next overlay."
207213
:group 'dictionary-overlay
208214
:type '(function))
209215

216+
(defcustom dictionary-overlay-translators '("local" "sdcv" "darwin" "web")
  "Translators to use, in priority order.
Each element is a translator name handled by the Python backend:
\"local\" (the user's dictionary.json), \"sdcv\" (the bundled sdcv
dictionary), \"darwin\" (the macOS system dictionary) and \"web\"
(web translation).  The backend reads this value during its
initialization."
  :group 'dictionary-overlay
  ;; An explicit :type lets Customize edit the list and silences the
  ;; byte-compiler warning about a defcustom without a type.
  :type '(repeat string))
219+
210220
(defvar dictionary-overlay-map
211221
(let ((map (make-sparse-keymap)))
212222
(define-key map (kbd "r") #'dictionary-overlay-refresh-buffer)

dictionary-overlay.py

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import re
66
import shutil
7+
from sys import platform
78
from threading import Timer
89

910
import snowballstemmer
@@ -24,11 +25,14 @@
2425
tokenizer = Tokenizer(BPE())
2526
pre_tokenizer = Whitespace()
2627
dictionary = {}
28+
translators = []
2729

2830
def in_or_stem_in(word:str, words) -> bool:
31+
'''Check a word or word stem in the word list'''
2932
return word in words or snowball_stemmer.stemWord(word) in words
3033

3134
async def parse(sentence: str):
35+
'''parse the sentence'''
3236
only_unknown_words = await bridge.get_emacs_var(
3337
"dictionary-overlay-just-unknown-words"
3438
)
@@ -126,17 +130,14 @@ async def on_message(message):
126130
print(f"not fount handler for {cmd}", flush=True)
127131

128132
async def modify_translation(word: str):
133+
"let the user to modify default translation"
129134
all_translations = []
130-
# add all translations to make user select.
131-
# add word translation on local dictionary.
132-
all_translations.append(dictionary.get(word))
133-
# add word all translations on local sdcv dictionary
134-
all_translations.extend(sdcv_translate(word))
135-
# add word web stranslation
136-
result = await web_translate(word)
137-
all_translations.append(result)
135+
for translator in translators:
136+
translations = await translate_by_translator(word, translator)
137+
all_translations.extend(translations)
138138
# remove duplicative translations
139-
all_translations = list(set(all_translations))
139+
# dict.fromkeys doesn't lose ordering. It's slower than list(set(items)) (takes 50-100% longer typically), but much faster than any other order-preserving solution
140+
all_translations = list(dict.fromkeys(all_translations))
140141
sexp = dumps(all_translations)
141142
cmd = f'(dictionary-overlay-choose-translate "{word}" \'{sexp})'
142143
await run_and_log(cmd)
@@ -181,77 +182,87 @@ async def jump_next_unknown_word(sentence: str, point: int):
181182
for token in tokens:
182183
begin = token[1][0] + 1
183184
if point < begin:
184-
cmd = "(goto-char {begin})".format(begin=begin)
185+
cmd = f"(goto-char {begin})"
185186
await run_and_log(cmd)
186187
break
187188

188189
async def jump_prev_unknown_word(sentence: str, point: int):
189190
tokens = await parse(sentence)
190-
# todo: write this with build-in 'any' function
191191
for token in reversed(tokens):
192192
begin = token[1][0] + 1
193193
if point > begin:
194-
cmd = "(goto-char {begin})".format(begin=begin)
194+
cmd = f"(goto-char {begin})"
195195
await run_and_log(cmd)
196196
break
197197

198-
async def web_translate(word: str) -> str:
198+
async def web_translate(word: str) -> list:
199+
'''translate word by web translator, crow or google'''
199200
try:
200201
if shutil.which("crow"):
201202
result = get_command_result(f'crow -t zh-CN --json -e {crow_engine} "{word}"')
202-
return json.loads(result)["translation"]
203-
else:
204-
import google_translate # type: ignore
205-
result = google_translate.translate(word, dst_lang='zh')
206-
return result["trans"][0]
203+
return [json.loads(result)["translation"]]
204+
import google_translate
205+
result = google_translate.translate(word, dst_lang='zh')
206+
return result["trans"]
207207
except ImportError:
208208
msg= f"[Dictionary-overlay]you do not have a network dictionary installed and the queried word [\"{word}\"] is not in the local dictionary, please install crow-translate or google-translate"
209209
print(msg)
210210
await bridge.message_to_emacs(msg)
211-
return ""
211+
return []
212212
except Exception as e:
213213
print (e)
214214
msg = "[Dictionary-overlay]web-translate error, check your network. or run (websocket-bridge-app-open-buffer 'dictionary-overlay) see the error details."
215215
await bridge.message_to_emacs(msg)
216-
return ""
216+
return []
217217

218-
def extract_translations(msg: str) -> list:
    '''Extract the Chinese fragments contained in msg.

    Returns every maximal run of characters in the range U+4E00..U+9FA5,
    in order of appearance. A falsy msg (None or "") yields an empty
    list instead of raising TypeError inside re.findall, so callers can
    pass the result of a failed dictionary lookup directly.
    '''
    if not msg:
        return []
    return re.findall("[\u4e00-\u9fa5]+", msg)
222222

223-
def sdcv_translate(word:str):
223+
def sdcv_translate(word:str) -> list:
224224
'''translate word and stem by sdcv'''
225225
stem = snowball_stemmer.stemWord(word)
226226
translations = extract_translations(sdcv_dictionary.get(word))
227227
translations.extend(extract_translations(sdcv_dictionary.get(stem)))
228228
return translations
229229

230-
async def translate(word: str):
230+
def local_translate(word:str) -> list:
    '''Look word up in the in-memory local dictionary.

    Returns a one-element list holding the stored translation, or an
    empty list when the word is absent or its stored value is falsy.
    '''
    stored = dictionary.get(word)
    if not stored:
        return []
    return [stored]
234+
235+
async def translate_by_translator(word: str, translator: str) -> list:
    '''Translate word with the single translator named by translator.

    Recognised names are "local", "sdcv", "darwin" and "web"; each is
    dispatched to the matching *_translate function. Any other name
    yields an empty list.

    Returns the list of candidate translations (possibly empty).
    '''
    if translator == "local":
        # The result must be returned: without `return` the lookup was
        # discarded and control fell through to the final `return []`,
        # so the local dictionary never contributed a translation.
        return local_translate(word)
    if translator == "sdcv":
        return sdcv_translate(word)
    if translator == "darwin":
        return macos_dictionary_translate(word)
    if translator == "web":
        return await web_translate(word)
    return []
246+
247+
async def translate(word: str) -> str:
231248
'''translate word.'''
232-
# default show the first translation in sdcv dictionary
233-
translations = sdcv_translate(word)
234-
if translations:
235-
return translations[0]
236-
return await web_translate(word)
249+
for translator in translators:
250+
translations = await translate_by_translator(word, translator)
251+
if translations:
252+
dictionary[word] = translations[0]
253+
return translations[0]
254+
return ""
237255

238256
async def render(message, buffer_name):
239257
'''call Emacs render message'''
240258
try:
241259
tokens = await parse(message)
242260
for token in tokens:
243261
word = token[0].lower()
244-
# first try find translation in local dictionary text
245-
# dictionary contains last translation
246-
chinese = dictionary.get(word, "")
247-
if chinese == "":
248-
# if first step find nothing, then try find translation on sdcv or web.
249-
chinese = await translate(word)
250-
dictionary[word] = chinese
251-
if chinese == "":
252-
# if find nothing, don't run render function in emacs.
253-
return
254-
await render_word(token, chinese, buffer_name)
262+
chinese = await translate(word)
263+
if chinese != "":
264+
# if find translation, render function in emacs.
265+
await render_word(token, chinese, buffer_name)
255266
except Exception as e:
256267
msg = "[Dictionary-overlay]Render buffer error. Run (websocket-bridge-app-open-buffer 'dictionary-overlay) see the error details"
257268
await bridge.message_to_emacs(msg)
@@ -276,9 +287,10 @@ async def main():
276287
await asyncio.gather(init(), bridge.start())
277288

278289
async def init():
279-
global dictionary_file_path, knownwords_file_path, unknownwords_file_path, known_words, unknown_words, crow_engine, dictionary
290+
global dictionary_file_path, knownwords_file_path, unknownwords_file_path, known_words, unknown_words, crow_engine, dictionary, translators
280291
crow_engine = await bridge.get_emacs_var("dictionary-overlay-crow-engine")
281292
crow_engine = crow_engine.strip('"')
293+
translators = json.loads(await bridge.get_emacs_var("dictionary-overlay-translators"))
282294
user_data_directory = await bridge.get_emacs_var("dictionary-overlay-user-data-directory")
283295
user_data_directory = os.path.expanduser(user_data_directory.strip('"'))
284296
dictionary_file_path = os.path.join(user_data_directory, "dictionary.json")
@@ -292,6 +304,7 @@ async def init():
292304
with open(unknownwords_file_path, "r", encoding="utf-8") as f: unknown_words= set(f.read().split())
293305

294306
def create_user_data_file_if_not_exist(path: str, content=None):
307+
'''create user data file if not exist'''
295308
if not os.path.exists(path):
296309
# Build parent directories when file is not exist.
297310
basedir = os.path.dirname(path)
@@ -304,4 +317,13 @@ def create_user_data_file_if_not_exist(path: str, content=None):
304317

305318
print(f"[dictionary-overlay] auto create user data file {path}")
306319

320+
def macos_dictionary_translate(word: str) -> list:
    '''Translate word with the macOS system dictionary.

    Returns the Chinese fragments extracted from the dictionary
    definition. Returns an empty list when not running on macOS, when
    the optional pyobjc package (which provides CoreServices) is not
    installed, or when the lookup produces no definition.
    '''
    if platform != "darwin":
        return []
    try:
        import CoreServices
    except ImportError:
        # pyobjc is an optional dependency; without it this translator
        # simply contributes nothing instead of aborting the whole
        # translation chain with an ImportError.
        return []
    definition = CoreServices.DCSCopyTextDefinition(None, word, (0, len(word)))
    # The lookup may give back a falsy value; normalise it to "" so the
    # regex extraction always receives a string.
    return extract_translations(definition if definition else "")
328+
307329
asyncio.run(main())

0 commit comments

Comments
 (0)