44import os
55import re
66import shutil
7+ from sys import platform
78from threading import Timer
89
910import snowballstemmer
# Tokenizer objects, created once when the module is loaded.
tokenizer = Tokenizer(BPE())
pre_tokenizer = Whitespace()

# Word -> translation map; translate() stores the chosen translation here.
dictionary = {}

# Ordered translator names; init() assigns this from the Emacs variable
# `dictionary-overlay-translators`.
translators = []
2729
def in_or_stem_in(word: str, words) -> bool:
    """Return True when *word*, or its Snowball stem, is contained in *words*."""
    if word in words:
        return True
    # Only compute the stem when the literal word was not found.
    stem = snowball_stemmer.stemWord(word)
    return stem in words
3033
3134async def parse (sentence : str ):
35+ '''parse the sentence'''
3236 only_unknown_words = await bridge .get_emacs_var (
3337 "dictionary-overlay-just-unknown-words"
3438 )
@@ -126,17 +130,14 @@ async def on_message(message):
126130 print (f"not fount handler for { cmd } " , flush = True )
127131
128132async def modify_translation (word : str ):
133+ "let the user to modify default translation"
129134 all_translations = []
130- # add all translations to make user select.
131- # add word translation on local dictionary.
132- all_translations .append (dictionary .get (word ))
133- # add word all translations on local sdcv dictionary
134- all_translations .extend (sdcv_translate (word ))
135- # add word web stranslation
136- result = await web_translate (word )
137- all_translations .append (result )
135+ for translator in translators :
136+ translations = await translate_by_translator (word , translator )
137+ all_translations .extend (translations )
138138 # remove duplicative translations
139- all_translations = list (set (all_translations ))
139+ # dict.fromkeys doesn't lose ordering. It's slower than list(set(items)) (takes 50-100% longer typically), but much faster than any other order-preserving solution
140+ all_translations = list (dict .fromkeys (all_translations ))
140141 sexp = dumps (all_translations )
141142 cmd = f'(dictionary-overlay-choose-translate "{ word } " \' { sexp } )'
142143 await run_and_log (cmd )
@@ -181,77 +182,87 @@ async def jump_next_unknown_word(sentence: str, point: int):
181182 for token in tokens :
182183 begin = token [1 ][0 ] + 1
183184 if point < begin :
184- cmd = "(goto-char {begin})" . format ( begin = begin )
185+ cmd = f "(goto-char { begin } )"
185186 await run_and_log (cmd )
186187 break
187188
async def jump_prev_unknown_word(sentence: str, point: int):
    """Move the Emacs cursor to the nearest parsed token that begins before *point*.

    Args:
        sentence: Text handed to ``parse()`` to obtain the tokens.
        point: Current cursor position, compared against each token start.

    Tokens are scanned from last to first, so the first match is the closest
    preceding one. Nothing is sent to Emacs when no token starts before
    *point*.
    """
    tokens = await parse(sentence)
    for token in reversed(tokens):
        # token[1][0] is the token's start offset; the +1 presumably converts
        # a 0-based offset into a 1-based Emacs position -- confirm.
        begin = token[1][0] + 1
        if point > begin:
            await run_and_log(f"(goto-char {begin})")
            break
197197
async def web_translate(word: str) -> list:
    """Translate *word* with a web translator and return the candidates.

    The ``crow`` command-line tool is used when it is found on PATH;
    otherwise the ``google_translate`` module is imported and used.

    Returns:
        A list of translations. An empty list is returned (after a message
        has been sent to Emacs) when no web translator is installed or the
        lookup fails for any other reason.
    """
    try:
        if shutil.which("crow"):
            # NOTE(review): `word` is interpolated into a command string. If
            # get_command_result runs it through a shell, a word containing
            # quotes or shell metacharacters could break or inject into the
            # command -- confirm and escape there if needed.
            result = get_command_result(
                f'crow -t zh-CN --json -e {crow_engine} "{word}"'
            )
            return [json.loads(result)["translation"]]
        # Imported lazily so the module is only required when crow is absent.
        import google_translate
        result = google_translate.translate(word, dst_lang='zh')
        return result["trans"]
    except ImportError:
        msg = f"[Dictionary-overlay]you do not have a network dictionary installed and the queried word [\"{word}\"] is not in the local dictionary, please install crow-translate or google-translate"
        print(msg)
        await bridge.message_to_emacs(msg)
        return []
    except Exception as e:
        # Best-effort boundary: log the details locally, show a short hint in Emacs.
        print(e)
        msg = "[Dictionary-overlay]web-translate error, check your network. or run (websocket-bridge-app-open-buffer 'dictionary-overlay) see the error details."
        await bridge.message_to_emacs(msg)
        return []
217217
# Runs of characters in U+4E00..U+9FA5 (CJK ideographs); compiled once.
_RE_CHINESE_WORDS = re.compile("[\u4e00-\u9fa5]+")


def extract_translations(msg: str) -> list:
    """Extract every run of Chinese characters from *msg*.

    Args:
        msg: Raw dictionary text. ``None`` or an empty string is accepted.

    Returns:
        The Chinese words in order of appearance; an empty list when *msg*
        is empty/``None`` or contains no Chinese characters.
    """
    # A failed dictionary lookup may hand us None; treat it as "no text".
    if not msg:
        return []
    return _RE_CHINESE_WORDS.findall(msg)
222222
def sdcv_translate(word: str) -> list:
    """Translate *word*, and its Snowball stem, with the sdcv dictionary.

    Returns:
        Translations extracted from the entry for *word*, followed by those
        from the entry for its stem when the stem differs from *word*.
    """
    translations = extract_translations(sdcv_dictionary.get(word))
    stem = snowball_stemmer.stemWord(word)
    # When the stem equals the word the second lookup would only repeat the
    # first one and duplicate every translation, so skip it.
    if stem != word:
        translations.extend(extract_translations(sdcv_dictionary.get(stem)))
    return translations
229229
230- async def translate (word : str ):
def local_translate(word: str) -> list:
    """Look *word* up in the in-memory ``dictionary``.

    Returns a one-element list holding the stored translation, or an empty
    list when the word is missing or its stored translation is empty.
    """
    translation = dictionary.get(word)
    if not translation:
        return []
    return [translation]
234+
async def translate_by_translator(word: str, translator: str) -> list:
    """Translate *word* with the translator identified by *translator*.

    Args:
        word: The word to translate.
        translator: One of ``"local"``, ``"sdcv"``, ``"darwin"`` or ``"web"``.

    Returns:
        The list of translations produced by the selected translator, or an
        empty list when *translator* is not a recognised name.
    """
    if translator == "local":
        # The result must be returned; without `return` the local dictionary
        # lookup was discarded and the function fell through to `return []`.
        return local_translate(word)
    if translator == "sdcv":
        return sdcv_translate(word)
    if translator == "darwin":
        return macos_dictionary_translate(word)
    if translator == "web":
        return await web_translate(word)
    return []
246+
async def translate(word: str) -> str:
    """Return the first translation any configured translator yields for *word*.

    Translators are tried in the order given by ``translators``. The chosen
    translation is also stored in ``dictionary`` under *word*. An empty
    string is returned when no translator produces a result.
    """
    for name in translators:
        candidates = await translate_by_translator(word, name)
        if not candidates:
            continue
        best = candidates[0]
        # Remember the pick in the in-memory dictionary.
        dictionary[word] = best
        return best
    return ""
237255
238256async def render (message , buffer_name ):
239257 '''call Emacs render message'''
240258 try :
241259 tokens = await parse (message )
242260 for token in tokens :
243261 word = token [0 ].lower ()
244- # first try find translation in local dictionary text
245- # dictionary contains last translation
246- chinese = dictionary .get (word , "" )
247- if chinese == "" :
248- # if first step find nothing, then try find translation on sdcv or web.
249- chinese = await translate (word )
250- dictionary [word ] = chinese
251- if chinese == "" :
252- # if find nothing, don't run render function in emacs.
253- return
254- await render_word (token , chinese , buffer_name )
262+ chinese = await translate (word )
263+ if chinese != "" :
264+ # if find translation, render function in emacs.
265+ await render_word (token , chinese , buffer_name )
255266 except Exception as e :
256267 msg = "[Dictionary-overlay]Render buffer error. Run (websocket-bridge-app-open-buffer 'dictionary-overlay) see the error details"
257268 await bridge .message_to_emacs (msg )
@@ -276,9 +287,10 @@ async def main():
276287 await asyncio .gather (init (), bridge .start ())
277288
278289async def init ():
279- global dictionary_file_path , knownwords_file_path , unknownwords_file_path , known_words , unknown_words , crow_engine , dictionary
290+ global dictionary_file_path , knownwords_file_path , unknownwords_file_path , known_words , unknown_words , crow_engine , dictionary , translators
280291 crow_engine = await bridge .get_emacs_var ("dictionary-overlay-crow-engine" )
281292 crow_engine = crow_engine .strip ('"' )
293+ translators = json .loads (await bridge .get_emacs_var ("dictionary-overlay-translators" ))
282294 user_data_directory = await bridge .get_emacs_var ("dictionary-overlay-user-data-directory" )
283295 user_data_directory = os .path .expanduser (user_data_directory .strip ('"' ))
284296 dictionary_file_path = os .path .join (user_data_directory , "dictionary.json" )
@@ -292,6 +304,7 @@ async def init():
292304 with open (unknownwords_file_path , "r" , encoding = "utf-8" ) as f : unknown_words = set (f .read ().split ())
293305
294306def create_user_data_file_if_not_exist (path : str , content = None ):
307+ '''create user data file if not exist'''
295308 if not os .path .exists (path ):
296309 # Build parent directories when file is not exist.
297310 basedir = os .path .dirname (path )
@@ -304,4 +317,13 @@ def create_user_data_file_if_not_exist(path: str, content=None):
304317
305318 print (f"[dictionary-overlay] auto create user data file { path } " )
306319
def macos_dictionary_translate(word: str) -> list:
    """Translate *word* with the macOS system dictionary.

    Returns:
        Chinese translations extracted from the dictionary definition, or an
        empty list when not running on macOS, when the ``CoreServices``
        Python module cannot be imported, or when no definition is found.
    """
    if platform != "darwin":
        return []
    try:
        # Imported lazily: the module only exists on macOS installations.
        import CoreServices
    except ImportError:
        # Let the caller fall through to the next translator instead of
        # aborting the whole lookup.
        print("[dictionary-overlay] CoreServices module is not available, skip macOS dictionary", flush=True)
        return []
    definition = CoreServices.DCSCopyTextDefinition(None, word, (0, len(word)))
    # A missing definition comes back falsy; normalise it to an empty string.
    return extract_translations(definition or "")
328+
# Start the event loop only when executed as a script, so importing the
# module (e.g. from a test) does not launch the bridge.
if __name__ == "__main__":
    asyncio.run(main())
0 commit comments