/
plugin_google_translate.ml
194 lines (183 loc) · 4.86 KB
/
plugin_google_translate.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
(*
* (c) 2008-2009 Anastasia Gornostaeva <ermine@ermine.pp.ru>
*)
open Nethtml
open Netconversion
open Pcre
open Common
open Hooks
open Plugin_command
open Http_suck
let slist = [
"auto", "Detect language";
"ar", "Arabic";
"bg", "Bulgarian";
"ca", "Catalan";
"zh-CN", "Chinese";
"hr", "Croatian";
"cs", "Czech";
"da", "Danish";
"nl", "Dutch";
"en", "English";
"tl", "Filipino";
"fi", "Finnish";
"fr", "French";
"de", "German";
"el", "Greek";
"iw", "Hebrew";
"hi", "Hindi";
"id", "Indonesian";
"it", "Italian";
"ja", "Japanese";
"ko", "Korean";
"lv", "Latvian";
"lt", "Lithuanian";
"no", "Norwegian";
"pl", "Polish";
"pt", "Portuguese";
"ro", "Romanian";
"ru", "Russian";
"sr", "Serbian";
"sk", "Slovak";
"sl", "Slovenian";
"es", "Spanish";
"sv", "Swedish";
"uk", "Ukrainian";
"vi", "Vietnamese";
]
let tlist = [
"ar", "Arabic";
"bg", "Bulgarian";
"ca", "Catalan";
"zh-CN", "Chinese (Simplified)";
"zh-TW", "Chinese (Traditional)";
"hr", "Croatian";
"cs", "Czech";
"da", "Danish";
"nl", "Dutch";
"en", "English";
"tl", "Filipino";
"fi", "Finnish";
"fr", "French";
"de", "German";
"el", "Greek";
"iw", "Hebrew";
"hi", "Hindi";
"id", "Indonesian";
"it", "Italian";
"ja", "Japanese";
"ko", "Korean";
"lv", "Latvian";
"lt", "Lithuanian";
"no", "Norwegian";
"pl", "Polish";
"pt", "Portuguese";
"ro", "Romanian";
"ru", "Russian";
"sr", "Serbian";
"sk", "Slovak";
"sl", "Slovenian";
"es", "Spanish";
"sv", "Swedish";
"uk", "Ukrainian";
"vi", "Vietnamese";
]
let list_languages =
"\n" ^ String.concat "\n" (List.map (fun (a, b) -> a ^ "\t" ^ b) slist)
let process_result doc =
let get_data els =
let rec aux_get_data acc = function
| [] -> acc
| x :: xs ->
match x with
| Data data -> aux_get_data (acc ^ data) xs
| Element (tag, _, _) ->
if tag = "br" then
aux_get_data (acc ^ "\n") xs
else
aux_get_data acc xs
in
aux_get_data "" els
in
let rec aux_find = function
| [] -> None
| x :: xs ->
match x with
| Element (tag, attrs, els1) ->
if tag = "div" &&
(try List.assoc "id" attrs with Not_found -> "") =
"result_box" then
Some (get_data els1)
else (
match aux_find els1 with
| None -> aux_find xs
| Some v -> Some v
)
| Data _ ->
aux_find xs
in
aux_find doc
let translate_text sl tl text xmpp env kind jid_from =
let callback data =
let resp = match data with
| OK (_media, charset, content) -> (
try
let enc =
match charset with
| None -> `Enc_iso88591
| Some v ->
encoding_of_string v
in
let data =
if enc <> `Enc_utf8 then
convert ~in_enc:enc ~out_enc:`Enc_utf8 content
else
content
in
let ch = new Netchannels.input_string data in
let doc = parse ~dtd:relaxed_html40_dtd ~return_declarations:false
~return_pis:false ~return_comments:false ch in
match process_result doc with
| None -> ""
| Some v -> v
with _exn ->
Lang.get_msg env.env_lang "plugin_google_translate_not_parsed" []
)
| Exception _exn ->
Lang.get_msg env.env_lang "plugin_google_translate_server_error" []
in
env.env_message xmpp kind jid_from resp
in
let url = "http://translate.google.com/translate_t" in
let data = Printf.sprintf "langpair=%s|%s&ie=UTF8&oe=UTF8&text=%s"
sl tl (Netencoding.Url.encode (Xml.decode text))
in
Http_suck.http_post url [] data callback
(* gtr er ru text *)
let cmd = Pcre.regexp ~flags:[`DOTALL; `UTF8]
"^([a-zA-Z-]{2,5})\\s+([a-zA-Z-]{2,5})\\s(.+)"
let rm_newlines = Pcre.regexp "[\n\r]"
let translate xmpp env kind jid_from text =
match trim(text) with
| "list" ->
env.env_message xmpp kind jid_from list_languages
| _ ->
try
let res = exec ~rex:cmd text in
let lg1 = get_substring res 1 in
let lg2 = get_substring res 2 in
let str = get_substring res 3 in
if List.mem_assoc lg1 slist then
if List.mem_assoc lg2 tlist then
translate_text lg1 lg2 str xmpp env kind jid_from
else
raise Not_found
else
raise Not_found
with Not_found ->
env.env_message xmpp kind jid_from
(Lang.get_msg env.env_lang "plugin_google_translate_bad_syntax" [])
let plugin opts =
()
let _ =
add_plugin "google_translate" plugin