/
encoding.go
111 lines (105 loc) · 4.31 KB
/
encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT
package linux
import (
	"strings"

	"golang.org/x/net/html/charset"
	"golang.org/x/text/encoding/ianaindex"
)
// nameMap is an override table that translates encoding names which the
// lookup libraries may not recognize into names that are handed to
// normalizeByLib instead. NormalizeEncoding consults it only after a direct
// library lookup of the original name has failed.
//
// Sources this conversion map is based on:
//   - keys: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AgentReference.html#agent-configuration-file
//   - values: https://github.com/golang/text/blob/master/encoding/htmlindex/tables.go#L52-L91
//   - key-to-value mapping: https://docs.python.org/3/library/codecs.html#standard-encodings
//     and https://github.com/golang/text/blob/master/encoding/htmlindex/tables.go#L95-L312
//
// Entries are removed if charset.Lookup works for the key.
// An empty value marks a key for which no replacement name has been chosen;
// the trailing comment on those entries records the language/region the
// encoding covers, or that it is not supported.
var nameMap = map[string]string{
"big5hkscs": "big5",
"cp424": "iso-8859-8",
"cp500": "", // Western Europe, https://en.wikipedia.org/wiki/EBCDIC_500
"cp720": "iso-8859-6",
"cp737": "iso-8859-7",
"cp775": "iso-8859-13",
"cp856": "", // Hebrew, https://en.wikipedia.org/wiki/Code_page_856
"cp857": "", // Turkish, https://en.wikipedia.org/wiki/Code_page_857
"cp858": "", // Western Europe, https://en.wikipedia.org/wiki/Code_page_858
"cp861": "", // Icelandic, https://en.wikipedia.org/wiki/Code_page_861
"cp864": "", // Arabic, https://en.wikipedia.org/wiki/Code_page_864
"cp869": "", // Greek, https://en.wikipedia.org/wiki/Code_page_869
"cp874": "windows-874",
"cp875": "", // Greek, https://en.wikipedia.org/wiki/Code_page_875
"cp932": "", // Japanese, https://en.wikipedia.org/wiki/Code_page_932_(Microsoft_Windows)
"cp949": "", // Korean, https://en.wikipedia.org/wiki/Unified_Hangul_Code
"cp950": "", // Traditional Chinese, https://en.wikipedia.org/wiki/Code_page_950
"cp1006": "", // Urdu
"cp1026": "", // Turkish
"cp1140": "", // Western Europe
"euc_jp": "euc-jp",
"euc_jis_2004": "", // Japanese, https://en.wikipedia.org/wiki/Extended_Unix_Code#EUC-JP
"euc_jisx0213": "", // Japanese, https://en.wikipedia.org/wiki/Extended_Unix_Code#EUC-JP
"euc_kr": "euc-kr",
"hz": "gbk",
"iso2022_jp": "", // Japanese
"iso2022_jp_1": "", // Japanese
"iso2022_jp_2": "", // Japanese, Korean, Simplified Chinese, Western Europe, Greek
"iso2022_jp_2004": "", // Japanese
"iso2022_jp_3": "", // Japanese
"iso2022_jp_ext": "", // Japanese
"iso2022_kr": "", // Korean
"latin_1": "windows-1252",
"iso8859_2": "iso-8859-2",
"iso8859_3": "iso-8859-3",
"iso8859_4": "iso-8859-4",
"iso8859_5": "iso-8859-5",
"iso8859_6": "iso-8859-6",
"iso8859_7": "iso-8859-7",
"iso8859_8": "iso-8859-8",
"iso8859_9": "windows-1254",
"iso8859_10": "iso-8859-10",
"iso8859_13": "iso-8859-13",
"iso8859_14": "iso-8859-14",
"iso8859_15": "iso-8859-15",
"iso8859_16": "iso-8859-16",
"johab": "", // Korean
"koi8_u": "koi8-u",
"mac_cyrillic": "x-mac-cyrillic",
"mac_greek": "", // Greek
"mac_iceland": "", // Icelandic
"mac_latin2": "", // Central and Eastern Europe
"mac_roman": "macintosh",
"mac_turkish": "", // Turkish
"ptcp154": "", // Kazakh
"shift_jis_2004": "", // Japanese, https://en.wikipedia.org/wiki/Shift_JIS#Shift_JISx0213_and_Shift_JIS-2004
"shift_jisx0213": "", // Japanese, https://en.wikipedia.org/wiki/Shift_JIS#Shift_JISx0213_and_Shift_JIS-2004
"utf_32": "", // not supported
"utf_32_be": "", // not supported
"utf_32_le": "", // not supported
"utf_16": "utf-16le",
"utf_16_be": "utf-16be",
"utf_16_le": "utf-16le",
"utf_7": "", // not supported
"utf_8": "utf-8",
"utf_8_sig": "", // not supported
}
// NormalizeEncoding resolves encoding to the name reported by the lookup
// libraries. It returns "" when no name can be resolved.
//
// The name is first passed to normalizeByLib exactly as given. If that fails,
// it is looked up in nameMap and the mapped name is resolved through
// normalizeByLib again.
//
// The nameMap keys are lowercase and underscore-separated, so the table
// lookup trims surrounding whitespace, folds case and treats '-' as '_'.
// These are the same spelling variants Python accepts for the codec names the
// keys are based on; without this, inputs such as "UTF_16" or "mac-roman"
// would miss the table.
func NormalizeEncoding(encoding string) string {
	if name := normalizeByLib(encoding); name != "" {
		return name
	}

	key := strings.ReplaceAll(strings.ToLower(strings.TrimSpace(encoding)), "-", "_")
	mapped, ok := nameMap[key]
	if !ok || mapped == "" {
		// Either the name is unknown, or it is a known name whose table
		// entry is empty (no replacement available).
		return ""
	}

	// Resolve the mapped name through the library as well, in case a value
	// in the override map is no longer supported by the library.
	return normalizeByLib(mapped)
}
// normalizeByLib asks the encoding libraries for the name they associate
// with encoding. charset.Lookup is tried first; if it reports no name, the
// IANA index is asked for the encoding and then for that encoding's name.
// The result is "" when neither library yields a name.
func normalizeByLib(encoding string) string {
	if _, canonical := charset.Lookup(encoding); canonical != "" {
		return canonical
	}

	enc, err := ianaindex.IANA.Encoding(encoding)
	if err != nil {
		return ""
	}

	ianaName, err := ianaindex.IANA.Name(enc)
	if err != nil {
		return ""
	}
	return ianaName
}