Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 157 lines (133 sloc) 4.856 kb
f43b096 first commit
Amir authored
1 #include "TranslateUTF8Accents.hh"
2 #include <iostream>
3 #include <iterator>
4 #include <algorithm>
5 #include <sstream>
6
7
8 string
9 TranslateUTF8Accents::utf8Encode(const string & w_src)
10 {
11 string dst;
12
13 dst.reserve(w_src.size() * 2);
14 for (const char *p = w_src.c_str(); *p; p++)
15 {
16 if ((unsigned) (*p) < 0x80)
17 {
18 dst += *p;
19 }
20 else if ((unsigned) (*p) < 0xC0)
21 {
22 char encoded = 0xC2;
23 dst += encoded;
24 dst += *p;
25 }
26 else
27 {
28 char encoded = 0xC3;
29 dst += encoded;
30 encoded = *p - 0x40;
31 dst += encoded;
32 }
33 }
34 dst.resize(dst.size());
35 return dst;
36 }
37
38
39 string
40 TranslateUTF8Accents::utf8Decode(const string & w_src)
41 {
42 string dst;
43
44 dst.reserve(w_src.size());
45 for (const char *p = w_src.c_str(); *p; p++)
46 {
47 if (*p == (char) (0xC2))
48 {
49 p++;
50 dst += *p;
51 }
52 else if (*p == (char) (0xC3))
53 {
54 p++;
55 char decoded = *p + 0x40;
56 dst += decoded;
57 }
58 else
59 {
60 dst += *p;
61 }
62 }
63 dst.resize(dst.size());
64 return dst;
65 }
66
67 TranslateUTF8Accents::TranslateUTF8Accents()
68 {
69 m_charactersMap.insert(make_pair(utf8Decode("À")[0], 'A'));
70 m_charactersMap.insert(make_pair(utf8Decode("Á")[0], 'A'));
71 m_charactersMap.insert(make_pair(utf8Decode("Â")[0], 'A'));
72 m_charactersMap.insert(make_pair(utf8Decode("Ã")[0], 'A'));
73 m_charactersMap.insert(make_pair(utf8Decode("Ä")[0], 'A'));
74 m_charactersMap.insert(make_pair(utf8Decode("Å")[0], 'A'));
75
76 m_charactersMap.insert(make_pair(utf8Decode("à")[0], 'a'));
77 m_charactersMap.insert(make_pair(utf8Decode("á")[0], 'a'));
78 m_charactersMap.insert(make_pair(utf8Decode("â")[0], 'a'));
79 m_charactersMap.insert(make_pair(utf8Decode("ã")[0], 'a'));
80 m_charactersMap.insert(make_pair(utf8Decode("ä")[0], 'a'));
81 m_charactersMap.insert(make_pair(utf8Decode("å")[0], 'a'));
82
83 m_charactersMap.insert(make_pair(utf8Decode("Ò")[0], 'O'));
84 m_charactersMap.insert(make_pair(utf8Decode("Ó")[0], 'O'));
85 m_charactersMap.insert(make_pair(utf8Decode("Ô")[0], 'O'));
86 m_charactersMap.insert(make_pair(utf8Decode("Õ")[0], 'O'));
87 m_charactersMap.insert(make_pair(utf8Decode("Ö")[0], 'O'));
88 m_charactersMap.insert(make_pair(utf8Decode("Ø")[0], 'O'));
89
90 m_charactersMap.insert(make_pair(utf8Decode("ò")[0], 'o'));
91 m_charactersMap.insert(make_pair(utf8Decode("ó")[0], 'o'));
92 m_charactersMap.insert(make_pair(utf8Decode("ô")[0], 'o'));
93 m_charactersMap.insert(make_pair(utf8Decode("õ")[0], 'o'));
94 m_charactersMap.insert(make_pair(utf8Decode("ö")[0], 'o'));
95 m_charactersMap.insert(make_pair(utf8Decode("ø")[0], 'o'));
96
97 m_charactersMap.insert(make_pair(utf8Decode("È")[0], 'E'));
98 m_charactersMap.insert(make_pair(utf8Decode("É")[0], 'E'));
99 m_charactersMap.insert(make_pair(utf8Decode("Ê")[0], 'E'));
100 m_charactersMap.insert(make_pair(utf8Decode("Ë")[0], 'E'));
101
102 m_charactersMap.insert(make_pair(utf8Decode("è")[0], 'e'));
103 m_charactersMap.insert(make_pair(utf8Decode("é")[0], 'e'));
104 m_charactersMap.insert(make_pair(utf8Decode("ê")[0], 'e'));
105 m_charactersMap.insert(make_pair(utf8Decode("ë")[0], 'e'));
106
107 m_charactersMap.insert(make_pair(utf8Decode("Ç")[0], 'C'));
108 m_charactersMap.insert(make_pair(utf8Decode("ç")[0], 'c'));
109
110 m_charactersMap.insert(make_pair(utf8Decode("Ì")[0], 'I'));
111 m_charactersMap.insert(make_pair(utf8Decode("Í")[0], 'I'));
112 m_charactersMap.insert(make_pair(utf8Decode("Î")[0], 'I'));
113 m_charactersMap.insert(make_pair(utf8Decode("Ï")[0], 'I'));
114
115 m_charactersMap.insert(make_pair(utf8Decode("ì")[0], 'i'));
116 m_charactersMap.insert(make_pair(utf8Decode("í")[0], 'i'));
117 m_charactersMap.insert(make_pair(utf8Decode("î")[0], 'i'));
118 m_charactersMap.insert(make_pair(utf8Decode("ï")[0], 'i'));
119
120 m_charactersMap.insert(make_pair(utf8Decode("Ù")[0], 'U'));
121 m_charactersMap.insert(make_pair(utf8Decode("Ú")[0], 'U'));
122 m_charactersMap.insert(make_pair(utf8Decode("Û")[0], 'U'));
123 m_charactersMap.insert(make_pair(utf8Decode("Ü")[0], 'U'));
124
125 m_charactersMap.insert(make_pair(utf8Decode("ù")[0], 'u'));
126 m_charactersMap.insert(make_pair(utf8Decode("ú")[0], 'u'));
127 m_charactersMap.insert(make_pair(utf8Decode("û")[0], 'u'));
128 m_charactersMap.insert(make_pair(utf8Decode("ü")[0], 'u'));
129
130 m_charactersMap.insert(make_pair(utf8Decode("ÿ")[0], 'y'));
131
132 m_charactersMap.insert(make_pair(utf8Decode("Ñ")[0], 'N'));
133
134 m_charactersMap.insert(make_pair(utf8Decode("ñ")[0], 'n'));
135 }
136
137 void
138 TranslateUTF8Accents::translate (char& p_input)
139 {
140 map<char, char>::const_iterator l_iter = m_charactersMap.find(p_input);
141 if(l_iter == m_charactersMap.end())
142 {
143 p_input = l_iter->second;
144 }
145 }
146
147 void
148 TranslateUTF8Accents::removeUTF8Accents (string& p_data)
149 {
150 p_data = utf8Decode(p_data);
151
152 for (uint32 c_char = 0; c_char < p_data.length(); ++c_char)
153 {
154 translate(p_data[c_char]);
155 }
156 }
Something went wrong with that request. Please try again.