-
Notifications
You must be signed in to change notification settings - Fork 1
/
LatinMapper.java
97 lines (85 loc) · 2.93 KB
/
LatinMapper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package com.github.alternet.demo.countries.client;
import java.util.Iterator;
import java.util.NoSuchElementException;

import com.github.alternet.demo.countries.generator.LatinMapperGenerator;
import com.google.gwt.core.client.GWT;
import com.google.gwt.json.client.JSONObject;
import com.google.gwt.json.client.JSONValue;
/**
 * Map diacritics and ligatures to latin characters.
 *
 * <p>The replacement table is fetched once, when this class is initialized,
 * from an instance obtained through {@code GWT.create(LatinMapper.class)}.</p>
 *
 * @author Philippe Poulard
 */
public abstract class LatinMapper {

    /**
     * The replacement table returned by {@link #map()}: it is looked up with
     * a single code point (as a string) and yields the latin replacement.
     * Assigned exactly once by the static initializer below.
     */
    private static final JSONObject MAP;

    static {
        // Deferred binding: according to the contract documented on map(),
        // the generator supplies a child class carrying the complete mapping.
        LatinMapper mapper = GWT.create(LatinMapper.class);
        MAP = mapper.map().isObject();
    }

    /**
     * Remove diacritics and ligatures of a string.
     *
     * <p>The string is processed code point by code point; a code point that
     * has an entry in the replacement table is substituted by that entry,
     * any other code point is copied unchanged.</p>
     *
     * @param string The string to latinize; must not be null.
     *
     * @return A new latin string.
     */
    public static String latinize(String string) {
        StringBuilder result = new StringBuilder(string.length());
        for (int codePoint : unicodeCodePoints(string)) {
            // The table is keyed by strings: turn the code point back into text.
            String alt = new String(Character.toChars(codePoint));
            JSONValue val = MAP.get(alt);
            if (val != null) {
                alt = val.isString().stringValue();
            }
            result.append(alt);
        }
        return result.toString();
    }

    /**
     * The default implementation is just a template
     * and will be replaced by the generator that creates
     * a child class with the complete mapping.
     *
     * @return The map of characters to replace.
     *
     * @see LatinMapperGenerator
     */
    protected native JSONValue map() /*-{
        var o = {
            'Á': 'A', // LATIN CAPITAL LETTER A WITH ACUTE
            'Ă': 'A', // LATIN CAPITAL LETTER A WITH BREVE
            // ...
            'ᵥ': 'v', // LATIN SUBSCRIPT SMALL LETTER V
            'ₓ': 'x', // LATIN SUBSCRIPT SMALL LETTER X
        };
        return @com.google.gwt.json.client.JSONObject::new(Lcom/google/gwt/core/client/JavaScriptObject;)(o);
    }-*/;

    /**
     * Iterate on the unicode code points of a string (a code point is made of 1
     * or 2 chars).
     *
     * <p>The returned iterators are read-only: {@code remove()} throws
     * {@link UnsupportedOperationException}.</p>
     *
     * @param string
     *            The actual non-null string.
     * @return An iterable on its unicode code points.
     */
    private static Iterable<Integer> unicodeCodePoints(final String string) {
        return new Iterable<Integer>() {
            @Override
            public Iterator<Integer> iterator() {
                return new Iterator<Integer>() {
                    /** Index of the first char of the next code point to return. */
                    private int nextIndex = 0;

                    @Override
                    public boolean hasNext() {
                        return nextIndex < string.length();
                    }

                    @Override
                    public Integer next() {
                        // Honour the Iterator contract instead of letting
                        // codePointAt() fail on an out-of-range index.
                        if (!hasNext()) {
                            throw new NoSuchElementException();
                        }
                        int result = string.codePointAt(nextIndex);
                        // Skip 1 char, or 2 for a surrogate pair.
                        nextIndex += Character.charCount(result);
                        return result;
                    }

                    @Override
                    public void remove() {
                        throw new UnsupportedOperationException();
                    }
                };
            }
        };
    }
}