Skip to content

Commit

Permalink
Support unicode escape with \uXXXX format in Android strings (#1006)
Browse files Browse the repository at this point in the history
As per doc https://developer.android.com/guide/topics/resources/string-resource#FormattingAndStyling

Note on the output: whatever form is used as input (\u00A0, &#x00a0;, &#160; or &#xa0;), the output will be &#x00a0;
  • Loading branch information
aurambaj committed Mar 6, 2024
1 parent b6005e5 commit 44f04c9
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ String unescape(String sourceString) {
String unescapedSourceString;

unescapedSourceString = sourceString.trim();
unescapedSourceString = unescapeUtils.replaceEscapedUnicode(unescapedSourceString);

if (StringUtils.startsWith(unescapedSourceString, "\"")
&& StringUtils.endsWith(unescapedSourceString, "\"")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class UnescapeUtils {
// Matches a backslash followed by a double or a single quote; group 1 captures the quote.
private static final Pattern ESCAPED_QUOTES = Pattern.compile("\\\\(\"|')");
// Matches a backslash followed by a backquote; group 1 captures the backquote.
private static final Pattern ESCAPED_BACKQUOTES = Pattern.compile("\\\\(`)");
// Matches a backslash optionally followed by one character accepted by "."; group 1 captures
// that character when it is present.
private static final Pattern ESCAPED_CHARACTERS = Pattern.compile("\\\\(.)?");
// Matches a backslash, the letter "u" and four hexadecimal digits (either case); group 1
// captures the four digits.
private static final Pattern ESCAPED_UNICODE = Pattern.compile("\\\\u([0-9a-fA-F]{4})");
// Matches one or more consecutive whitespace characters.
private static final Pattern SPACES = Pattern.compile("\\s+");
// Matches a single line feed character.
private static final Pattern LINE_FEED = Pattern.compile("\n");

Expand Down Expand Up @@ -63,10 +64,25 @@ String replaceEscapedBackquotes(String text) {
return ESCAPED_BACKQUOTES.matcher(text).replaceAll("$1");
}

/**
 * Replaces unicode escape sequences of the form \\uXXXX (four hexadecimal digits) with the
 * character they denote.
 *
 * <p>Must be called before any other method that would unescape the "u" letter on its own, like
 * {@link #replaceEscapedCharacters(String)}.
 *
 * <p>The decoded character is always inserted literally. This matters for escapes that decode to
 * a dollar sign (code 0024) or a backslash (code 005C): both are special in a regex replacement
 * string and would otherwise make the replacement throw {@link IllegalArgumentException}.
 *
 * @param text the text that may contain escaped unicode sequences
 * @return the text with every escaped unicode sequence replaced by the matching character
 */
String replaceEscapedUnicode(String text) {
  return ESCAPED_UNICODE
      .matcher(text)
      .replaceAll(
          match -> {
            // The pattern only accepts four hex digits, so the value always fits in one char.
            char decoded = (char) Integer.parseInt(match.group(1), 16);
            // The function result is interpreted as a replacement string ("$" and "\" are
            // special), so quote it to get a literal insertion.
            return java.util.regex.Matcher.quoteReplacement(String.valueOf(decoded));
          });
}

/**
* Replace other escape character with the character itself.
*
* <p>Must be call after replacing espace sequence that need a different treatment like {@link
* <p>Must be call after replacing escape sequence that need a different treatment like {@link
* #replaceEscapedLineFeed(String)}
*
* @param text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ public void testUnescaping() {

// multi lines and spaces
testUnescaping("\n line1 \n line2 \n", "line1 line2");

// unicode escape
var str = "Unicode\\u00A0escape";
testUnescaping("Unicode\\u00A0escape", "Unicode\u00A0escape");
}

void testUnescaping(String input, String expected) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1699,7 +1699,79 @@ public void testLocalizeAndroidCommentWithTranslatableFalse() throws Exception {
InheritanceMode.USE_PARENT,
null);
logger.debug("localized=\n{}", localizedAsset);
assertEquals(localizedAsset, localizedAsset);
assertEquals(assetContent, localizedAsset);
}

/**
 * Checks that the four ways of writing a non-breaking space in an Android string (escaped
 * unicode sequence, zero-padded hexadecimal entity, decimal entity and short hexadecimal entity)
 * are all extracted as the same source text, and that the localized file writes each of them
 * back as the zero-padded hexadecimal entity.
 */
@Test
public void testLocalizeAndroidUnicodeEscape() throws Exception {

  Repository repo = repositoryService.createRepository(testIdWatcher.getEntityName("repository"));
  RepositoryLocale repoLocale = repositoryService.addRepositoryLocale(repo, "en-GB");

  String assetContent =
      """
      <?xml version="1.0" encoding="UTF-8"?>
      <resources>
      <!-- Test Unicode Escapes -->
      <string name="unicode_escape">A string with\\u00A0Unicode Escape</string>
      <string name="unicode_escape2">A string with&#x00a0;Unicode Escape</string>
      <string name="unicode_escape3">A string with&#160;Unicode Escape</string>
      <string name="unicode_escape4">A string with&#xa0;Unicode Escape</string>
      </resources>""";

  asset =
      assetService.createAssetWithContent(repo.getId(), "res/values/strings.xml", assetContent);
  // Fail right here with a clear error if the asset cannot be reloaded, rather than with a
  // NullPointerException on the next line.
  asset = assetRepository.findById(asset.getId()).orElseThrow();
  assetId = asset.getId();
  tmId = repo.getTm().getId();

  PollableFuture<Asset> assetResult =
      assetService.addOrUpdateAssetAndProcessIfNeeded(
          repo.getId(), asset.getPath(), assetContent, false, null, null, null, null, null, null);
  // The method declares "throws Exception", so failures while waiting are left to propagate
  // unchanged instead of being wrapped in a RuntimeException.
  pollableTaskService.waitForPollableTask(assetResult.getPollableTask().getId());
  assetResult.get();

  TextUnitSearcherParameters textUnitSearcherParameters = new TextUnitSearcherParameters();
  textUnitSearcherParameters.setRepositoryIds(repo.getId());
  textUnitSearcherParameters.setStatusFilter(StatusFilter.FOR_TRANSLATION);
  List<TextUnitDTO> textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters);
  for (TextUnitDTO textUnitDTO : textUnitDTOs) {
    logger.debug("comment=[{}]", textUnitDTO.getComment());
  }

  // Every input notation must produce the same source: the text with a real non-breaking space.
  assertEquals(4, textUnitDTOs.size());
  assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(0).getSource());
  assertEquals("Test Unicode Escapes", textUnitDTOs.get(0).getComment());
  assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(1).getSource());
  assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(2).getSource());
  assertEquals("A string with\u00A0Unicode Escape", textUnitDTOs.get(3).getSource());

  String localizedAsset =
      tmService.generateLocalized(
          asset,
          assetContent,
          repoLocale,
          "en-GB",
          null,
          null,
          Status.ALL,
          InheritanceMode.USE_PARENT,
          null);
  // Routine test output: logged at debug level like the other tests of this class.
  logger.debug("localized=\n{}", localizedAsset);

  // Whatever the input notation was, the output uses the zero-padded hexadecimal entity.
  String expectedLocalizedAsset =
      """
      <?xml version="1.0" encoding="UTF-8"?>
      <resources>
      <!-- Test Unicode Escapes -->
      <string name="unicode_escape">A string with&#x00a0;Unicode Escape</string>
      <string name="unicode_escape2">A string with&#x00a0;Unicode Escape</string>
      <string name="unicode_escape3">A string with&#x00a0;Unicode Escape</string>
      <string name="unicode_escape4">A string with&#x00a0;Unicode Escape</string>
      </resources>""";
  assertEquals(expectedLocalizedAsset, localizedAsset);
}

@Test
Expand Down Expand Up @@ -2042,7 +2114,7 @@ public void testLocalizeAndroidStringsRemoveUntranslatedSingleItem() throws Exce
InheritanceMode.REMOVE_UNTRANSLATED,
null);
logger.debug("localized=\n{}", localizedAsset);
assertEquals(localizedAsset, localizedAsset);
assertEquals(expectedLocalized, localizedAsset);
}

/**
Expand Down

0 comments on commit 44f04c9

Please sign in to comment.