Skip to content
Browse files
Fixed garbled commit messages on Windows
On Windows, changelog commit messages containing Unicode characters
are not saved correctly to changelog.xml when the CliGitAPI
implementation is in use.

That happens because "git whatchanged" produces a byte stream of data.
Commit messages in that stream are encoded in UTF-8.  It is
necessary to explicitly decode the byte stream to strings using the
UTF-8 encoding; otherwise the default system encoding will be used.

This should fix issues:
  • Loading branch information
gtrafimenkov committed Dec 15, 2015
1 parent ae68a85 commit c99c91fcf497e784204398761be5c10f438d0e55
Showing 4 changed files with 73 additions and 1 deletion.
@@ -39,6 +39,7 @@
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
@@ -780,7 +781,11 @@ public void execute() throws GitException, InterruptedException {
if (out==null) throw new IllegalStateException();

try {
WriterOutputStream w = new WriterOutputStream(out);
// "git whatchanged" std output gives us byte stream of data
// Commit messages in that byte stream are UTF-8 encoded.
// We want to decode bytestream to strings using UTF-8 encoding.

WriterOutputStream w = new WriterOutputStream(out, Charset.forName("UTF-8"));
try {
if (launcher.launch().cmds(args).envs(environment).stdout(w).stderr(listener.getLogger()).pwd(workspace).join() != 0)
throw new GitException("Error launching git whatchanged");
@@ -3635,4 +3635,32 @@ public void test_git_ssh_executable_found_on_windows() throws Exception {
protected abstract String getRemoteBranchPrefix();

* Test parsing of changelog with unicode characters in commit messages.
public void test_unicodeCharsInChangelog() throws Exception {

// Test for

File tempRemoteDir = temporaryDirectoryAllocator.allocate();
extract(new ZipFile("src/test/resources/"), tempRemoteDir);
File pathToTempRepo = new File(tempRemoteDir, "unicodeCharsInChangelogRepo");
w = clone(pathToTempRepo.getAbsolutePath());

// w.git.changelog gives us strings
// We want to collect all the strings and check that unicode characters are still there.

StringWriter sw = new StringWriter();
w.git.changelog("v0", "vLast", sw);
String content = sw.toString();

assertTrue(content.contains("hello in English: hello"));
assertTrue(content.contains("hello in Russian: \u043F\u0440\u0438\u0432\u0435\u0442 (priv\u00E9t)"));
assertTrue(content.contains("hello in Chinese: \u4F60\u597D (n\u01D0 h\u01CEo)"));
assertTrue(content.contains("hello in French: \u00C7a va ?"));
assertTrue(content.contains("goodbye in German: Tsch\u00FCss"));
Binary file not shown.
@@ -0,0 +1,39 @@

# Regenerates the zipped git repository used as a test fixture for changelog
# parsing with Unicode commit messages.
#
# Requires bash (pushd/popd), git and zip. Run it from the directory where the
# resulting unicodeCharsInChangelogRepo.zip should be written.

# Start from a clean slate; done before `set -e` so a missing directory is not fatal.
rm -rf unicodeCharsInChangelogRepo

# From here on, abort at the first failing command.
set -e

mkdir unicodeCharsInChangelogRepo
pushd unicodeCharsInChangelogRepo
git init
git commit --allow-empty -m "Initial empty commit"

# Lower bound of the changelog range.
git tag v0

# One commit per language. The messages must stay byte-for-byte in sync with
# whatever assertions consume this fixture.
touch 111.txt
git add .
git commit -m "hello in English: hello"

touch 222.txt
git add .
git commit -m "hello in Russian: привет (privét)"

touch 333.txt
git add .
git commit -m "hello in Chinese: 你好 (nǐ hǎo)"

touch 444.txt
git add .
git commit -m "hello in French: Ça va ?"

touch 555.txt
git add .
git commit -m "goodbye in German: Tschüss"

# Upper bound of the changelog range.
git tag vLast

# Return to the parent directory so the archive holds the repository directory
# itself as its single top-level entry.
popd

# zip updates an existing archive in place, which could keep stale entries from
# a previous run; remove it first so the fixture is rebuilt from scratch.
rm -f unicodeCharsInChangelogRepo.zip
zip -r unicodeCharsInChangelogRepo.zip unicodeCharsInChangelogRepo

# The working copy is no longer needed once it has been archived.
rm -rf unicodeCharsInChangelogRepo

0 comments on commit c99c91f

Please sign in to comment.