Skip to content
Permalink
Browse files
fix: default charset to UTF-8 for text/csv if not specified (#1423)
Some servers don't return the charset. This causes German
characters to be decoded incorrectly, because the ISO_8859_1 fallback does not
work well in such cases. Default to UTF-8 for text/csv if the charset is missing.

https://www.iana.org/assignments/media-types/text/csv

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/google-http-java-client/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #1421  ☕️
  • Loading branch information
rohitvvv committed Aug 11, 2021
1 parent 41e54fa commit 26f3da4b6426625d0d88afdad525dbf99c65bc8b
@@ -534,6 +534,11 @@ public Charset getContentCharset() {
// https://tools.ietf.org/html/rfc4627 - JSON must be encoded with UTF-8
return StandardCharsets.UTF_8;
}
// fallback to well-known charset for text/csv
if ("text".equals(mediaType.getType()) && "csv".equals(mediaType.getSubType())) {
// https://www.iana.org/assignments/media-types/text/csv - UTF-8 is the default charset for CSV
return StandardCharsets.UTF_8;
}
}
return StandardCharsets.ISO_8859_1;
}
@@ -68,6 +68,7 @@ public void testParseAsString_none() throws Exception {
// Content-Type value with a plain media type and no parameters.
private static final String VALID_CONTENT_TYPE = "text/plain";
// Content-Type value that carries an explicit charset alongside other parameters.
private static final String VALID_CONTENT_TYPE_WITH_PARAMS =
"application/vnd.com.google.datastore.entity+json; charset=utf-8; version=v1; q=0.9";
// text/csv Content-Type value that has parameters but no charset parameter.
private static final String VALID_CONTENT_TYPE_WITHOUT_CHARSET = "text/csv; version=v1; q=0.9";
// String that is not a well-formed media type; used as the "invalid" Content-Type fixture.
private static final String INVALID_CONTENT_TYPE = "!!!invalid!!!";
// JSON Content-Type value with no charset parameter.
private static final String JSON_CONTENT_TYPE = "application/json";

@@ -194,6 +195,32 @@ public LowLevelHttpResponse execute() throws IOException {
assertEquals("ISO-8859-1", response.getContentCharset().name());
}

/**
 * Checks that a response whose Content-Type is {@code VALID_CONTENT_TYPE_WITHOUT_CHARSET}
 * (parameters present, but no {@code charset}) is parsed back to the original content and
 * reports UTF-8 as its content charset.
 */
public void testParseAsString_validContentTypeWithoutCharSetWithParams() throws Exception {
  // Transport stub: every request it builds answers with the same canned response.
  HttpTransport stubTransport =
      new MockHttpTransport() {
        @Override
        public LowLevelHttpRequest buildRequest(String method, String url) throws IOException {
          return new MockLowLevelHttpRequest() {
            @Override
            public LowLevelHttpResponse execute() throws IOException {
              MockLowLevelHttpResponse canned = new MockLowLevelHttpResponse();
              canned.setContent(SAMPLE2);
              canned.setContentType(VALID_CONTENT_TYPE_WITHOUT_CHARSET);
              return canned;
            }
          };
        }
      };

  // Build and fire a GET in one chain; only the resulting response is inspected below.
  HttpResponse csvResponse =
      stubTransport
          .createRequestFactory()
          .buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL)
          .execute();

  // Content is read first, then the header-derived accessors are checked.
  assertEquals(SAMPLE2, csvResponse.parseAsString());
  assertEquals(VALID_CONTENT_TYPE_WITHOUT_CHARSET, csvResponse.getContentType());
  assertNotNull(csvResponse.getMediaType());
  assertEquals("UTF-8", csvResponse.getContentCharset().name());
}

public void testParseAsString_jsonContentType() throws IOException {
HttpTransport transport =
new MockHttpTransport() {

0 comments on commit 26f3da4

Please sign in to comment.