Skip to content

Commit

Permalink
[7.12] URI parts processor handles URLs containing spaces (#71630)
Browse files Browse the repository at this point in the history
  • Loading branch information
danhermann committed Apr 13, 2021
1 parent 9c5b056 commit e339c57
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@

package org.elasticsearch.xpack.ingest;

import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.ConfigurationUtils;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

Expand Down Expand Up @@ -54,36 +57,79 @@ public boolean getKeepOriginal() {
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
// Splits the string stored under `field` into URI components and writes them, as a map, to `targetField`.
// Returns the same IngestDocument instance that was passed in.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so lines from before and after the
// change appear side by side (e.g. `URI uri;` next to `URI uri = null;`, and two declarations of
// `uriParts`). Read it as a before/after view rather than as compilable code.
//
// `field`, `targetField`, `keepOriginal` and `removeIfSuccessful` are declared outside this view.
String value = ingestDocument.getFieldValue(field, String.class);

URI uri;
// Both references start as null; parsing below assigns at most one of them before getUriParts is called.
URI uri = null;
URL url = null;
try {
uri = new URI(value);
} catch (URISyntaxException e) {
// Earlier behaviour: a value rejected by java.net.URI was reported as an error straight away.
// NOTE(review): the caught exception is not passed along as the cause of the IllegalArgumentException.
throw new IllegalArgumentException("unable to parse URI [" + value + "]");
}
// Earlier behaviour: the parts map was populated inline from the parsed URI.
Map<String, Object> uriParts = new HashMap<String, Object>();
uriParts.put("domain", uri.getHost());
if (uri.getFragment() != null) {
uriParts.put("fragment", uri.getFragment());
// New behaviour: when java.net.URI rejects the value, java.net.URL is tried as a fallback
// (the commit title says this is to handle URLs containing spaces).
try {
url = new URL(value);
} catch (MalformedURLException e2) {
// Neither parser accepted the value; the message is the same one used for the URI failure.
// NOTE(review): neither `e` nor `e2` is attached as the cause.
throw new IllegalArgumentException("unable to parse URI [" + value + "]");
}
}
// New behaviour: extraction of the individual parts is delegated to getUriParts.
Map<String, Object> uriParts = getUriParts(uri, url);
if (keepOriginal) {
// Keep the unparsed input alongside the extracted parts.
uriParts.put("original", value);
}
final String path = uri.getPath();

// Drop the source field only when it differs from the target field; otherwise the
// setFieldValue call below overwrites it with the parts map anyway.
if (removeIfSuccessful && targetField.equals(field) == false) {
ingestDocument.removeField(field);
}
ingestDocument.setFieldValue(targetField, uriParts);
return ingestDocument;
}

/**
 * Builds the map of URI components from {@code uri} when it is non-null, otherwise from {@code fallbackUrl}.
 *
 * <p>The keys visible in this listing are {@code domain}, {@code fragment}, {@code path}, {@code extension},
 * {@code port}, {@code query}, {@code scheme} and {@code user_info}. {@code domain} and {@code scheme} are
 * stored unconditionally; the remaining entries are stored only when the corresponding component is present.
 *
 * <p>NOTE(review): this listing is a rendered diff without +/- markers, so a few pre-change lines that read
 * directly from {@code uri} sit next to their replacements, and part of the {@code user_info} handling is
 * collapsed behind a hunk header and is not visible here.
 *
 * @param uri         parsed URI, or {@code null} if URI parsing was not possible
 * @param fallbackUrl parsed URL consulted only when {@code uri} is {@code null}
 * @return a new mutable map holding the extracted components
 * @throws IllegalArgumentException if both arguments are {@code null}
 */
@SuppressForbidden(reason = "URL.getPath is used only if URI.getPath is unavailable")
private static Map<String, Object> getUriParts(URI uri, URL fallbackUrl) {
Map<String, Object> uriParts = new HashMap<String, Object>();
String domain;
String fragment;
String path;
int port;
String query;
String scheme;
String userInfo;

if (uri != null) {
// Preferred source: components as reported by java.net.URI.
domain = uri.getHost();
fragment = uri.getFragment();
path = uri.getPath();
port = uri.getPort();
query = uri.getQuery();
scheme = uri.getScheme();
userInfo = uri.getUserInfo();
} else if (fallbackUrl != null) {
// Fallback source: java.net.URL exposes the fragment as getRef() and the scheme as getProtocol().
domain = fallbackUrl.getHost();
fragment = fallbackUrl.getRef();
path = fallbackUrl.getPath();
port = fallbackUrl.getPort();
query = fallbackUrl.getQuery();
scheme = fallbackUrl.getProtocol();
userInfo = fallbackUrl.getUserInfo();
} else {
// should never occur during processor execution
throw new IllegalArgumentException("at least one argument must be non-null");
}

// Stored even when the value is null.
uriParts.put("domain", domain);
if (fragment != null) {
uriParts.put("fragment", fragment);
}
if (path != null) {
uriParts.put("path", path);
if (path.contains(".")) {
// The extension is whatever follows the last '.' in the path. Because the path is known to contain
// a '.', periodIndex is always a valid index, so the "" branch of the ternary is not reached;
// a trailing '.' still yields an empty extension through substring.
int periodIndex = path.lastIndexOf('.');
uriParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : "");
}
}
// Pre-change form: read the port directly from the URI.
if (uri.getPort() != -1) {
uriParts.put("port", uri.getPort());
// Post-change form: -1 is treated as "no port" and the entry is omitted.
if (port != -1) {
uriParts.put("port", port);
}
// Pre-change form: read the query directly from the URI.
if (uri.getQuery() != null) {
uriParts.put("query", uri.getQuery());
// Post-change form: use the local resolved above.
if (query != null) {
uriParts.put("query", query);
}
uriParts.put("scheme", uri.getScheme());
final String userInfo = uri.getUserInfo();
// Stored even when the value is null.
uriParts.put("scheme", scheme);
if (userInfo != null) {
uriParts.put("user_info", userInfo);
if (userInfo.contains(":")) {
Expand All @@ -93,11 +139,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
}
}

// Pre-change tail of execute(): this document handling is not part of the helper after the change.
if (removeIfSuccessful && targetField.equals(field) == false) {
ingestDocument.removeField(field);
}
ingestDocument.setFieldValue(targetField, uriParts);
return ingestDocument;
// Post-change: the helper only hands the populated map back to its caller.
return uriParts;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,37 @@ public void testUriParts() throws Exception {
);
}

/**
 * Checks that inputs containing unescaped spaces are still split into their parts.
 * Each case pairs an input URL with the exact map of parts expected from it.
 */
public void testUrlWithCharactersNotToleratedByUri() throws Exception {
    // Case 1: a space in the path, with no port, query, fragment or user info.
    final String minimalUrl = "http://www.google.com/path with spaces";
    final Map<String, Object> minimalParts = Map.of(
        "scheme", "http",
        "domain", "www.google.com",
        "path", "/path with spaces"
    );
    testUriParsing(minimalUrl, minimalParts);

    // Case 2: a space in the path together with user info, port, query and fragment.
    final String completeUrl = "https://user:pw@testing.google.com:8080/foo with space/bar?foo1=bar1&foo2=bar2#anchorVal";
    final Map<String, Object> completeParts = Map.of(
        "scheme", "https",
        "domain", "testing.google.com",
        "fragment", "anchorVal",
        "path", "/foo with space/bar",
        "port", 8080,
        "username", "user",
        "password", "pw",
        "user_info", "user:pw",
        "query", "foo1=bar1&foo2=bar2"
    );
    testUriParsing(completeUrl, completeParts);
}

public void testRemoveIfSuccessfulDoesNotRemoveTargetField() throws Exception {
String field = "field";
UriPartsProcessor processor = new UriPartsProcessor(null, null, field, field, true, false);
Expand Down

0 comments on commit e339c57

Please sign in to comment.