Skip to content
Permalink
Browse files Browse the repository at this point in the history
COMMON-3969: Explicit alphanumeric n-gram indexes
Explicitly build alphanumeric n-gram indexes to avoid XSS due
to unescaped characters in the rendered n-gram indexes JSON
  • Loading branch information
deadok22 committed Jul 20, 2020
1 parent 2125dd7 commit c0952a9
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 24 deletions.
Expand Up @@ -51,6 +51,11 @@ public String getName() {
return name;
}

/**
* @deprecated do not use this.
* Please implement your own tokenization of variable names, if needed.
*/
@Deprecated
public String[] getIndexableNames() {
return indexableNames;
}
Expand Down
Expand Up @@ -4,7 +4,11 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.*;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.TreeMultimap;
import com.indeed.util.varexport.VarExporter;
import com.indeed.util.varexport.Variable;
import com.indeed.util.varexport.VariableHost;
Expand All @@ -14,6 +18,7 @@
import freemarker.template.Template;
import org.apache.log4j.Logger;

import javax.annotation.Nullable;
import javax.servlet.ServletConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
Expand All @@ -25,7 +30,18 @@
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
* Servlet for displaying variables exported by {@link VarExporter}.
Expand Down Expand Up @@ -287,8 +303,7 @@ public void visit(Variable var) {
}
root.put("vars", varList);
if (withIndex) {
final String varsIndex = buildIndex(varList);
root.put("varsIndex", varsIndex);
root.put("varsIndex", alphanumericNGramIndexesJSON(varList));
}

try {
Expand All @@ -298,19 +313,24 @@ public void visit(Variable var) {
}
}

private String buildIndex(final List<Variable> varList) {
final SetMultimap<String, Integer> uniGram = buildNGramIndex(varList, 1);
final SetMultimap<String, Integer> biGram = buildNGramIndex(varList, 2);
final SetMultimap<String, Integer> triGram = buildNGramIndex(varList, 3);
private String alphanumericNGramIndexesJSON(final List<Variable> varList) {
// The JSON we build here is valid as we only use alphanumeric n-grams

final StringBuilder json = new StringBuilder();

json.append('{').append('\n');
json.append("\"uniGram\":").append('\n');
appendTo(json, uniGram).append(',').append('\n');

json.append("\n\"uniGram\":").append('\n');
appendTo(json, buildAlphanumericNGramIndex(varList, 1)).append(',').append('\n');

json.append("\"biGram\":").append('\n');
appendTo(json, biGram).append(',').append('\n');;
appendTo(json, buildAlphanumericNGramIndex(varList, 2)).append(',').append('\n');;

json.append("\"triGram\":").append('\n');
appendTo(json, triGram);
appendTo(json, buildAlphanumericNGramIndex(varList, 3));

json.append('}');

return json.toString();
}

Expand All @@ -333,19 +353,32 @@ private <K> StringBuilder appendTo(final StringBuilder json, final SetMultimap<K
return json;
}

private SetMultimap<String, Integer> buildNGramIndex(final List<Variable> varList, final int n) {
final TreeMultimap<String, Integer> uniGram = TreeMultimap.create();
for (int index = 0; index < varList.size(); index++) {
final Variable var = varList.get(index);
final String[] indexableNames = var.getIndexableNames();
for (final String indexableName : indexableNames) {
for (int i = 0; i < indexableName.length() - n + 1; i++) {
final String key = indexableName.substring(i, i + n);
uniGram.put(key, index);
}
}
@VisibleForTesting
static SetMultimap<String, Integer> buildAlphanumericNGramIndex(final List<Variable> varList, final int n) {
final TreeMultimap<String, Integer> nGramIndex = TreeMultimap.create();
IntStream
.range(0, varList.size())
.forEach(i ->
alphanumericNGrams(varList.get(i).getName(), n)
.forEach(ngram -> nGramIndex.put(ngram, i))
);
return nGramIndex;
}

private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-z0-9]+");

@VisibleForTesting
static Stream<String> alphanumericNGrams(@Nullable final String in, final int n) {
if (in == null) {
return Stream.empty();
}
return uniGram;
return NON_ALPHANUMERIC
.splitAsStream(in.toLowerCase(Locale.US))
.flatMap(alnumSubString ->
IntStream
.rangeClosed(0, alnumSubString.length() - n)
.mapToObj(i -> alnumSubString.substring(i, i + n))
);
}

private void addVariable(final Variable v, final List<Variable> out) {
Expand Down
Expand Up @@ -2,6 +2,8 @@

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.TreeMultimap;
import com.indeed.util.varexport.Export;
import com.indeed.util.varexport.ManagedVariable;
import com.indeed.util.varexport.VarExporter;
Expand All @@ -19,13 +21,16 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import static org.easymock.EasyMock.createMock;
import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.expectLastCall;
import static org.easymock.EasyMock.replay;
import static org.easymock.EasyMock.verify;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;

Expand Down Expand Up @@ -164,6 +169,63 @@ public void escapesHtmlInVariableNamesAndValues() throws IOException {
assertFalse(bodyWriter.toString().contains(unescaped));
}

@Test
public void buildAlphaNumericNGramIndex() {
final TreeMultimap<String, Integer> expectedTriGramIndex = TreeMultimap.create();
expectedTriGramIndex.put("abc", 0);
expectedTriGramIndex.put("bcd", 0);
expectedTriGramIndex.put("bcd", 1);
expectedTriGramIndex.put("zzz", 2);

final SetMultimap<String, Integer> actualTriGramIndex = ViewExportedVariablesServlet.buildAlphanumericNGramIndex(
Arrays.asList(
ManagedVariable.builder().setName("abcd").build(),
ManagedVariable.builder().setName("bcd").build(),
ManagedVariable.builder().setName("zzz").build()
),
3
);

assertEquals(expectedTriGramIndex, actualTriGramIndex);
}

@Test
public void alphanumericNGrams() {
assertEquals(0, ViewExportedVariablesServlet.alphanumericNGrams(null, 1).count());
assertEquals(0, ViewExportedVariablesServlet.alphanumericNGrams("zzz", 42).count());

assertEquals(
Arrays.asList("m", "1", "3", "b", "i", "t", "r", "i"),
ViewExportedVariablesServlet
.alphanumericNGrams("m1-3-/bi<</tri", 1)
.collect(Collectors.toList())
);

assertEquals(
Arrays.asList("m1", "bi", "tr", "ri"),
ViewExportedVariablesServlet
.alphanumericNGrams("m1-3-/bi<</tri", 2)
.collect(Collectors.toList())
);

assertEquals(
Arrays.asList(
"m1x",
"1x3",
"x3d",
"3dc",
"dca",
"cas",
"ase",
"tri",
"tri"
),
ViewExportedVariablesServlet
.alphanumericNGrams("m1x3dCaSe-1-/bi<</tri?tri", 3)
.collect(Collectors.toList())
);
}

@Before
public void setUp() throws Exception {
VarExporter.resetGlobal();
Expand Down

0 comments on commit c0952a9

Please sign in to comment.