Skip to content

Commit

Permalink
Fixed field-name duplicates in XML
Browse files Browse the repository at this point in the history
  • Loading branch information
Thiruvalluvan M g committed May 22, 2023
1 parent 35fc4bd commit 84c43d7
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 2 deletions.
5 changes: 3 additions & 2 deletions solr/core/src/java/org/apache/solr/schema/IndexSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.XML;
import org.apache.solr.core.ConfigSetService;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
Expand Down Expand Up @@ -698,9 +699,9 @@ protected synchronized Map<String, Boolean> loadFields(ConfigNode n) {
}

for (ConfigNode node : nodes) {
String name = DOMUtil.getAttr(node, NAME, "field definition");
String name = XML.unescapeAttributeValue(DOMUtil.getAttr(node, NAME, "field definition"));
log.trace("reading field def {}", name);
String type = DOMUtil.getAttr(node, TYPE, "field " + name);
String type = XML.unescapeAttributeValue(DOMUtil.getAttr(node, TYPE, "field " + name));

FieldType ft = fieldTypes.get(type);
if (ft == null) {
Expand Down
85 changes: 85 additions & 0 deletions solr/core/src/test/org/apache/solr/schema/TestIndexSchema.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;

import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import org.apache.commons.io.input.ReaderInputStream;
import org.apache.lucene.util.Version;
import org.apache.solr.core.SolrResourceLoader;
import org.junit.Assert;
import org.junit.Test;

/**
 * Persists an {@link IndexSchema} to XML and loads it back, checking that field names containing
 * a control character, or literal text shaped like a control-character escape, are kept apart and
 * unchanged.
 */
public class TestIndexSchema {
  /** Minimal schema: one {@code int} field type and an empty field list. */
  private static final String SCHEMA_XML =
      "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
          + "<schema name=\"test-x\" version=\"0.1\">\n"
          + " <types>\n"
          + " <fieldType name=\"int\" class=\"solr.TrieIntField\" precisionStep=\"0\" omitNorms=\"true\" positionIncrementGap=\"0\"/>\n"
          + " </types>\n"
          + " <fields></fields>\n"
          + " </schema>\n";

  /** Wraps the XML text in a UTF-8 byte stream. */
  private static ReaderInputStream asInputStream(String xml) {
    StringReader reader = new StringReader(xml);
    return new ReaderInputStream(reader, StandardCharsets.UTF_8);
  }

  /** Builds a schema named "test" from the given XML text. */
  private static IndexSchema newIndexSchema(String xml) {
    final SolrResourceLoader resourceLoader =
        new SolrResourceLoader(FileSystems.getDefault().getPath("."));
    return new IndexSchema(
        "test",
        IndexSchemaFactory.getConfigResource(
            null, asInputStream(xml), resourceLoader, "test-resource"),
        Version.LATEST,
        resourceLoader,
        null);
  }

  /** Registers a field of the schema's {@code int} type under the given name. */
  private static void addIntField(IndexSchema schema, String fieldName) {
    SchemaField field = new SchemaField(fieldName, schema.fieldTypes.get("int"));
    schema.fields.put(fieldName, field);
  }

  /** Returns what {@code persist} writes for the schema. */
  private static String toXml(IndexSchema schema) throws IOException {
    StringWriter buffer = new StringWriter();
    schema.persist(buffer);
    return buffer.toString();
  }

  /** A literal "#20;" name and a name with the control character itself must stay distinct. */
  @Test
  public void testDuplicateFields() throws IOException {
    IndexSchema original = newIndexSchema(SCHEMA_XML);
    Assert.assertNotNull(original);
    for (String fieldName : new String[] {"a#20;b", "a\u0014b"}) {
      addIntField(original, fieldName);
    }
    IndexSchema reloaded = newIndexSchema(toXml(original));
    Assert.assertEquals(2, reloaded.fields.size());
  }

  /** A name containing a control character must be found under the same name after reload. */
  @Test
  public void testFieldNameFidelity() throws IOException {
    final String fieldName = "a\u0014b";
    IndexSchema original = newIndexSchema(SCHEMA_XML);
    Assert.assertNotNull(original);
    addIntField(original, fieldName);
    IndexSchema reloaded = newIndexSchema(toXml(original));
    Assert.assertEquals(1, reloaded.fields.size());
    Assert.assertNotNull(reloaded.fields.get("a\u0014b"));
  }
}
85 changes: 85 additions & 0 deletions solr/solrj/src/java/org/apache/solr/common/util/XML.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import java.io.IOException;
import java.io.Writer;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/** */
Expand Down Expand Up @@ -82,6 +84,52 @@ public static void escapeAttributeValue(String str, Writer out) throws IOExcepti
escape(str, out, attribute_escapes);
}

/**
 * Reverses the escaping applied to attribute values.
 *
 * <p>First the '#' based control-character encoding is undone. A run of k consecutive '#'
 * followed by one or two ASCII digits and ';', where the digits denote a value below 32, is
 * read as follows:
 *
 * <ul>
 *   <li>k odd: k/2 (rounded down) literal '#' followed by the control character with that value;
 *   <li>k even: k/2 literal '#' followed by the digits and ';' taken literally.
 * </ul>
 *
 * Every other '#' is copied unchanged, because the encoder only doubles '#' in front of such a
 * digit group or in front of a control character.
 *
 * <p>Afterwards the entities {@code &quot;}, {@code &lt;}, {@code &gt;} and {@code &amp;} are
 * resolved. {@code &amp;} is resolved last so that an escaped ampersand followed by "lt;" does
 * not get unescaped a second time.
 *
 * @param str the escaped value; must not be {@code null}
 * @return the unescaped value
 * @throws NullPointerException if {@code str} is {@code null}
 */
public static String unescapeAttributeValue(String str) {
  final int len = str.length();
  final StringBuilder out = new StringBuilder(len);
  int pos = 0;
  while (pos < len) {
    final char ch = str.charAt(pos);
    if (ch != '#') {
      out.append(ch);
      pos++;
      continue;
    }

    // Measure the whole run of '#' starting here.
    int runEnd = pos;
    while (runEnd < len && str.charAt(runEnd) == '#') {
      runEnd++;
    }
    final int hashes = runEnd - pos;

    // Read at most two ASCII digits directly after the run.
    int digitsEnd = runEnd;
    int value = 0;
    while (digitsEnd < len && digitsEnd - runEnd < 2) {
      final char d = str.charAt(digitsEnd);
      if (d < '0' || d > '9') {
        break;
      }
      value = value * 10 + (d - '0');
      digitsEnd++;
    }

    final boolean escapeShaped =
        digitsEnd > runEnd && digitsEnd < len && str.charAt(digitsEnd) == ';' && value < 32;
    if (!escapeShaped) {
      // Not something the encoder touches: keep the '#' run exactly as it is.
      out.append(str, pos, runEnd);
      pos = runEnd;
      continue;
    }

    // The encoder doubled the literal '#'s, so half of the run (rounded down) is literal.
    for (int i = hashes / 2; i > 0; i--) {
      out.append('#');
    }
    if ((hashes & 1) == 1) {
      // The odd '#' together with the digits and ';' encodes a control character.
      out.append((char) value);
    } else {
      // All '#' were literal; the digits and ';' are plain text.
      out.append(str, runEnd, digitsEnd + 1);
    }
    pos = digitsEnd + 1;
  }

  return out.toString()
      .replace("&quot;", "\"")
      .replace("&lt;", "<")
      .replace("&gt;", ">")
      .replace("&amp;", "&");
}

public static void escapeAttributeValue(char[] chars, int start, int length, Writer out)
throws IOException {
escape(chars, start, length, out, attribute_escapes);
Expand Down Expand Up @@ -164,7 +212,44 @@ private static void escape(char[] chars, int offset, int length, Writer out, Str
}
}

// Locates the places in a string where '#' runs have to be doubled and control characters
// replaced. Capture groups:
//   1 - the single non-'#' character in front of the '#' run, or empty at the start of input
//   2 - the (possibly empty) run of '#'
//   3 - one or two ASCII digits followed by ';', i.e. literal text shaped like an escape
//   4 - only the digits of group 3
//   5 - alternatively to 3/4: one control character in the range 0..31 (octal 0..37)
private static final Pattern toEscPattern =
Pattern.compile("([^#]|^)(#*)(?:(([0-9]{1,2});)|([\0-\37]))");

/**
* Replace any control character c with a value less than 32 by "#nn;", where nn is the decimal
* representation of c. Since this encoding would be ambiguous with a literal "#nn;" (nn less
* than 32), the '#'s immediately preceding such a literal are doubled. That in turn would be
* ambiguous with '#'s preceding a control character, so those are doubled as well. Thus, when
* the number of consecutive '#'s in front of "nn;" is even they are all literal '#'s, and when
* it is odd the last '#' belongs to the encoding of a control character.
*
* <p>Also replace characters like '"' with "&quot;" etc.
*
* @param str Input string
* @param out Output writer to write into
* @param escapes Escape character map
* @throws IOException If write operation fails
*/
private static void escape(String str, Writer out, String[] escapes) throws IOException {
Matcher m = toEscPattern.matcher(str);
if (m.find()) {
StringBuilder sb = new StringBuilder();
do {
m.appendReplacement(sb, m.group(1));
sb.append(m.group(2));
String g3 = m.group(3);
if (g3 != null) {
if (Integer.parseInt(m.group(4)) < 32) {
sb.append(m.group(2));
}
sb.append(g3);
} else {
sb.append(m.group(2));
sb.append(escapes[m.group(5).charAt(0)]);
}
} while (m.find());
m.appendTail(sb);
str = sb.toString();
}
for (int i = 0; i < str.length(); i++) {
char ch = str.charAt(i);
if (ch < escapes.length) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ private void doSimpleTest(String input, String expectedOutput) throws IOExceptio
XML.escapeCharData(input, sw);
final String result = sw.toString();
assertEquals("Escaped output does not match expected value", expectedOutput, result);
assertEquals(
"Unescape does not reverse the effect of escape",
input,
XML.unescapeAttributeValue(result));
}

public void testNoEscape() throws IOException {
Expand Down Expand Up @@ -65,4 +69,22 @@ public void testAmpAndTagWithAccents() throws IOException {
/** The '>' that closes "]]>" is written as an entity. */
public void testGt() throws IOException {
  final String raw = "a ]]> b";
  final String escaped = "a ]]&gt; b";
  doSimpleTest(raw, escaped);
}

/** Control characters become "#nn;" and the '#'s directly in front of them are doubled. */
public void testCtrl() throws IOException {
  // each row: { input, expected escaped output }
  final String[][] cases = {
    {"\u0014", "#20;"},
    {"a\u0014b", "a#20;b"},
    {"#\u0014", "###20;"},
    {"#15;a#\u0014#b\u0015c#20;d", "##15;a###20;#b#21;c##20;d"},
  };
  for (String[] row : cases) {
    doSimpleTest(row[0], row[1]);
  }
}

/** Literal '#' is doubled only in front of text that looks like a control-character escape. */
public void testLiteralPound() throws IOException {
  // each row: { input, expected escaped output }
  final String[][] cases = {
    {"a#1;", "a##1;"},
    {"a#12;", "a##12;"},
    {"a#b", "a#b"},
    {"a#;", "a#;"}, // no digits
    {"a#15", "a#15"}, // no semicolon
    {"a#32;", "a#32;"}, // value more than 31
    {"a#023;", "a#023;"}, // more than 2 digits
    {"a##b", "a##b"},
  };
  for (String[] row : cases) {
    doSimpleTest(row[0], row[1]);
  }
}
}

0 comments on commit 84c43d7

Please sign in to comment.