Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

[feature] new range index: add a simple mechanism to normalise index …

…keys on the fly.
  • Loading branch information...
commit 25990313b3264f4034d0784641c768c529a7c939 1 parent 2b6411d
@wolfgangmm wolfgangmm authored
View
9 extensions/indexes/range/src/org/exist/indexing/range/ComplexRangeIndexConfigElement.java
@@ -125,4 +125,13 @@ public int getType(String fieldName) {
}
return Type.STRING;
}
+
+    /**
+     * Looks up the converter configured for a single field of this complex index.
+     *
+     * @param fieldName name of the field whose converter is requested
+     * @return the converter declared on the matching field definition, or
+     *         {@code null} if no field with that name is known or the field
+     *         has no converter
+     */
+    @Override
+    public org.exist.indexing.range.conversion.TypeConverter getTypeConverter(String fieldName) {
+        final RangeIndexConfigField configured = fields.get(fieldName);
+        return configured == null ? null : configured.getTypeConverter();
+    }
}
View
2  extensions/indexes/range/src/org/exist/indexing/range/RangeIndex.java
@@ -35,7 +35,7 @@
*/
public class RangeIndex extends LuceneIndex {
- private static final Logger LOG = Logger.getLogger(RangeIndex.class);
+ protected static final Logger LOG = Logger.getLogger(RangeIndex.class);
public final static String ID = RangeIndex.class.getName();
View
30 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfigElement.java
@@ -5,12 +5,9 @@
import org.apache.lucene.document.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
-import org.exist.EXistException;
import org.exist.dom.QName;
import org.exist.indexing.lucene.LuceneIndexConfig;
-import org.exist.storage.Indexable;
import org.exist.storage.NodePath;
-import org.exist.util.ByteConversion;
import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.XMLString;
@@ -18,7 +15,6 @@
import org.exist.xquery.value.*;
import org.w3c.dom.Element;
-import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.XMLGregorianCalendar;
import java.io.IOException;
import java.util.Map;
@@ -33,6 +29,7 @@
protected boolean includeNested = false;
protected boolean caseSensitive = true;
protected int wsTreatment = XMLString.SUPPRESS_NONE;
+ private org.exist.indexing.range.conversion.TypeConverter typeConverter = null;
public RangeIndexConfigElement(Element node, Map<String, String> namespaces) throws DatabaseConfigurationException {
String match = node.getAttribute("match");
@@ -83,10 +80,29 @@ public RangeIndexConfigElement(Element node, Map<String, String> namespaces) thr
if (caseStr != null && caseStr.length() > 0) {
caseSensitive = caseStr.equalsIgnoreCase("yes");
}
+ String custom = node.getAttribute("converter");
+ if (custom != null && custom.length() > 0) {
+ try {
+ Class customClass = Class.forName(custom);
+ typeConverter = (org.exist.indexing.range.conversion.TypeConverter) customClass.newInstance();
+ } catch (ClassNotFoundException e) {
+ RangeIndex.LOG.warn("Class for custom-type not found: " + custom);
+ } catch (InstantiationException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ } catch (IllegalAccessException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ }
+ }
}
public Field convertToField(String fieldName, String content) throws IOException {
- int fieldType = getType(fieldName);
+ // check if a converter is defined for this index to handle on-the-fly conversions
+ final org.exist.indexing.range.conversion.TypeConverter custom = getTypeConverter(fieldName);
+ if (custom != null) {
+ return custom.toField(fieldName, content);
+ }
+ // no converter: handle default types
+ final int fieldType = getType(fieldName);
try {
switch (fieldType) {
case Type.INTEGER:
@@ -249,6 +265,10 @@ public int getType() {
return type;
}
+    /**
+     * Returns the custom converter attached to this index definition.
+     * The field name is not consulted here: the same converter is returned
+     * for every field.
+     *
+     * @param fieldName name of the field being indexed (unused)
+     * @return the converter created from the "converter" attribute, or
+     *         {@code null} if none was declared or it could not be instantiated
+     */
+    public org.exist.indexing.range.conversion.TypeConverter getTypeConverter(String fieldName) {
+        return this.typeConverter;
+    }
+
public NodePath getNodePath() {
return path;
}
View
18 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfigField.java
@@ -44,6 +44,7 @@
private NodePath path = null;
private NodePath relPath = null;
private int type = Type.STRING;
+ private org.exist.indexing.range.conversion.TypeConverter typeConverter = null;
protected boolean includeNested = false;
protected int wsTreatment = XMLString.SUPPRESS_NONE;
protected boolean caseSensitive = true;
@@ -76,6 +77,19 @@ public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, Stri
throw new DatabaseConfigurationException("Invalid type declared for range index on " + match + ": " + typeStr);
}
}
+ String custom = elem.getAttribute("converter");
+ if (custom != null && custom.length() > 0) {
+ try {
+ Class customClass = Class.forName(custom);
+ typeConverter = (org.exist.indexing.range.conversion.TypeConverter) customClass.newInstance();
+ } catch (ClassNotFoundException e) {
+ RangeIndex.LOG.warn("Class for custom-type not found: " + custom);
+ } catch (InstantiationException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ } catch (IllegalAccessException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ }
+ }
String nested = elem.getAttribute("nested");
includeNested = (nested == null || nested.equalsIgnoreCase("yes"));
path.setIncludeDescendants(includeNested);
@@ -108,6 +122,10 @@ public int getType() {
return type;
}
+    /**
+     * Returns the custom converter attached to this field definition.
+     *
+     * @return the converter created from the "converter" attribute, or
+     *         {@code null} if none was declared or it could not be instantiated
+     */
+    public org.exist.indexing.range.conversion.TypeConverter getTypeConverter() {
+        return this.typeConverter;
+    }
+
public boolean match(NodePath other) {
return path.match(other);
}
View
83 extensions/indexes/range/src/org/exist/indexing/range/conversion/DateConverter.java
@@ -0,0 +1,83 @@
+/*
+ * eXist Open Source Native XML Database
+ * Copyright (C) 2014 The eXist Project
+ * http://exist-db.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
+ */
+package org.exist.indexing.range.conversion;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongField;
+import org.exist.indexing.range.RangeIndexConfigElement;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.value.DateValue;
+import org.exist.xquery.value.TimeUtils;
+
+import javax.xml.datatype.XMLGregorianCalendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Simple normalization of dates: if there is only a year, transform it into a date: yyyy-01-01.
+ * If a full date is given, but with missing digits (as in 1980-8-4), fill them in.
+ * Anything that still cannot be read as a date afterwards is not indexed.
+ */
+public class DateConverter implements TypeConverter {
+
+    protected static final Logger LOG = Logger.getLogger(DateConverter.class);
+
+    private final static Pattern DATE_REGEX = Pattern.compile("(\\d+)-(\\d+)-(\\d+)");
+
+    /**
+     * Converts the given content into a numeric date field.
+     *
+     * @param fieldName name of the field being indexed
+     * @param content the raw text to normalize; surrounding whitespace is ignored
+     * @return a {@link LongField} holding the date, or {@code null} if the content
+     *         is {@code null} or cannot be interpreted as a date
+     */
+    @Override
+    public Field toField(String fieldName, String content) {
+        if (content == null) {
+            return null;
+        }
+        // Surrounding whitespace would otherwise make a year-only value fail to parse.
+        final String text = content.trim();
+        try {
+            final DateValue dv;
+            if (text.indexOf('-') < 0) {
+                // just year: map it to January 1st of that year
+                final int year = Integer.parseInt(text);
+                final XMLGregorianCalendar calendar = TimeUtils.getInstance().newXMLGregorianCalendar();
+                calendar.setYear(year);
+                calendar.setDay(1);
+                calendar.setMonth(1);
+                dv = new DateValue(calendar);
+            } else {
+                dv = new DateValue(padDigits(text));
+            }
+            final long dl = RangeIndexConfigElement.dateToLong(dv);
+            return new LongField(fieldName, dl, LongField.TYPE_NOT_STORED);
+        } catch (Exception e) {
+            // wrong type or otherwise unusable content: skip it, indexing must go on
+            LOG.debug("Invalid date format: " + content, e);
+        }
+        return null;
+    }
+
+    /**
+     * Pads a year-month-day string with leading zeros (1980-8-4 becomes 1980-08-04).
+     * Text which does not consist of three numeric parts is returned unchanged.
+     */
+    private static String padDigits(String text) {
+        final Matcher matcher = DATE_REGEX.matcher(text);
+        if (!matcher.matches()) {
+            return text;
+        }
+        try {
+            // Locale.ROOT keeps the digits ASCII; the default locale may substitute
+            // its own digit characters, which the date parser would not accept.
+            return String.format(java.util.Locale.ROOT, "%04d-%02d-%02d",
+                    Integer.parseInt(matcher.group(1)),
+                    Integer.parseInt(matcher.group(2)),
+                    Integer.parseInt(matcher.group(3)));
+        } catch (NumberFormatException e) {
+            // a part is too large for an int: leave the text as it is
+            return text;
+        }
+    }
+}
View
42 extensions/indexes/range/src/org/exist/indexing/range/conversion/TypeConverter.java
@@ -0,0 +1,42 @@
+/*
+ * eXist Open Source Native XML Database
+ * Copyright (C) 2014 The eXist Project
+ * http://exist-db.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
+ */
+package org.exist.indexing.range.conversion;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.util.BytesRef;
+import org.exist.xquery.value.AtomicValue;
+
+/**
+ * Contract for converting content on the fly while an index is being populated.
+ */
+public interface TypeConverter {
+
+    /**
+     * Receives every piece of content that is about to be indexed and turns it
+     * into a Lucene field whose type suits that content.
+     *
+     * @param fieldName name of the field being indexed
+     * @param content the content to be written to the index
+     * @return a lucene field to be added to the document
+     */
+    Field toField(String fieldName, String content);
+}
View
26 extensions/indexes/range/test/src/xquery/types.xql
@@ -11,11 +11,13 @@ declare variable $tt:COLLECTION_CONFIG :=
<fulltext default="none" attributes="false"/>
<range>
<create qname="date" type="xs:date"/>
+ <create qname="date4" type="xs:date" converter="org.exist.indexing.range.conversion.DateConverter"/>
<create qname="time" type="xs:time"/>
<create qname="dateTime" type="xs:dateTime"/>
<create qname="entry">
<field name="date" match="date2" type="xs:date"/>
<field name="int2" match="int2" type="xs:integer"/>
+ <field name="date3" match="date3" type="xs:date" converter="org.exist.indexing.range.conversion.DateConverter"/>
</create>
<create qname="string-lc" type="xs:string" case="no"/>
<create qname="string" type="xs:string"/>
@@ -30,6 +32,8 @@ declare variable $tt:XML :=
<id>E1</id>
<date>1918-02-11</date>
<date2>1918-02-11</date2>
+ <date3>1918</date3>
+ <date4>1918</date4>
<time>09:00:00Z</time>
<dateTime>1918-02-11T09:00:00Z</dateTime>
<string-lc>UPPERCASE</string-lc>
@@ -41,6 +45,8 @@ declare variable $tt:XML :=
<id>E2</id>
<date>2012-01-20</date>
<date2>2012-01-20</date2>
+ <date3>800-12-1</date3>
+ <date4>800-12-1</date4>
<time>10:00:00Z</time>
<dateTime>2012-01-20T10:00:00Z</dateTime>
<string-lc>lowercase</string-lc>
@@ -52,6 +58,8 @@ declare variable $tt:XML :=
<id>E3</id>
<date>2013-02-04</date>
<date2>2013-02-04</date2>
+ <date3>2000-01-01</date3>
+ <date4>2000-01-01</date4>
<time>10:00:00+01:00</time>
<dateTime>2012-01-20T11:00:00+01:00</dateTime>
<string-lc>MiXeDmOdE</string-lc>
@@ -475,3 +483,21 @@ declare
function tt:ends-with($string as xs:string) {
collection($tt:COLLECTION)//entry[ends-with(string, $string)]/id/string()
};
+
+(:~
+ : Returns the ids of all entries whose date3 child equals the given date.
+ : date3 is declared in the collection configuration with the DateConverter,
+ : and the test data stores it as "1918" (E1) and "800-12-1" (E2), so the
+ : expected matches depend on those values having been normalized.
+ :
+ : NOTE(review): date3 is the <field> inside the "entry" index, while date4 is
+ : the stand-alone index; the names of this function and of
+ : tt:date-field-normalized look swapped - confirm.
+ :)
+declare
+ %test:args("1918-01-01")
+ %test:assertEquals("E1")
+ %test:args("0800-12-01")
+ %test:assertEquals("E2")
+function tt:date-normalized($date as xs:date) {
+ collection($tt:COLLECTION)//entry[date3 = $date]/id/string()
+};
+
+(:~
+ : Returns the ids of all entries whose date4 child equals the given date.
+ : date4 is declared in the collection configuration with the DateConverter,
+ : and the test data stores it as "1918" (E1) and "800-12-1" (E2), so the
+ : expected matches depend on those values having been normalized.
+ :
+ : NOTE(review): date4 is the stand-alone index, while date3 is the <field>
+ : inside the "entry" index; the names of this function and of
+ : tt:date-normalized look swapped - confirm.
+ :)
+declare
+ %test:args("1918-01-01")
+ %test:assertEquals("E1")
+ %test:args("0800-12-01")
+ %test:assertEquals("E2")
+function tt:date-field-normalized($date as xs:date) {
+ collection($tt:COLLECTION)//entry[date4 = $date]/id/string()
+};
Please sign in to comment.
Something went wrong with that request. Please try again.