Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
Checking mergeability… Don't worry, you can still create the pull request.
  • 3 commits
  • 7 files changed
  • 0 commit comments
  • 2 contributors
View
9 extensions/indexes/range/src/org/exist/indexing/range/ComplexRangeIndexConfigElement.java
@@ -125,4 +125,13 @@ public int getType(String fieldName) {
}
return Type.STRING;
}
+
+    /**
+     * Resolves the custom type converter of the named field.
+     *
+     * @param fieldName name of the field to look up in {@code fields}
+     * @return the converter reported by the matching field configuration,
+     *         or null if no field is registered under that name
+     */
+    @Override
+    public org.exist.indexing.range.conversion.TypeConverter getTypeConverter(String fieldName) {
+        final RangeIndexConfigField configured = fields.get(fieldName);
+        return configured == null ? null : configured.getTypeConverter();
+    }
}
View
2  extensions/indexes/range/src/org/exist/indexing/range/RangeIndex.java
@@ -35,7 +35,7 @@
*/
public class RangeIndex extends LuceneIndex {
- private static final Logger LOG = Logger.getLogger(RangeIndex.class);
+ protected static final Logger LOG = Logger.getLogger(RangeIndex.class);
public final static String ID = RangeIndex.class.getName();
View
30 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfigElement.java
@@ -5,12 +5,9 @@
import org.apache.lucene.document.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
-import org.exist.EXistException;
import org.exist.dom.QName;
import org.exist.indexing.lucene.LuceneIndexConfig;
-import org.exist.storage.Indexable;
import org.exist.storage.NodePath;
-import org.exist.util.ByteConversion;
import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.exist.util.XMLString;
@@ -18,7 +15,6 @@
import org.exist.xquery.value.*;
import org.w3c.dom.Element;
-import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.XMLGregorianCalendar;
import java.io.IOException;
import java.util.Map;
@@ -33,6 +29,7 @@
protected boolean includeNested = false;
protected boolean caseSensitive = true;
protected int wsTreatment = XMLString.SUPPRESS_NONE;
+ private org.exist.indexing.range.conversion.TypeConverter typeConverter = null;
public RangeIndexConfigElement(Element node, Map<String, String> namespaces) throws DatabaseConfigurationException {
String match = node.getAttribute("match");
@@ -83,10 +80,29 @@ public RangeIndexConfigElement(Element node, Map<String, String> namespaces) thr
if (caseStr != null && caseStr.length() > 0) {
caseSensitive = caseStr.equalsIgnoreCase("yes");
}
+ String custom = node.getAttribute("converter");
+ if (custom != null && custom.length() > 0) {
+ try {
+ Class customClass = Class.forName(custom);
+ typeConverter = (org.exist.indexing.range.conversion.TypeConverter) customClass.newInstance();
+ } catch (ClassNotFoundException e) {
+ RangeIndex.LOG.warn("Class for custom-type not found: " + custom);
+ } catch (InstantiationException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ } catch (IllegalAccessException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ }
+ }
}
public Field convertToField(String fieldName, String content) throws IOException {
- int fieldType = getType(fieldName);
+ // check if a converter is defined for this index to handle on-the-fly conversions
+ final org.exist.indexing.range.conversion.TypeConverter custom = getTypeConverter(fieldName);
+ if (custom != null) {
+ return custom.toField(fieldName, content);
+ }
+ // no converter: handle default types
+ final int fieldType = getType(fieldName);
try {
switch (fieldType) {
case Type.INTEGER:
@@ -249,6 +265,10 @@ public int getType() {
return type;
}
+    /**
+     * Returns the custom type converter held by this index definition.
+     * This implementation does not consult the field name.
+     *
+     * @param fieldName not used here
+     * @return the stored converter; null unless one has been assigned
+     */
+    public org.exist.indexing.range.conversion.TypeConverter getTypeConverter(String fieldName) {
+        return this.typeConverter;
+    }
+
/**
 * @return the node path stored in this index definition
 */
public NodePath getNodePath() {
    return this.path;
}
View
18 extensions/indexes/range/src/org/exist/indexing/range/RangeIndexConfigField.java
@@ -44,6 +44,7 @@
private NodePath path = null;
private NodePath relPath = null;
private int type = Type.STRING;
+ private org.exist.indexing.range.conversion.TypeConverter typeConverter = null;
protected boolean includeNested = false;
protected int wsTreatment = XMLString.SUPPRESS_NONE;
protected boolean caseSensitive = true;
@@ -76,6 +77,19 @@ public RangeIndexConfigField(NodePath parentPath, Element elem, Map<String, Stri
throw new DatabaseConfigurationException("Invalid type declared for range index on " + match + ": " + typeStr);
}
}
+ String custom = elem.getAttribute("converter");
+ if (custom != null && custom.length() > 0) {
+ try {
+ Class customClass = Class.forName(custom);
+ typeConverter = (org.exist.indexing.range.conversion.TypeConverter) customClass.newInstance();
+ } catch (ClassNotFoundException e) {
+ RangeIndex.LOG.warn("Class for custom-type not found: " + custom);
+ } catch (InstantiationException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ } catch (IllegalAccessException e) {
+ RangeIndex.LOG.warn("Failed to initialize custom-type: " + custom, e);
+ }
+ }
String nested = elem.getAttribute("nested");
includeNested = (nested == null || nested.equalsIgnoreCase("yes"));
path.setIncludeDescendants(includeNested);
@@ -108,6 +122,10 @@ public int getType() {
return type;
}
+    /**
+     * Returns the custom type converter held by this field definition.
+     *
+     * @return the stored converter; null unless one has been assigned
+     */
+    public org.exist.indexing.range.conversion.TypeConverter getTypeConverter() {
+        return this.typeConverter;
+    }
+
/**
 * Checks the given node path against the path of this field by
 * delegating to {@code NodePath.match}.
 *
 * @param other the node path to test
 * @return the result reported by {@code path.match(other)}
 */
public boolean match(NodePath other) {
    final boolean matched = this.path.match(other);
    return matched;
}
View
83 extensions/indexes/range/src/org/exist/indexing/range/conversion/DateConverter.java
@@ -0,0 +1,83 @@
+/*
+ * eXist Open Source Native XML Database
+ * Copyright (C) 2014 The eXist Project
+ * http://exist-db.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
+ */
+package org.exist.indexing.range.conversion;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongField;
+import org.exist.indexing.range.RangeIndexConfigElement;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.value.DateValue;
+import org.exist.xquery.value.TimeUtils;
+
+import javax.xml.datatype.XMLGregorianCalendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Type converter which normalizes loosely formatted dates before they are indexed.
+ *
+ * <ul>
+ *   <li>A bare year (content without any '-') becomes the first of January
+ *       of that year: yyyy-01-01.</li>
+ *   <li>A date of the form y-m-d with missing digits (as in 1980-8-4) is
+ *       zero-padded to yyyy-MM-dd.</li>
+ *   <li>Anything else is handed to {@link DateValue} unchanged.</li>
+ * </ul>
+ *
+ * Content which cannot be turned into a date is not indexed:
+ * {@link #toField(String, String)} returns null for it.
+ */
+public class DateConverter implements TypeConverter {
+
+    protected static final Logger LOG = Logger.getLogger(DateConverter.class);
+
+    private final static Pattern DATE_REGEX = Pattern.compile("(\\d+)-(\\d+)-(\\d+)");
+
+    /**
+     * Converts the given content into a numeric Lucene field holding the
+     * normalized date.
+     *
+     * @param fieldName name of the field being indexed
+     * @param content the raw date text; leading and trailing whitespace is ignored
+     * @return a {@link LongField} (not stored) for the date, or null if the
+     *         content is null or cannot be interpreted as a date
+     */
+    @Override
+    public Field toField(String fieldName, String content) {
+        if (content == null) {
+            LOG.debug("Invalid date format: " + content);
+            return null;
+        }
+        // xs:date collapses surrounding whitespace, so tolerate it here as well
+        final String text = content.trim();
+        try {
+            final DateValue dv;
+            if (text.indexOf('-') < 0) {
+                // just a year: use the first of January of that year
+                final XMLGregorianCalendar calendar = TimeUtils.getInstance().newXMLGregorianCalendar();
+                calendar.setYear(Integer.parseInt(text));
+                calendar.setMonth(1);
+                calendar.setDay(1);
+                dv = new DateValue(calendar);
+            } else {
+                dv = new DateValue(padDate(text));
+            }
+            final long dl = RangeIndexConfigElement.dateToLong(dv);
+            return new LongField(fieldName, dl, LongField.TYPE_NOT_STORED);
+        } catch (Exception e) {
+            // Indexing is best effort: whatever goes wrong while parsing
+            // (XPathException, NumberFormatException, ...), the value is
+            // simply left out of the index.
+            LOG.debug("Invalid date format: " + content, e);
+            return null;
+        }
+    }
+
+    /**
+     * Zero-pads a date written as y-m-d (e.g. 1980-8-4) to yyyy-MM-dd.
+     * Text of any other shape is returned as is.
+     */
+    private static String padDate(String text) {
+        final Matcher matcher = DATE_REGEX.matcher(text);
+        if (!matcher.matches()) {
+            return text;
+        }
+        try {
+            // Locale.ROOT keeps the digits ASCII whatever the JVM default locale is
+            return String.format(java.util.Locale.ROOT, "%04d-%02d-%02d",
+                    Integer.parseInt(matcher.group(1)),
+                    Integer.parseInt(matcher.group(2)),
+                    Integer.parseInt(matcher.group(3)));
+        } catch (NumberFormatException e) {
+            // a component does not fit into an int: leave the text untouched
+            // and let DateValue decide what to do with it
+            return text;
+        }
+    }
+}
View
42 extensions/indexes/range/src/org/exist/indexing/range/conversion/TypeConverter.java
@@ -0,0 +1,42 @@
+/*
+ * eXist Open Source Native XML Database
+ * Copyright (C) 2014 The eXist Project
+ * http://exist-db.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
+ */
+package org.exist.indexing.range.conversion;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.util.BytesRef;
+import org.exist.xquery.value.AtomicValue;
+
+/**
+ * Contract for converters which turn raw content into an index field
+ * on the fly while an index is being populated.
+ */
+public interface TypeConverter {
+
+    /**
+     * Receives every piece of content that is about to be indexed and
+     * produces the Lucene field, of a type suited to that content,
+     * which will represent it.
+     *
+     * @param fieldName name of the field being indexed
+     * @param content the content to be written to the index
+     * @return a lucene field to be added to the document
+     */
+    Field toField(String fieldName, String content);
+}
View
26 extensions/indexes/range/test/src/xquery/types.xql
@@ -11,11 +11,13 @@ declare variable $tt:COLLECTION_CONFIG :=
<fulltext default="none" attributes="false"/>
<range>
<create qname="date" type="xs:date"/>
+ <create qname="date4" type="xs:date" converter="org.exist.indexing.range.conversion.DateConverter"/>
<create qname="time" type="xs:time"/>
<create qname="dateTime" type="xs:dateTime"/>
<create qname="entry">
<field name="date" match="date2" type="xs:date"/>
<field name="int2" match="int2" type="xs:integer"/>
+ <field name="date3" match="date3" type="xs:date" converter="org.exist.indexing.range.conversion.DateConverter"/>
</create>
<create qname="string-lc" type="xs:string" case="no"/>
<create qname="string" type="xs:string"/>
@@ -30,6 +32,8 @@ declare variable $tt:XML :=
<id>E1</id>
<date>1918-02-11</date>
<date2>1918-02-11</date2>
+ <date3>1918</date3>
+ <date4>1918</date4>
<time>09:00:00Z</time>
<dateTime>1918-02-11T09:00:00Z</dateTime>
<string-lc>UPPERCASE</string-lc>
@@ -41,6 +45,8 @@ declare variable $tt:XML :=
<id>E2</id>
<date>2012-01-20</date>
<date2>2012-01-20</date2>
+ <date3>800-12-1</date3>
+ <date4>800-12-1</date4>
<time>10:00:00Z</time>
<dateTime>2012-01-20T10:00:00Z</dateTime>
<string-lc>lowercase</string-lc>
@@ -52,6 +58,8 @@ declare variable $tt:XML :=
<id>E3</id>
<date>2013-02-04</date>
<date2>2013-02-04</date2>
+ <date3>2000-01-01</date3>
+ <date4>2000-01-01</date4>
<time>10:00:00+01:00</time>
<dateTime>2012-01-20T11:00:00+01:00</dateTime>
<string-lc>MiXeDmOdE</string-lc>
@@ -475,3 +483,21 @@ declare
function tt:ends-with($string as xs:string) {
collection($tt:COLLECTION)//entry[ends-with(string, $string)]/id/string()
};
+
+(: Queries entry elements by date3. In $tt:COLLECTION_CONFIG date3 is declared
+   as a <field> of the "entry" index with the DateConverter attached; the test
+   data holds "1918" and "800-12-1", which are expected to match the full
+   xs:date values passed in below. :)
+declare
+ %test:args("1918-01-01")
+ %test:assertEquals("E1")
+ %test:args("0800-12-01")
+ %test:assertEquals("E2")
+function tt:date-normalized($date as xs:date) {
+ collection($tt:COLLECTION)//entry[date3 = $date]/id/string()
+};
+
+(: Queries entry elements by date4. In $tt:COLLECTION_CONFIG date4 has its own
+   <create qname="date4"> index with the DateConverter attached.
+   NOTE(review): despite the function name, date4 is the stand-alone index and
+   date3 is the one declared as a <field>; confirm the names are as intended. :)
+declare
+ %test:args("1918-01-01")
+ %test:assertEquals("E1")
+ %test:args("0800-12-01")
+ %test:assertEquals("E2")
+function tt:date-field-normalized($date as xs:date) {
+ collection($tt:COLLECTION)//entry[date4 = $date]/id/string()
+};

No commit comments for this range

Something went wrong with that request. Please try again.