Skip to content

Commit

Permalink
HSEARCH-2434 Implement null markers differently for the text datatype…
Browse files Browse the repository at this point in the history
… in Elasticsearch 5
  • Loading branch information
yrodiere committed Mar 21, 2017
1 parent 814c5df commit 58c7ada
Show file tree
Hide file tree
Showing 23 changed files with 308 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@


/**
* A base class for {@link NullMarkerCodec}s that index null values as the JSON "null" value.
*
* @author Yoann Rodiere
*/
abstract class ElasticsearchNullMarkerCodec implements NullMarkerCodec {
public abstract class ElasticsearchAsNullNullMarkerCodec implements NullMarkerCodec {

protected final NullMarker nullMarker;

public ElasticsearchNullMarkerCodec(NullMarker nullMarker) {
public ElasticsearchAsNullNullMarkerCodec(NullMarker nullMarker) {
super();
this.nullMarker = nullMarker;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
/**
* @author Sanne Grinovero
*/
public class ElasticsearchStringNullMarkerCodec extends ElasticsearchNullMarkerCodec {
public class ElasticsearchAsNullStringNullMarkerCodec extends ElasticsearchAsNullNullMarkerCodec {

private final BytesRef encodedToken;

public ElasticsearchStringNullMarkerCodec(NullMarker nullMarker) {
public ElasticsearchAsNullStringNullMarkerCodec(NullMarker nullMarker) {
super( nullMarker );
this.encodedToken = new BytesRef( (String) nullMarker.nullEncoded() );
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.elasticsearch.nulls.codec.impl;

import org.hibernate.search.bridge.spi.NullMarker;
import org.hibernate.search.engine.nulls.codec.impl.NullMarkerCodec;

/**
 * Common superclass for {@link NullMarkerCodec} implementations that index null values
 * as a non-null token.
 * <p>
 * Such codecs are needed because Elasticsearch doesn't support 'null_value' on the 'text' datatype.
 *
 * @author Yoann Rodiere
 */
abstract class ElasticsearchAsTokenNullMarkerCodec implements NullMarkerCodec {

	/** The marker describing how null is represented; exposed to subclasses. */
	protected final NullMarker nullMarker;

	/**
	 * @param nullMarker the null marker this codec wraps; stored as-is
	 */
	public ElasticsearchAsTokenNullMarkerCodec(NullMarker nullMarker) {
		this.nullMarker = nullMarker;
	}

	/**
	 * @return the null marker passed at construction time
	 */
	@Override
	public NullMarker getNullMarker() {
		return this.nullMarker;
	}

	/**
	 * @return the simple name of the concrete class followed by the null marker in square brackets
	 */
	@Override
	public String toString() {
		StringBuilder description = new StringBuilder();
		description.append( getClass().getSimpleName() );
		description.append( "[" ).append( this.nullMarker ).append( "]" );
		return description.toString();
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.elasticsearch.nulls.codec.impl;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.hibernate.search.bridge.LuceneOptions;
import org.hibernate.search.bridge.spi.NullMarker;
import org.hibernate.search.engine.nulls.codec.impl.NullMarkerCodec;

/**
 * A {@link NullMarkerCodec} that indexes null values as the string token itself.
 * <p>
 * This is necessary because Elasticsearch doesn't support 'null_value' on the 'text' datatype.
 *
 * @author Sanne Grinovero
 */
public class ElasticsearchAsTokenStringNullMarkerCodec extends ElasticsearchAsTokenNullMarkerCodec {

	/** The null token, encoded once at construction time and used to build term queries. */
	private final BytesRef encodedToken;

	/**
	 * @param nullMarker the null marker; its encoded form is cast to {@link String}
	 */
	public ElasticsearchAsTokenStringNullMarkerCodec(NullMarker nullMarker) {
		super( nullMarker );
		final String token = (String) nullMarker.nullEncoded();
		this.encodedToken = new BytesRef( token );
	}

	/**
	 * Adds the null token, as a string, to the given document under the given field name.
	 */
	@Override
	public void encodeNullValue(String name, Document document, LuceneOptions luceneOptions) {
		final String token = (String) this.nullMarker.nullEncoded();
		luceneOptions.addFieldToDocument( name, token, document );
	}

	/**
	 * @return a term query on the given field targeting the encoded null token
	 */
	@Override
	public Query createNullMatchingQuery(String fieldName) {
		final Term nullTerm = new Term( fieldName, this.encodedToken );
		return new TermQuery( nullTerm );
	}

	/**
	 * @return {@code true} if the string value of the given field equals the null token
	 */
	@Override
	public boolean representsNullValue(IndexableField field) {
		// Read the field value first, then the token, as the original expression did
		final String candidate = field.stringValue();
		final Object token = this.nullMarker.nullEncoded();
		return token.equals( candidate );
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
/**
* @author Sanne Grinovero
*/
public class ElasticsearchBooleanNullMarkerCodec extends ElasticsearchNullMarkerCodec {
public class ElasticsearchBooleanNullMarkerCodec extends ElasticsearchAsNullNullMarkerCodec {

private final BytesRef encodedToken;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
/**
* @author Sanne Grinovero
*/
public class ElasticsearchDoubleNullMarkerCodec extends ElasticsearchNullMarkerCodec {
public class ElasticsearchDoubleNullMarkerCodec extends ElasticsearchAsNullNullMarkerCodec {

public ElasticsearchDoubleNullMarkerCodec(NullMarker nullMarker) {
super( nullMarker );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
/**
* @author Sanne Grinovero
*/
public class ElasticsearchFloatNullMarkerCodec extends ElasticsearchNullMarkerCodec {
public class ElasticsearchFloatNullMarkerCodec extends ElasticsearchAsNullNullMarkerCodec {

public ElasticsearchFloatNullMarkerCodec(final NullMarker nullMarker) {
super( nullMarker );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
/**
* @author Sanne Grinovero
*/
public class ElasticsearchIntegerNullMarkerCodec extends ElasticsearchNullMarkerCodec {
public class ElasticsearchIntegerNullMarkerCodec extends ElasticsearchAsNullNullMarkerCodec {

public ElasticsearchIntegerNullMarkerCodec(final NullMarker nullMarker) {
super( nullMarker );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
/**
* @author Sanne Grinovero
*/
public class ElasticsearchLongNullMarkerCodec extends ElasticsearchNullMarkerCodec {
public class ElasticsearchLongNullMarkerCodec extends ElasticsearchAsNullNullMarkerCodec {

public ElasticsearchLongNullMarkerCodec(final NullMarker nullMarker) {
super( nullMarker );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

import org.hibernate.search.bridge.spi.NullMarker;
import org.hibernate.search.elasticsearch.logging.impl.Log;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchAsNullStringNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchBooleanNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchDoubleNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchFloatNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchIntegerNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchLongNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchStringNullMarkerCodec;
import org.hibernate.search.engine.metadata.impl.DocumentFieldPath;
import org.hibernate.search.engine.metadata.impl.PartialDocumentFieldMetadata;
import org.hibernate.search.engine.nulls.codec.impl.NullMarkerCodec;
import org.hibernate.search.engine.nulls.impl.MissingValueStrategy;
import org.hibernate.search.util.logging.impl.LoggerFactory;
Expand All @@ -25,10 +25,11 @@ public final class Elasticsearch2MissingValueStrategy implements MissingValueStr
public static final Elasticsearch2MissingValueStrategy INSTANCE = new Elasticsearch2MissingValueStrategy();

@Override
public NullMarkerCodec createNullMarkerCodec(Class<?> entityType, DocumentFieldPath path, NullMarker nullMarker) {
public NullMarkerCodec createNullMarkerCodec(Class<?> entityType,
PartialDocumentFieldMetadata fieldMetadata, NullMarker nullMarker) {
Object nullEncoded = nullMarker.nullEncoded();
if ( nullEncoded instanceof String ) {
return new ElasticsearchStringNullMarkerCodec( nullMarker );
return new ElasticsearchAsNullStringNullMarkerCodec( nullMarker );
}
else if ( nullEncoded instanceof Integer ) {
return new ElasticsearchIntegerNullMarkerCodec( nullMarker );
Expand All @@ -46,7 +47,8 @@ else if ( nullEncoded instanceof Boolean ) {
return new ElasticsearchBooleanNullMarkerCodec( nullMarker );
}
else {
throw LOG.unsupportedNullTokenType( entityType, path.getAbsoluteName(), nullEncoded.getClass() );
throw LOG.unsupportedNullTokenType( entityType, fieldMetadata.getPath().getAbsoluteName(),
nullEncoded.getClass() );
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@

import org.hibernate.search.bridge.spi.NullMarker;
import org.hibernate.search.elasticsearch.logging.impl.Log;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchAsTokenStringNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchBooleanNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchDoubleNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchFloatNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchIntegerNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchLongNullMarkerCodec;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchStringNullMarkerCodec;
import org.hibernate.search.engine.metadata.impl.DocumentFieldPath;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchAsNullStringNullMarkerCodec;
import org.hibernate.search.elasticsearch.util.impl.FieldHelper;
import org.hibernate.search.elasticsearch.util.impl.FieldHelper.ExtendedFieldType;
import org.hibernate.search.engine.metadata.impl.PartialDocumentFieldMetadata;
import org.hibernate.search.engine.nulls.codec.impl.NullMarkerCodec;
import org.hibernate.search.engine.nulls.impl.MissingValueStrategy;
import org.hibernate.search.util.logging.impl.LoggerFactory;
Expand All @@ -25,10 +28,21 @@ public final class Elasticsearch5MissingValueStrategy implements MissingValueStr
public static final Elasticsearch5MissingValueStrategy INSTANCE = new Elasticsearch5MissingValueStrategy();

@Override
public NullMarkerCodec createNullMarkerCodec(Class<?> entityType, DocumentFieldPath path, NullMarker nullMarker) {
public NullMarkerCodec createNullMarkerCodec(Class<?> entityType,
PartialDocumentFieldMetadata fieldMetadata, NullMarker nullMarker) {
Object nullEncoded = nullMarker.nullEncoded();
if ( nullEncoded instanceof String ) {
return new ElasticsearchStringNullMarkerCodec( nullMarker );
if ( isTextDataType( fieldMetadata ) ) {
/*
* The "text" datatype does not support null_value,
* which implies a slightly different null value handling
* on our side.
*/
return new ElasticsearchAsTokenStringNullMarkerCodec( nullMarker );
}
else {
return new ElasticsearchAsNullStringNullMarkerCodec( nullMarker );
}
}
else if ( nullEncoded instanceof Integer ) {
return new ElasticsearchIntegerNullMarkerCodec( nullMarker );
Expand All @@ -46,7 +60,22 @@ else if ( nullEncoded instanceof Boolean ) {
return new ElasticsearchBooleanNullMarkerCodec( nullMarker );
}
else {
throw LOG.unsupportedNullTokenType( entityType, path.getAbsoluteName(), nullEncoded.getClass() );
throw LOG.unsupportedNullTokenType( entityType, fieldMetadata.getPath().getAbsoluteName(),
nullEncoded.getClass() );
}
}

/**
 * Tells whether the given field is handled as the "text" datatype.
 *
 * @param fieldMetadata the metadata of the field to inspect
 * @return {@code true} if the field is analyzed and its type is either string or unknown
 */
@SuppressWarnings("deprecation")
private boolean isTextDataType(PartialDocumentFieldMetadata fieldMetadata) {
	// The text datatype is always analyzed; non-analyzed fields use the "keyword" datatype instead
	if ( !fieldMetadata.getIndex().isAnalyzed() ) {
		return false;
	}
	final ExtendedFieldType resolvedType = FieldHelper.getType( fieldMetadata );
	// Strings are also used when the type is unknown
	return ExtendedFieldType.STRING.equals( resolvedType )
			|| ExtendedFieldType.UNKNOWN.equals( resolvedType );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.hibernate.search.elasticsearch.bridge.builtin.impl.ElasticsearchBridgeDefinedField;
import org.hibernate.search.elasticsearch.impl.ToElasticsearch;
import org.hibernate.search.elasticsearch.logging.impl.Log;
import org.hibernate.search.elasticsearch.nulls.codec.impl.ElasticsearchAsNullNullMarkerCodec;
import org.hibernate.search.elasticsearch.schema.impl.model.DataType;
import org.hibernate.search.elasticsearch.schema.impl.model.DynamicType;
import org.hibernate.search.elasticsearch.schema.impl.model.IndexMetadata;
Expand Down Expand Up @@ -479,9 +480,15 @@ protected DataType getStringType(PropertyMapping propertyMapping, Index index) {

private void addNullValue(PropertyMapping propertyMapping, ElasticsearchMappingBuilder mappingBuilder, DocumentFieldMetadata fieldMetadata) {
NullMarkerCodec nullMarkerCodec = fieldMetadata.getNullMarkerCodec();
NullMarker nullMarker = nullMarkerCodec.getNullMarker();
if ( nullMarker != null ) {
JsonPrimitive nullTokenJson = convertIndexedNullTokenToJson( mappingBuilder, fieldMetadata.getPath(), nullMarker.nullEncoded() );
/*
* We may have a codec that doesn't index null values as null,
* because 'null_value' is not supported for the 'text' datatype.
* See Elasticsearch5MissingValueStrategy.
*/
if ( nullMarkerCodec instanceof ElasticsearchAsNullNullMarkerCodec ) {
NullMarker nullMarker = nullMarkerCodec.getNullMarker();
JsonPrimitive nullTokenJson = convertIndexedNullTokenToJson( mappingBuilder,
fieldMetadata.getPath(), nullMarker.nullEncoded() );
propertyMapping.setNullValue( nullTokenJson );
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

import org.hibernate.search.bridge.spi.FieldType;
import org.hibernate.search.engine.metadata.impl.BridgeDefinedField;
import org.hibernate.search.engine.metadata.impl.DocumentFieldMetadata;
import org.hibernate.search.engine.metadata.impl.PartialDocumentFieldMetadata;
import org.hibernate.search.engine.metadata.impl.PartialPropertyMetadata;
import org.hibernate.search.engine.metadata.impl.PropertyMetadata;
import org.hibernate.search.engine.metadata.impl.SortableFieldMetadata;
import org.hibernate.search.engine.metadata.impl.TypeMetadata;
Expand Down Expand Up @@ -104,25 +105,31 @@ private static ExtendedFieldType toExtendedFieldType(NumericEncodingType numeric
}
}

public static ExtendedFieldType getType(DocumentFieldMetadata fieldMetadata) {
public static ExtendedFieldType getType(PartialDocumentFieldMetadata fieldMetadata) {
// Always use user-provided type in priority
BridgeDefinedField overriddenField = fieldMetadata.getBridgeDefinedFields().get( fieldMetadata.getAbsoluteName() );
BridgeDefinedField overriddenField = fieldMetadata.getBridgeDefinedFields().get( fieldMetadata.getPath().getAbsoluteName() );
if ( overriddenField != null ) {
return getType( overriddenField );
}

PropertyMetadata propertyMetata = fieldMetadata.getSourceProperty();
PartialPropertyMetadata propertyMetata = fieldMetadata.getSourceProperty();
Class<?> propertyClass = propertyMetata == null ? null : propertyMetata.getPropertyClass();
if ( propertyClass == null ) {
return ExtendedFieldType.UNKNOWN;
}

if ( fieldMetadata.isNumeric() ) {
return toExtendedFieldType( fieldMetadata.getNumericEncodingType() );
}
else {
return getType( propertyClass );
}
}

public static ExtendedFieldType getType(Class<?> propertyClass) {
if ( boolean.class.equals( propertyClass ) || Boolean.class.isAssignableFrom( propertyClass ) ) {
return ExtendedFieldType.BOOLEAN;
}
else if ( fieldMetadata.isNumeric() ) {
return toExtendedFieldType( fieldMetadata.getNumericEncodingType() );
}
else if ( Date.class.isAssignableFrom( propertyClass ) ) {
return ExtendedFieldType.DATE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ public void testMapping() throws Exception {
"'null_value':-1" +
"}," +
"'firstName':{" +
"'type':'text'," +
"'null_value':'<NULL>'" +
"'type':'text'" +
"}," +
"'fullName':{" +
"'type':'text'" +
Expand Down Expand Up @@ -437,7 +436,7 @@ public void testNullTokenMapping() {
"\"dateOfBirth\": null," +
"\"subscriptionEndDate\":null," +
"\"driveWidth\": null," +
"\"firstName\": null," +
"\"firstName\": <NULL>," +
"\"handicap\": 0.0," + // not nullable
"\"id\": '3'," +
"\"puttingStrength\": \"0.0\"," + // not nullable
Expand Down

0 comments on commit 58c7ada

Please sign in to comment.