Skip to content

Commit

Permalink
Introduced workarounds to work with Solr 7.x. Also added a
Browse files Browse the repository at this point in the history
FileNameDataProcessor.
  • Loading branch information
Pascal Essiembre authored and Pascal Essiembre committed Feb 2, 2018
1 parent ae483a5 commit 99d8a50
Show file tree
Hide file tree
Showing 8 changed files with 367 additions and 105 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ classes/
/.project
/.settings
/dist/
/gradle.properties
16 changes: 9 additions & 7 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ apply plugin: 'project-report'
sourceCompatibility = 1.8
version = project.property('versionString') + '_build' + project.property('buildNumber')

def solrVersion = '7.2.1'

repositories {
mavenLocal()
mavenCentral()
Expand All @@ -17,17 +19,17 @@ repositories {

dependencies {
// ---< Lucene >---
compile group: 'org.apache.lucene', name: 'lucene-core', version: '6.4.0'
compile group: 'org.apache.lucene', name: 'lucene-analyzers-common', version: '6.4.0'
compile group: 'org.apache.lucene', name: 'lucene-queryparser', version: '6.4.0'
compile group: 'org.apache.lucene', name: 'lucene-queries', version: '6.4.0'
compile group: 'org.apache.lucene', name: 'lucene-core', version: solrVersion
compile group: 'org.apache.lucene', name: 'lucene-analyzers-common', version: solrVersion
compile group: 'org.apache.lucene', name: 'lucene-queryparser', version: solrVersion
compile group: 'org.apache.lucene', name: 'lucene-queries', version: solrVersion

// ---< Solr >---
// https://mvnrepository.com/artifact/org.apache.solr/solr-core
compile group: 'org.apache.solr', name: 'solr-core', version: '6.4.0'
compile group: 'org.apache.solr', name: 'solr-core', version: solrVersion
// https://mvnrepository.com/artifact/org.apache.solr/solr-dataimporthandler
compile group: 'org.apache.solr', name: 'solr-dataimporthandler', version: '6.4.0'
compile group: 'org.apache.solr', name: 'solr-solrj', version: '6.4.0'
compile group: 'org.apache.solr', name: 'solr-dataimporthandler', version: solrVersion
compile group: 'org.apache.solr', name: 'solr-solrj', version: solrVersion
// https://mvnrepository.com/artifact/org.apache.solr/solr-solrj

// ---< Commons >---
Expand Down
6 changes: 3 additions & 3 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Tue Aug 29 16:24:53 CEST 2017
#Fri Feb 02 17:11:26 EST 2018
versionString=6.4.0_b01
buildNumber=154
buildDate=2017-08-29-1624
buildNumber=162
buildDate=2018-02-02-1711
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ public String toExternal(IndexableField f) {
@Override
public ByteBuffer toObject(IndexableField f) {
BytesRef bytes = f.binaryValue();
return ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length);
if (bytes != null) {
return ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length);
}
return ByteBuffer.allocate(0);
}

@Override
Expand All @@ -93,7 +96,7 @@ public UninvertingReader.Type getUninversionType(SchemaField sf) {
}

@Override
public IndexableField createField(SchemaField field, Object val, float boost) {
public IndexableField createField(SchemaField field, Object val /*, float boost*/) {
if (val == null) return null;
if (!field.stored()) {
return null;
Expand All @@ -118,7 +121,7 @@ public IndexableField createField(SchemaField field, Object val, float boost) {

Field f = new org.apache.lucene.document.BinaryDocValuesField(field.getName(), new BytesRef(buf, offset, len));
// Field f = new org.apache.lucene.document.StoredField(field.getName(), buf, offset, len);
f.setBoost(boost);
//f.setBoost(boost);
return f;
}
}
219 changes: 156 additions & 63 deletions src/main/java/net/semanticmetadata/lire/solr/LireRequestHandler.java

Large diffs are not rendered by default.

104 changes: 75 additions & 29 deletions src/main/java/net/semanticmetadata/lire/solr/LireValueSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,24 @@

package net.semanticmetadata.lire.solr;

import net.semanticmetadata.lire.imageanalysis.features.GlobalFeature;
import net.semanticmetadata.lire.imageanalysis.features.global.ColorLayout;
import net.semanticmetadata.lire.imageanalysis.features.global.EdgeHistogram;
import org.apache.lucene.index.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DocTermsIndexDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.common.util.Base64;

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import net.semanticmetadata.lire.imageanalysis.features.GlobalFeature;
import net.semanticmetadata.lire.imageanalysis.features.global.ColorLayout;
import net.semanticmetadata.lire.solr.tools.RandomAccessBinaryDocValues;

/**
* A query function for sorting results based on the LIRE CBIR functions.
Expand All @@ -74,7 +78,9 @@ public class LireValueSource extends ValueSource {
* @param maxDistance the distance value returned if there is no distance calculation possible.
*/
public LireValueSource(String featureField, byte[] hist, double maxDistance) {
if (featureField != null) field = featureField;
if (featureField != null) {
field = featureField;
}
if (!field.endsWith("_hi")) { // TODO: Make that somewhat not so string dependent .. maybe connect with the postfix in FeatureRegistry
field += "_hi";
}
Expand Down Expand Up @@ -189,28 +195,50 @@ public double doubleVal(int doc) {
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final FieldInfo fieldInfo = readerContext.reader().getFieldInfos().fieldInfo(field);
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
final BinaryDocValues binaryValues = DocValues.getBinary(readerContext.reader(), field);
final Bits docsWithField = DocValues.getDocsWithField(readerContext.reader(), field);
// final BinaryDocValues binaryValues = DocValues.getBinary(readerContext.reader(), field);
// final Bits docsWithField = DocValues.getDocsWithField(readerContext.reader(), field);
final BinaryDocValues binaryValues = new RandomAccessBinaryDocValues(() -> {
try {
return DocValues.getBinary(readerContext.reader(), field);
} catch (IOException e) {
throw new RuntimeException("BinaryDocValues problem.", e);
}

});



return new FunctionValues() {
@Override
public boolean exists(int doc) {
return docsWithField.get(doc);
public boolean exists(int doc) throws IOException {
return binaryValues.advance(doc) == doc;
// return docsWithField.get(doc);
}

@Override
public boolean bytesVal(int doc, BytesRefBuilder target) {
target.copyBytes(binaryValues.get(doc));
public boolean bytesVal(int doc, BytesRefBuilder target)
throws IOException {
// target.copyBytes(binaryValues.get(doc));
// return target.length() > 0;
BytesRef bytesRef;
if (binaryValues.advance(doc) == doc) {
bytesRef = binaryValues.binaryValue();
} else {
bytesRef = new BytesRef(BytesRef.EMPTY_BYTES);
}
target.copyBytes(bytesRef);
return target.length() > 0;

}


@Override
public float floatVal(int doc) {
public float floatVal(int doc) throws IOException {
return (float) doubleVal(doc);
}

public String strVal(int doc) {
@Override
public String strVal(int doc) throws IOException {
final BytesRefBuilder bytes = new BytesRefBuilder();
return bytesVal(doc, bytes)
? bytes.get().utf8ToString()
Expand All @@ -222,27 +250,35 @@ public String strVal(int doc) {
* in this case it is the double from the distance function.
* @param doc
* @return the distance as Double, mapping to {@link FunctionValues#doubleVal(int)}
* @throws IOException
*/
@Override
public Object objectVal(int doc) {
public Object objectVal(int doc) throws IOException {
return doubleVal(doc);
}

@Override
public String toString(int doc) {
public String toString(int doc) throws IOException {
return description() + '=' + strVal(doc);
}

@Override
/**
* This method has to be implemented to support sorting!
*/
public double doubleVal(int doc) {
if (binaryValues.get(doc).length > 0) {
tmpFeature.setByteArrayRepresentation(binaryValues.get(doc).bytes, binaryValues.get(doc).offset, binaryValues.get(doc).length);
public double doubleVal(int doc) throws IOException {
BytesRef bytesRef = getBytesRef(binaryValues, doc);
if (bytesRef.length > 0) {
tmpFeature.setByteArrayRepresentation(
bytesRef.bytes,
bytesRef.offset,
bytesRef.length);
// tmpFeature.setByteArrayRepresentation(binaryValues.get(doc).bytes, binaryValues.get(doc).offset, binaryValues.get(doc).length);
return tmpFeature.getDistance(feature);
} else
}
else {
return maxDistance; // make sure max distance is returned for those without value
}
}
};
} else {
Expand All @@ -259,25 +295,37 @@ public Object objectVal(int doc) {
}

@Override
public String toString(int doc) {
public String toString(int doc) throws IOException {
return description() + '=' + strVal(doc);
}


@Override
public double doubleVal(int doc) {
return maxDistance;
}
};
}
}

/**
 * Looks up the binary value stored for a document.
 *
 * @param bdv the doc values to read from, may be {@code null}
 * @param docId the document to position the doc values on
 * @return the value of {@code docId} when {@code bdv} could be positioned
 *         exactly on that document, otherwise an empty {@link BytesRef}
 * @throws IOException if the doc values cannot be advanced or read
 */
private BytesRef getBytesRef(BinaryDocValues bdv, int docId)
        throws IOException {
    // The short-circuit keeps advance() from being called on a null source.
    final boolean positioned = bdv != null && bdv.advance(docId) == docId;
    return positioned
            ? bdv.binaryValue()
            : new BytesRef(BytesRef.EMPTY_BYTES);
}

@Override
public boolean equals(Object o) {
if (o instanceof LireValueSource)
if (o instanceof LireValueSource) {
// check if the function has had the same parameters.
return objectHashBase.equals(((LireValueSource) o).objectHashBase);
else
} else {
return false;
}
}

@Override
Expand All @@ -289,6 +337,4 @@ public int hashCode() {
public String description() {
return "distance to a given feature vector";
}


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package net.semanticmetadata.lire.solr.indexing;

import org.apache.commons.lang3.StringUtils;

/**
 * This data processor removes the path from the file to keep only
 * the last segment (the actual file name), and store it as the title. The
 * same file name is used for the identifier, cut at its first dot (if any).
 * Both {@code /} and {@code \} are treated as path separators. A file name
 * without any path separator is kept as is.
 * @author Pascal Essiembre
 */
public class FileNameDataProcessor implements ImageDataProcessor {

    /**
     * Returns the file name without its path.
     * @param filename the file name or path, must not be {@code null}
     * @return the last path segment, empty if the path ends with a separator
     */
    @Override
    public CharSequence getTitle(String filename) {
        return lastSegment(filename);
    }

    /**
     * Returns the file name without its path, cut at the first dot.
     * @param filename the file name or path, must not be {@code null}
     * @return the last path segment up to (excluding) its first dot
     */
    @Override
    public CharSequence getIdentifier(String filename) {
        return StringUtils.substringBefore(lastSegment(filename), ".");
    }

    /**
     * This processor contributes no additional fields.
     * @param filename the file name or path (not used)
     * @return always an empty string
     */
    @Override
    public CharSequence getAdditionalFields(String filename) {
        return "";
    }

    /**
     * Extracts the text following the last path separator.
     * When there is no separator, lastIndexOf yields -1 and the whole name
     * is returned (rather than an empty string).
     */
    private static String lastSegment(String filename) {
        final String normalized = filename.replace('\\', '/');
        return normalized.substring(normalized.lastIndexOf('/') + 1);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package net.semanticmetadata.lire.solr.tools;

import java.io.IOException;
import java.util.function.Supplier;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;


/**
 * Wraps a {@link BinaryDocValues} creation strategy so it can be reset
 * if needed. This is a hack to port liresolr to Lucene 7+ (which enforces
 * ordered doc values) until a more formal solution is in place.
 * <p>
 * Whenever {@link #advance(int)} or {@link #advanceExact(int)} is asked for
 * a document the current forward-only delegate can no longer serve, a fresh
 * delegate is obtained from the supplier before the call is forwarded. Every
 * such reset costs one call to the supplier.
 * </p>
 * @author Pascal Essiembre
 */
public class RandomAccessBinaryDocValues extends BinaryDocValues {

    private final Supplier<BinaryDocValues> supplier;
    private BinaryDocValues docValues;

    /**
     * Creates a new instance and eagerly obtains a first delegate.
     * @param supplier creates a new, unpositioned doc values instance on
     *        each invocation
     */
    public RandomAccessBinaryDocValues(Supplier<BinaryDocValues> supplier) {
        super();
        this.supplier = supplier;
        this.docValues = supplier.get();
    }

    /**
     * Returns the value of the delegate's current document.
     * @return the delegate's value, or an empty {@link BytesRef} when the
     *         supplier provided no delegate
     * @throws IOException if the delegate fails to read the value
     */
    @Override
    public BytesRef binaryValue() throws IOException {
        if (docValues == null) {
            return new BytesRef(BytesRef.EMPTY_BYTES);
        }
        return docValues.binaryValue();
    }

    /**
     * Positions on {@code target}, resetting the delegate first when it is
     * exhausted or already past {@code target}.
     * @param target the document to position on
     * @return what the delegate reports for {@code target}, or {@code false}
     *         when the supplier provided no delegate
     * @throws IOException if the delegate fails to advance
     */
    @Override
    public boolean advanceExact(int target) throws IOException {
        // advanceExact accepts target == current doc, so only going
        // backwards requires a fresh delegate.
        resetIfNeeded(target, true);
        if (docValues == null) {
            return false;
        }
        return docValues.advanceExact(target);
    }

    @Override
    public int docID() {
        if (docValues == null) {
            return NO_MORE_DOCS;
        }
        return docValues.docID();
    }

    @Override
    public int nextDoc() throws IOException {
        if (docValues == null) {
            return NO_MORE_DOCS;
        }
        return docValues.nextDoc();
    }

    /**
     * Advances to {@code target}, resetting the delegate first when it is
     * exhausted or already at or past {@code target}.
     * @param target the document to advance to
     * @return the delegate's result, or {@code NO_MORE_DOCS} when the
     *         supplier provided no delegate
     * @throws IOException if the delegate fails to advance
     */
    @Override
    public int advance(int target) throws IOException {
        // advance() is undefined for target <= current doc, so asking again
        // for the current document also requires a fresh delegate.
        resetIfNeeded(target, false);
        if (docValues == null) {
            return NO_MORE_DOCS;
        }
        return docValues.advance(target);
    }

    @Override
    public long cost() {
        if (docValues == null) {
            return 0;
        }
        return docValues.cost();
    }

    /**
     * Replaces the delegate with a fresh one from the supplier when the
     * current one cannot be moved to {@code target}.
     * @param target the document about to be requested
     * @param allowCurrent whether the upcoming call may legally be made with
     *        {@code target} equal to the delegate's current document
     */
    private void resetIfNeeded(int target, boolean allowCurrent) {
        if (docValues == null) {
            docValues = supplier.get();
            return;
        }
        final int current = docValues.docID();
        if (current == -1) {
            // Not positioned yet: any target can still be reached.
            return;
        }
        final boolean unreachable =
                allowCurrent ? target < current : target <= current;
        // An exhausted delegate can never be advanced again, so it must be
        // replaced as well; otherwise every later lookup would fail.
        if (current == NO_MORE_DOCS || unreachable) {
            docValues = supplier.get();
        }
    }
}

0 comments on commit 99d8a50

Please sign in to comment.