diff --git a/solr/core/src/java/org/apache/solr/schema/NVectorField.java b/solr/core/src/java/org/apache/solr/schema/NVectorField.java new file mode 100644 index 00000000000..171667d824b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/NVectorField.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.MultiValueSource; +import org.apache.lucene.search.SortField; +import org.apache.solr.common.SolrException; +import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; +import org.apache.solr.uninverting.UninvertingReader; +import org.apache.solr.util.NVectorUtil; + +import java.io.IOException; +import java.text.NumberFormat; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +public class NVectorField extends CoordinateFieldType { + + private static final String DEFAULT_SEPARATOR = ","; + String separator = DEFAULT_SEPARATOR; + + @Override + protected void init(IndexSchema schema, Map args) { + super.init(schema, args); + separator = args.getOrDefault("separator", DEFAULT_SEPARATOR); + dimension = 3; + createSuffixCache(3); + } + + @Override + public List createFields(SchemaField field, Object value) { + String externalVal = value.toString(); + String[] point = parseCommaSeparatedList(externalVal, dimension, separator); + String[] nvector; + try { + NumberFormat format = NumberFormat.getInstance(Locale.getDefault()); + format.setParseIntegerOnly(false); + nvector = NVectorUtil.latLongToNVector(point, format); + } catch (ParseException e) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "format exception parsing: "+externalVal); + } + List f = new ArrayList<>((dimension * 2) + 1); + + if (field.indexed()) { + for (int i = 0; i < dimension; i++) { + SchemaField sf = subField(field, i, schema); + f.addAll(sf.createFields(nvector[i])); + } + } + + if (field.stored()) { + f.add(createField(field.getName(), externalVal, StoredField.TYPE)); + } + return f; + } + + @Override + public ValueSource getValueSource(SchemaField field, QParser parser) { + ArrayList vs = new ArrayList<>(dimension); + for (int i = 0; i < dimension; i++) { + SchemaField sub = subField(field, i, schema); + vs.add(sub.getType().getValueSource(sub, parser)); + } + return new NVectorValueSource(vs); + } + + /** + * Given a string containing dimension values encoded in it, separated by commas, return a + * String array of length dimension containing the values. + * + * @param externalVal The value to parse + * @param dimension The expected number of values for the point + * @param separator The separator between values + * @return An array of the values that make up the point (aka vector) + * @throws SolrException if the dimension specified does not match the number found + */ + public static String[] parseCommaSeparatedList(String externalVal, int dimension, String separator) + throws SolrException { + char sep = separator.charAt(0); + String[] out = new String[dimension]; + int idx = externalVal.indexOf(sep); + int end = idx; + int start = 0; + int i = 0; + if (idx == -1 + && dimension == 1 + && externalVal.length() > 0) { // we have a single point, dimension better be 1 + out[0] = externalVal.trim(); + i = 1; + } else if (idx > 0) { // if it is zero, that is an error + // Parse out a comma separated list of values, as in: 73.5,89.2,7773.4 + for (; i < dimension; i++) { + while (start < end && externalVal.charAt(start) == ' ') start++; + while (end > start && externalVal.charAt(end - 1) == ' ') end--; + if (start == end) { + break; + } + out[i] = externalVal.substring(start, end); + start = idx + 1; + end = externalVal.indexOf(sep, start); + idx = end; + if (end == -1) { + end = externalVal.length(); + } + } + } + if (i != dimension) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "incompatible dimension (" + + dimension + + ") and values (" + + externalVal + + "). Only " + + i + + " values specified"); + } + return out; + } + + @Override + protected void checkSupportsDocValues() { + // DocValues supported only when enabled at the fieldType + if (!hasProperty(DOC_VALUES)) { + throw new UnsupportedOperationException( + "PointType can't have docValues=true in the field definition, use docValues=true in the fieldType definition, or in subFieldType/subFieldSuffix"); + } + } + + @Override + public UninvertingReader.Type getUninversionType(SchemaField sf) { + return null; + } + + @Override + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { + writer.writeStr(name, f.stringValue(), true); + } + + @Override + public SortField getSortField(SchemaField field, boolean top) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Sorting not supported on NVector " + field.getName()); + } + + @Override + public boolean isPolyField() { + return true; + } +} + +final class NVectorValueSource extends MultiValueSource { + private final List sources; + + public NVectorValueSource(List sources) { + this.sources = sources; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) + throws IOException { + final FunctionValues x = sources.get(0).getValues(context, readerContext); + final FunctionValues y = sources.get(1).getValues(context, readerContext); + final FunctionValues z = sources.get(2).getValues(context, readerContext); + return new FunctionValues() { + + @Override + public void byteVal(int doc, byte[] vals) throws IOException { + vals[0] = x.byteVal(doc); + vals[1] = y.byteVal(doc); + vals[2] = z.byteVal(doc); + } + + @Override + public void shortVal(int doc, short[] vals) throws IOException { + vals[0] = x.shortVal(doc); + vals[1] = y.shortVal(doc); + vals[2] = z.shortVal(doc); + } + + @Override + public void intVal(int doc, int[] vals) throws IOException { + vals[0] = x.intVal(doc); + vals[1] = y.intVal(doc); + vals[2] = z.intVal(doc); + } + + @Override + public void longVal(int doc, long[] vals) throws IOException { + vals[0] = x.longVal(doc); + vals[1] = y.longVal(doc); + vals[2] = z.longVal(doc); + } + + @Override + public void floatVal(int doc, float[] vals) throws IOException { + vals[0] = x.floatVal(doc); + vals[1] = y.floatVal(doc); + vals[2] = z.floatVal(doc); + } + + @Override + public void doubleVal(int doc, double[] vals) throws IOException { + vals[0] = x.doubleVal(doc); + vals[1] = y.doubleVal(doc); + vals[2] = z.doubleVal(doc); + } + + @Override + public void strVal(int doc, String[] vals) throws IOException { + vals[0] = x.strVal(doc); + vals[1] = y.strVal(doc); + vals[2] = z.strVal(doc); + } + + @Override + public String toString(int doc) throws IOException { + return "nvector(" + x.toString(doc) + "," + y.toString(doc) + "," + z.toString(doc) + ")"; + } + }; + } + + @Override + public String description() { + StringBuilder sb = new StringBuilder(); + sb.append("nvector("); + boolean firstTime = true; + for (ValueSource source : sources) { + if (firstTime) { + firstTime = false; + } else { + sb.append(','); + } + sb.append(source); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof NVectorValueSource)) return false; + + NVectorValueSource that = (NVectorValueSource) o; + + return sources.equals(that.sources); + } + + @Override + public int hashCode() { + return sources.hashCode(); + } + + @Override + public int dimension() { + return sources.size(); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index cd86b3acadd..3e35bdf2f9c 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -81,6 +81,7 @@ import org.apache.solr.search.function.distance.SquaredEuclideanFunction; import org.apache.solr.search.function.distance.StringDistanceFunction; import org.apache.solr.search.function.distance.VectorDistanceFunction; +import org.apache.solr.search.function.distance.NVectorValueSourceParser; import org.apache.solr.search.join.ChildFieldValueSourceParser; import org.apache.solr.util.DateMathParser; import org.apache.solr.util.PayloadUtils; @@ -386,6 +387,8 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError { addParser("geodist", new GeoDistValueSourceParser()); + addParser("nvdist", new NVectorValueSourceParser()); + addParser( "hsin", new ValueSourceParser() { diff --git a/solr/core/src/java/org/apache/solr/search/function/distance/NVectorFunction.java b/solr/core/src/java/org/apache/solr/search/function/distance/NVectorFunction.java new file mode 100644 index 00000000000..e1ce5afb9f5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/function/distance/NVectorFunction.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.function.distance; + +import static org.apache.solr.util.NVectorUtil.nVectorDist; +import static org.apache.solr.util.NVectorUtil.nVectorDotProduct; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.docvalues.DoubleDocValues; +import org.apache.lucene.queries.function.valuesource.MultiValueSource; +import org.apache.lucene.search.*; +import org.apache.solr.common.SolrException; + +public class NVectorFunction extends ValueSource { + private final MultiValueSource nvector1; + private final MultiValueSource nvector2; + private final double radius; + + public NVectorFunction(MultiValueSource nvector1, MultiValueSource nvector2, double radius) { + this.nvector1 = nvector1; + this.nvector2 = nvector2; + if (nvector1.dimension() != 3 || nvector2.dimension() != 3) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, "Illegal dimension for value sources"); + } + this.radius = radius; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) + throws IOException { + + final FunctionValues fv = getDotProductValues(context,readerContext); + + return new DoubleDocValues(this) { + + @Override + public double doubleVal(int doc) throws IOException { + double dotProduct = fv.doubleVal(doc); + return nVectorDist(dotProduct, radius); + } + + @Override + public String toString(int doc) throws IOException { + return fv.toString(doc); + } + }; + } + public FunctionValues getDotProductValues(Map context, LeafReaderContext readerContext) throws IOException { + + final FunctionValues nvector_v1 = nvector1.getValues(context, readerContext); + final FunctionValues nvector_v2 = nvector2.getValues(context, readerContext); + return new DoubleDocValues(this) { + + @Override + public double doubleVal(int doc) throws IOException { + double[] nvector_dv1 = new double[nvector1.dimension()]; + double[] nvector_dv2 = new double[nvector2.dimension()]; + nvector_v1.doubleVal(doc, nvector_dv1); + nvector_v2.doubleVal(doc, nvector_dv2); + return nVectorDotProduct(nvector_dv1, nvector_dv2); + } + + @Override + public String toString(int doc) throws IOException { + return name() + ',' + nvector_v1.toString(doc) + ',' + nvector_v2.toString(doc) + ')'; + } + }; + } + + protected String name() { + return "nvector"; + } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NVectorFunction that = (NVectorFunction) o; + return Double.compare(that.radius, radius) == 0 + && nvector1.equals(that.nvector1) + && nvector2.equals(that.nvector2); + } + @Override + public int hashCode() { + return Objects.hash(nvector1, nvector2, radius); + } + @Override + public void createWeight(Map context, IndexSearcher searcher) throws IOException { + nvector1.createWeight(context, searcher); + nvector2.createWeight(context, searcher); + } + @Override + public String description() { + return name() + '(' + nvector1 + ',' + nvector2 + ')'; + } + @Override + public SortField getSortField(boolean reverse){ return new NVectorValueSourceSortField(reverse); } + class NVectorValueSourceSortField extends SortField { + public NVectorValueSourceSortField(boolean reverse) { + super(description(), SortField.Type.REWRITEABLE, reverse); + } + + @Override + public SortField rewrite(IndexSearcher searcher) throws IOException { + Map context = ValueSource.newContext(searcher); + createWeight(context, searcher); + return new SortField(getField(), new NVectorValueSourceComparatorSource(context), getReverse()); + } + } + class NVectorValueSourceComparatorSource extends FieldComparatorSource { + private final Map context; + + public NVectorValueSourceComparatorSource(Map context) { + this.context = context; + } + + @Override + public FieldComparator newComparator( + String fieldname, int numHits, boolean enableSkipping, boolean reversed) { + return new NVectorValueSourceComparator(context, numHits); + } + } + //Please note: The comparisons are INVERTED here, for performance on sorting we compare + // with the dot product, which is negatively correlated to acos(dot_product). Converting the dot product to distance with d=R*acos(dot_product), for example: + // where d = distance, dp = dot_product + // d:19628.29698448594 dp:-0.9981573955675561 + //d:18583.651644725564 dp:-0.9748645959351536 + //d:18180.44788490305 dp:-0.9588220684898193 + //d:18052.6639699517 dp:-0.9529332319157707 + //d:17882.7920545206 dp:-0.9445116994940388 + //.... + //d:1930.2292960794525 dp:0.954454358625817 + //d:1843.0056018566079 dp:0.9584495044607074 + //d:455.63526860635756 dp:0.9974437510266485 + //d:336.9277320011129 dp:0.9986019397287412 + //d:0.0 dp:1.0 + // A HIGHER dot_product equates to a closer (LOWER) distance hence we invert + class NVectorValueSourceComparator extends SimpleFieldComparator { + private final double[] values; + private FunctionValues docVals; + private double bottom; + private final Map fcontext; + private double topValue; + + NVectorValueSourceComparator(Map fcontext, int numHits) { + this.fcontext = fcontext; + values = new double[numHits]; + } + + @Override + public int compare(int slot1, int slot2) { + return Double.compare(values[slot2],values[slot1]); + } + + @Override + public int compareBottom(int doc) throws IOException { + return Double.compare(docVals.doubleVal(doc),bottom); + } + + @Override + public void copy(int slot, int doc) throws IOException { + values[slot] = docVals.doubleVal(doc); + } + + @Override + public void doSetNextReader(LeafReaderContext context) throws IOException { + docVals = getDotProductValues(fcontext, context); + } + + @Override + public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override + public void setTopValue(final Double value) { + this.topValue = value; + } + + @Override + public Double value(int slot) { + return values[slot]; + } + + @Override + public int compareTop(int doc) throws IOException { + return Double.compare(docVals.doubleVal(doc),topValue); + } + } +} + + diff --git a/solr/core/src/java/org/apache/solr/search/function/distance/NVectorValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/function/distance/NVectorValueSourceParser.java new file mode 100644 index 00000000000..91317e09cee --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/function/distance/NVectorValueSourceParser.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.function.distance; + +import static org.locationtech.spatial4j.distance.DistanceUtils.EARTH_MEAN_RADIUS_KM; + +import java.util.Arrays; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource; +import org.apache.lucene.queries.function.valuesource.MultiValueSource; +import org.apache.lucene.queries.function.valuesource.VectorValueSource; +import org.apache.solr.common.SolrException; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; +import org.apache.solr.util.NVectorUtil; + +public class NVectorValueSourceParser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + double lat = fp.parseDouble(); + double lon = fp.parseDouble(); + + ValueSource vs1 = fp.parseValueSource(); + if (!(vs1 instanceof MultiValueSource)) + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field must a MultiValueSource"); + MultiValueSource nvector_vs1 = (MultiValueSource) vs1; + + double[] nvector = NVectorUtil.latLongToNVector(lat, lon); + + MultiValueSource nvector_vs2 = + new VectorValueSource( + Arrays.asList( + new DoubleConstValueSource(nvector[0]), + new DoubleConstValueSource(nvector[1]), + new DoubleConstValueSource(nvector[2]))); + + double radius = fp.hasMoreArguments() ? fp.parseDouble() : EARTH_MEAN_RADIUS_KM; + + return new NVectorFunction(nvector_vs1, nvector_vs2, radius); + } +} diff --git a/solr/core/src/java/org/apache/solr/util/NVectorUtil.java b/solr/core/src/java/org/apache/solr/util/NVectorUtil.java new file mode 100644 index 00000000000..47fd2bbbf55 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/util/NVectorUtil.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util; + +import org.apache.lucene.util.SloppyMath; +import org.apache.solr.schema.NVectorField; +import org.apache.solr.search.function.distance.NVectorFunction; + +import java.text.NumberFormat; +import java.text.ParseException; + +import static org.locationtech.spatial4j.distance.DistanceUtils.EARTH_MEAN_RADIUS_KM; + +/** + * NVectorUtil : This class contains helper methods used by {@link NVectorFunction} and {@link NVectorField} + * to convert between n-vectors and lat,lon as well as calculating dot product for sorting and + * calculating the great circle (surface) distance + */ +public class NVectorUtil { + + private static final double pip2 = Math.PI/2; + + /** + * + * @param lat the latitude + * @param lon the longitude + * @return the NVector as double[3] + */ + public static double[] latLongToNVector(double lat, double lon) { + double latRad = Math.toRadians(lat); + double lonRad = Math.toRadians(lon); + double x = Math.cos(latRad) * Math.cos(lonRad); + double y = Math.cos(latRad) * Math.sin(lonRad); + double z = Math.sin(latRad); + return new double[] {x, y, z}; + } + + + /** + * @param lat the latitude + * @param lon the longitude + * @return string rep of the n-vector + */ + public static String[] latLongToNVector(String lat, String lon) { + double[] nvec = latLongToNVector(Double.parseDouble(lat), Double.parseDouble(lon)); + return new String[] { + Double.toString(nvec[0]), Double.toString(nvec[1]), Double.toString(nvec[2]) + }; + } + + /** + * @param point string rep of lat,lon + * @param formatter for parsing the string into a double wrt the locale + * @return string rep of the n-vector + * @throws ParseException If the string for point cannot be parsed + */ + public static String[] latLongToNVector(String[] point, NumberFormat formatter) throws ParseException { + double[] nvec = latLongToNVector(formatter.parse(point[0]).doubleValue(),formatter.parse(point[1]).doubleValue()); + return new String[] { + Double.toString(nvec[0]), Double.toString(nvec[1]), Double.toString(nvec[2]) + }; + } + + /** + * @param n the nvector + * @return the lat lon for this n-vector + */ + public static double[] nVectorToLatLong(double[] n) { + return new double[] { + Math.toDegrees(Math.asin(n[2])),Math.toDegrees(Math.atan(n[1] / n[0])) + }; + } + + public static double[] nVectorToLatLong(String[] n) { + return nVectorToLatLong( + new double[] { + Double.parseDouble(n[0]), Double.parseDouble(n[1]), Double.parseDouble(n[2]) + }); + } + + /** + * @param a the first n-vector + * @param b the second n-vector + * @return scalar doc product of both n-vectors + */ + public static double nVectorDotProduct(double[] a, double[] b) { + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; + } + + /** + * @param a the first n-vector + * @param b the second n-vector + * @return the great circle (surface) distance between the two n-vectors + */ + public static double nVectorDist(double[] a, double[] b) { + return nVectorDist(a, b, EARTH_MEAN_RADIUS_KM); + } + + /** + * @param a the first n-vector + * @param b the second n-vector + * @param radius he radius of the ellipsoid + * @return the great circle (surface) distance between the two n-vectors + */ + public static double nVectorDist(double[] a, double[] b, double radius) { + return nVectorDist(nVectorDotProduct(a, b), radius); + } + + /** + * @param dotProduct the scalar dot product of two n-vectors + * @param radius the radius of the ellipsoid + * @return the great circle (surface) distance between the two n-vectors + */ + public static double nVectorDist(double dotProduct, double radius){ + return radius * (pip2 - SloppyMath.asin(dotProduct)); + } + +} diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-nvector.xml b/solr/core/src/test-files/solr/collection1/conf/schema-nvector.xml new file mode 100644 index 00000000000..4cfc2fb494b --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-nvector.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + I am your default sim + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-nvector.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-nvector.xml new file mode 100644 index 00000000000..d41555b0172 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-nvector.xml @@ -0,0 +1,528 @@ + + + + + + + + + + + + + ${solr.data.dir:} + + + + 1000000 + 2000000 + 3000000 + 4000000 + ${solr.hdfs.home:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.global:true} + ${solr.hdfs.blockcache.write.enabled:false} + ${solr.hdfs.blockcache.blocksperbank:10} + ${solr.hdfs.blockcache.slab.count:1} + + + + + ${tests.luceneMatchVersion:LATEST} + + + + + + + + + ${solr.autoCommit.maxTime:-1} + + + + + + ${solr.ulog.dir:} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + ${solr.max.booleanClauses:1024} + + + + + + + + + + + + true + + + + + + 10 + + + + + + + + + + + + 2000 + + + + + + + + true + + + + + dismax + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + weight^0.5 recip(rord(id),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + lowerpunctfilt + + + default + lowerfilt + spellchecker1 + false + + + direct + DirectSolrSpellChecker + lowerfilt + 3 + + + wordbreak + solr.WordBreakSolrSpellChecker + lowerfilt + true + true + 10 + + + multipleFields + lowerfilt1and2 + spellcheckerMultipleFields + false + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + freq + lowerfilt + spellcheckerFreq + + freq + false + + + fqcn + lowerfilt + spellcheckerFQCN + org.apache.solr.spelling.SampleComparator + false + + + perDict + org.apache.solr.handler.component.DummyCustomParamSpellChecker + lowerfilt + + + + + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + direct + false + false + 1 + + + spellcheck + + + + + default + wordbreak + 20 + + + spellcheck + + + + + direct + wordbreak + 20 + + + spellcheck + + + + + dismax + lowerfilt1^1 + + + spellcheck + + + + + + + + + + + + + + + tvComponent + + + + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + + + + 10 + .,!? + + + + + + WORD + en + US + + + + + + + + + max-age=30, public + + + + + + foo_s + + + foo_s:bar + + + + + foo_s + foo_s:bar + + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + + uniq + uniq2 + uniq3 + + + + + + + + + regex_dup_A_s + x + x_x + + + + regex_dup_B_s + x + x_x + + + + + + + + regex_dup_A_s + x + x_x + + + regex_dup_B_s + x + x_x + + + + + + + org.apache.solr.rest.ManagedResourceStorage$InMemoryStorageIO + + + + + + text + + + + + + text + + + nl + + + diff --git a/solr/core/src/test/org/apache/solr/search/function/distance/NVectorDistTest.java b/solr/core/src/test/org/apache/solr/search/function/distance/NVectorDistTest.java new file mode 100644 index 00000000000..390dcc8c579 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/function/distance/NVectorDistTest.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.function.distance; + +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Locale; + +public class NVectorDistTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_ + initCore("solrconfig-nvector.xml", "schema-nvector.xml"); + } + + @Override + public void setUp() throws Exception { + Locale.setDefault(Locale.ENGLISH);//for parsing lat/log correctly in this test + super.setUp(); + assertU(delQ("*:*")); + assertU(commit()); + } + + @Test + public void testNVector() throws Exception { + assertU(adoc("id", "0", "nvector", "52.02471051274793, -0.49007556238612354")); + assertU(commit()); + assertJQ( + req("defType", "lucene", "q", "*:*", "fl", "id,nvector*", "sort", "id asc"), + "/response/docs/[0]== {" + + "'id':'0'," + + "'nvector_0_d1':0.6152990562577377," + + "'nvector_1_d1':-0.005263047078845837," + + "'nvector_2_d1':0.7882762026750415," + + "'nvector':'52.02471051274793, -0.49007556238612354'}"); + + assertJQ( + req( + "defType", "lucene", + "q", "*:*", + "nvd", "nvdist(52.01966071979866, -0.4983083573742952,nvector)", + "fl", "dist:$nvd", + "sort", "$nvd asc"), + "/response/docs/[0]/dist==0.7953814512052634"); + } + + @Test + public void testNVectorRadiusFilter() throws Exception { + assertU(adoc("id", "0", "nvector", "52.02471051274793, -0.49007556238612354")); + assertU(adoc("id", "1", "nvector", "51.927619, -0.186636")); + assertU(adoc("id", "2", "nvector", "51.480043, -0.196508")); + assertU(commit()); + + assertJQ( + req( + "defType", "lucene", + "lat", "52.01966071979866", + "lon", "-0.4983083573742952", + "dist", "nvdist($lat,$lon,nvector)", + "q", "*:*", + "fl","id", + "sort", "$dist asc"), + "/response/numFound==3", + "/response/docs/[0]/id=='0'", + "/response/docs/[1]/id=='1'" + ); + + assertJQ( + req( + "defType", "lucene", + "lat", "52.01966071979866", + "lon", "-0.4983083573742952", + "dist", "nvdist($lat,$lon,nvector)", + "q", "*:*", + "fl","id", + "sort", "$dist desc"), + "/response/numFound==3", + "/response/docs/[0]/id=='2'", + "/response/docs/[1]/id=='1'" + ); + + assertJQ( + req( + "defType", "lucene", + "lat", "52.01966071979866", + "lon", "-0.4983083573742952", + "dist", "nvdist($lat,$lon,nvector)", + "q", "*:*", + "fl","id,dist:$dist", + "sort", "$dist asc"), + "/response/numFound==3", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/dist==0.7953814512052634", + "/response/docs/[1]/id=='1'", + "/response/docs/[1]/dist==23.675588801593264", + "/response/docs/[2]/id=='2'", + "/response/docs/[2]/dist==63.49776326818523" + ); + + assertJQ( + req( + "defType", "lucene", + "lat", "52.01966071979866", + "lon", "-0.4983083573742952", + "dist", "nvdist($lat,$lon,nvector)", + "q", "*:*", + "fl","id,dist:$dist", + "sort", "$dist desc"), + "/response/numFound==3", + "/response/docs/[0]/id=='2'", + "/response/docs/[0]/dist==63.49776326818523", + "/response/docs/[1]/id=='1'", + "/response/docs/[1]/dist==23.675588801593264", + "/response/docs/[2]/id=='0'", + "/response/docs/[2]/dist==0.7953814512052634" + + + ); + + assertJQ( + req( + "defType", "lucene", + "q", "{!frange u=30}nvdist(52.01966071979866, -0.4983083573742952,nvector)", + "fl", "id,dist:nvdist(52.01966071979866, -0.4983083573742952,nvector)", + "sort", "nvdist(52.01966071979866, -0.4983083573742952,nvector) asc"), + "/response/numFound==2", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/dist==0.7953814512052634", + "/response/docs/[1]/id=='1'", + "/response/docs/[1]/dist==23.675588801562068"); + + assertJQ( + req( + "defType", "lucene", + "dist", "nvdist(52.01966071979866, -0.4983083573742952,nvector)", + "q", "{!frange u=30}$dist", + "fl", "id,dist:$dist", + "sort", "$dist asc"), + "/response/numFound==2", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/dist==0.7953814512052634", + "/response/docs/[1]/id=='1'", + "/response/docs/[1]/dist==23.675588801562068"); + + assertJQ( + req( + "defType", "lucene", + "lat", "52.01966071979866", + "lon", "-0.4983083573742952", + "dist", "nvdist($lat,$lon,nvector)", + "q", "*:*", + "fq", "{!frange u=30}$dist", + "fl", "id,dist:$dist", + "sort", "$dist asc"), + "/response/numFound==2", + "/response/docs/[0]/id=='0'", + "/response/docs/[0]/dist==0.7953814512052634", + "/response/docs/[1]/id=='1'", + "/response/docs/[1]/dist==23.675588801562068"); + + } +} diff --git a/solr/core/src/test/org/apache/solr/util/NVectorUtilTest.java b/solr/core/src/test/org/apache/solr/util/NVectorUtilTest.java new file mode 100644 index 00000000000..89a9b82e1d9 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/util/NVectorUtilTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class NVectorUtilTest { + + @Test + public void latLongToNVector() { + double lat = 52.024535; + double lon = -0.490155; + double[] n = NVectorUtil.latLongToNVector(lat, lon); + double[] ll = NVectorUtil.nVectorToLatLong(n); + assertEquals(lat, ll[0], 0.0001); + assertEquals(lon, ll[1], 0.0001); + } + + @Test + public void latLongToNVectorStr() { + String lat = "52.024535"; + String lon = "-0.490155"; + String[] n = NVectorUtil.latLongToNVector(lat, lon); + double[] ll = NVectorUtil.nVectorToLatLong(n); + assertEquals(Double.parseDouble(lat), ll[0], 0.0001); + assertEquals(Double.parseDouble(lon), ll[1], 0.0001); + } + + @Test + public void NVectorDist() { + double[] a = NVectorUtil.latLongToNVector(52.019819, -0.490155); + double[] b = NVectorUtil.latLongToNVector(52.019660, -0.498308); + double dist = NVectorUtil.nVectorDist(a, b); + assertEquals(0.5581762827572362, dist, 0.0001); + a = NVectorUtil.latLongToNVector(52.02456414691066, -0.49013542948214134); + b = NVectorUtil.latLongToNVector(51.92756819110318, -0.18695373636718815); + assertEquals(23.400242809617353, NVectorUtil.nVectorDist(a, b), 0.0001); + } +}