Skip to content

Commit

Permalink
Geo spatial interfaces (#16029)
Browse files Browse the repository at this point in the history
This PR creates an interface for ImmutableRTree and moves the existing implementation to a new class that represents the 32-bit implementation (it stores coordinates as floats). This makes ImmutableRTree extensible, so that a higher-precision (64-bit) implementation can be created as well.
In all spatial bound filters, we accept floats as input, which might not be accurate in the case of a high-precision implementation of ImmutableRTree. This PR changes the bound filters to accept the query bounds as doubles instead of floats. This is a backward-compatible change, as it compares doubles to the existing float values in the RTree. Previously, input floats were compared to RTree floats, which can cause precision loss; now it is a little better, as doubles are compared to floats, although this is still not 100% accurate.
There are no changes in the way that we query spatial dimensions today, except for input bound parsing. There is a small improvement in the string filter predicate, which now parses double strings instead of float strings and compares double to double, which is 100% accurate; however, the string predicate is only called when we don't have a spatial index.
By allowing ImmutableRTree to be extended through an interface, we make it possible to create a high-precision (HP) implementation and to define new search strategies that perform HP search: Iterable<ImmutableBitmap> search(ImmutableDoubleNode node, Bound bound);
Even with possible HP implementations, the radius bound filter cannot be truly accurate, because it calculates Euclidean distance when comparing. As EARTH 🌍 is round and not flat, Euclidean distances are not accurate in a geo system. This PR adds a new param called 'radiusUnit', which allows you to specify units like meters, km, miles, etc. It uses the Haversine formula (https://en.wikipedia.org/wiki/Haversine_formula) to check whether a given geo point falls inside the circle or not. Added a test in RadiusBoundTest that generates sets of points inside and outside the circle.
  • Loading branch information
pranavbhole committed Apr 1, 2024
1 parent 27b4028 commit 20de7fd
Show file tree
Hide file tree
Showing 22 changed files with 637 additions and 233 deletions.
5 changes: 3 additions & 2 deletions docs/querying/geo.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,9 @@ The `radius` bound has the following elements:

|Property|Description|Required|
|--------|-----------|--------|
|`coords`|Origin coordinates in the form [x, y]|yes|
|`radius`|The float radius value|yes|
|`coords`|Center coordinates in the form [x, y]|yes|
|`radius`|The float radius value according to specified unit|yes|
|`radiusUnit`|String value of radius unit in lowercase, default value is 'euclidean'. Allowed units are euclidean, meters, miles, kilometers.|no|

#### Polygon

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.collections.spatial;

import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.spatial.search.Bound;

/**
 * Base interface for immutable R-tree implementations. It exposes only the search operation, so that
 * implementations with different coordinate precision can sit behind the same type.
 */
public interface BaseImmutableRTee
{
/**
 * Searches this tree using the given bound.
 *
 * @param bound the spatial bound to search with
 * @return the bitmaps produced by the search
 *         (NOTE(review): presumably one bitmap per matching entry; confirm against the implementations)
 */
Iterable<ImmutableBitmap> search(Bound bound);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.collections.spatial;

import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;

import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Read-only view over one serialized R-tree node whose bounding-box coordinates are stored as
 * 32-bit floats. The node does not copy anything: every accessor reads from the backing
 * {@link ByteBuffer} using absolute offsets.
 *
 * Byte layout, relative to {@code initialOffset + offsetFromInitial}:
 * Header
 * 0 to 1 : the MSB is a boolean flag for isLeaf, the next 15 bits represent the number of children of a node
 * Body
 * 2 to 2 + numDims * Float.BYTES : minCoordinates
 * 2 + numDims * Float.BYTES to 2 + 2 * numDims * Float.BYTES : maxCoordinates
 * next Integer.BYTES : size in bytes of the serialized bitmap
 * next "size" bytes : the serialized bitmap (concise set), decoded through the {@link BitmapFactory}
 * rest (children) : Every 4 bytes is storing an offset representing the position of a child.
 *
 * The child offset is an offset from the initialOffset
 */
public class ImmutableFloatNode implements ImmutableNode<float[]>
{
  public static final int HEADER_NUM_BYTES = 2;

  private final int numDims;
  private final int initialOffset;
  private final int offsetFromInitial;

  private final short numChildren;
  private final boolean isLeaf;
  private final int childrenOffset;

  private final ByteBuffer data;

  private final BitmapFactory bitmapFactory;

  /**
   * Creates a node view, decoding the leaf flag and the child count from the 2-byte header stored at
   * {@code initialOffset + offsetFromInitial}.
   *
   * @param numDims           number of dimensions of the stored coordinates
   * @param initialOffset     base offset in {@code data}; child offsets are relative to it
   * @param offsetFromInitial offset of this node, relative to {@code initialOffset}
   * @param data              buffer holding the serialized tree
   * @param bitmapFactory     factory used to decode the node's bitmap
   */
  public ImmutableFloatNode(
      int numDims,
      int initialOffset,
      int offsetFromInitial,
      ByteBuffer data,
      BitmapFactory bitmapFactory
  )
  {
    this.bitmapFactory = bitmapFactory;
    this.numDims = numDims;
    this.initialOffset = initialOffset;
    this.offsetFromInitial = offsetFromInitial;

    final short header = data.getShort(initialOffset + offsetFromInitial);
    this.isLeaf = (header & 0x8000) != 0;
    this.numChildren = (short) (header & 0x7FFF);
    this.childrenOffset = computeChildrenOffset(numDims, initialOffset, offsetFromInitial, data);

    this.data = data;
  }

  /**
   * Creates a node view with an explicitly supplied child count and leaf flag; the header bytes are
   * not consulted.
   *
   * @param numDims           number of dimensions of the stored coordinates
   * @param initialOffset     base offset in {@code data}; child offsets are relative to it
   * @param offsetFromInitial offset of this node, relative to {@code initialOffset}
   * @param numChildren       number of children to report
   * @param leaf              whether this node is a leaf
   * @param data              buffer holding the serialized tree
   * @param bitmapFactory     factory used to decode the node's bitmap
   */
  public ImmutableFloatNode(
      int numDims,
      int initialOffset,
      int offsetFromInitial,
      short numChildren,
      boolean leaf,
      ByteBuffer data,
      BitmapFactory bitmapFactory
  )
  {
    this.bitmapFactory = bitmapFactory;
    this.numDims = numDims;
    this.initialOffset = initialOffset;
    this.offsetFromInitial = offsetFromInitial;
    this.numChildren = numChildren;
    this.isLeaf = leaf;
    this.childrenOffset = computeChildrenOffset(numDims, initialOffset, offsetFromInitial, data);

    this.data = data;
  }

  @Override
  public BitmapFactory getBitmapFactory()
  {
    return bitmapFactory;
  }

  @Override
  public int getInitialOffset()
  {
    return initialOffset;
  }

  @Override
  public int getOffsetFromInitial()
  {
    return offsetFromInitial;
  }

  @Override
  public int getNumDims()
  {
    return numDims;
  }

  @Override
  public boolean isLeaf()
  {
    return isLeaf;
  }

  /**
   * @return a freshly allocated array with the {@code numDims} minimum coordinates, read right after the header
   */
  @Override
  public float[] getMinCoordinates()
  {
    return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES);
  }

  /**
   * @return a freshly allocated array with the {@code numDims} maximum coordinates, read right after
   * the minimum coordinates
   */
  @Override
  public float[] getMaxCoordinates()
  {
    return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES + numDims * Float.BYTES);
  }

  /**
   * Decodes the bitmap stored in this node.
   *
   * @return the bitmap mapped by the {@link BitmapFactory} over a read-only window of the backing buffer
   */
  @Override
  public ImmutableBitmap getImmutableBitmap()
  {
    final int sizePosition = bitmapSizePosition(numDims, initialOffset, offsetFromInitial);
    final int numBytes = data.getInt(sizePosition);

    // Work on a read-only duplicate, as getCoords does, so that the position of the shared backing
    // buffer is never moved by a read.
    final ByteBuffer view = data.asReadOnlyBuffer();
    view.position(sizePosition + Integer.BYTES);
    final ByteBuffer bitmapBytes = view.slice();
    bitmapBytes.limit(numBytes);
    return bitmapFactory.mapImmutableBitmap(bitmapBytes);
  }

  /**
   * Lazily iterates over the children of this node. Each call to {@code next()} reads one 4-byte
   * child offset and wraps it in an {@link ImmutableFloatPoint} when this node is a leaf, or in an
   * {@link ImmutableFloatNode} otherwise.
   *
   * @return an iterable over the children; its iterators do not support {@code remove()}
   */
  @Override
  @SuppressWarnings("ArgumentParameterSwap")
  public Iterable<ImmutableNode<float[]>> getChildren()
  {
    return new Iterable<ImmutableNode<float[]>>()
    {
      @Override
      public Iterator<ImmutableNode<float[]>> iterator()
      {
        return new Iterator<ImmutableNode<float[]>>()
        {
          private int count = 0;

          @Override
          public boolean hasNext()
          {
            return (count < numChildren);
          }

          @Override
          public ImmutableNode<float[]> next()
          {
            // Honor the Iterator contract instead of decoding bytes past the last child offset.
            if (!hasNext()) {
              throw new NoSuchElementException();
            }
            final int childOffset = data.getInt(childrenOffset + (count++) * Integer.BYTES);
            if (isLeaf) {
              return new ImmutableFloatPoint(
                  numDims,
                  initialOffset,
                  childOffset,
                  data,
                  bitmapFactory
              );
            }
            return new ImmutableFloatNode(
                numDims,
                initialOffset,
                childOffset,
                data,
                bitmapFactory
            );
          }

          @Override
          public void remove()
          {
            throw new UnsupportedOperationException();
          }
        };
      }
    };
  }

  /**
   * @return the backing buffer itself (not a copy)
   */
  @Override
  public ByteBuffer getData()
  {
    return data;
  }

  /**
   * Absolute position of the 4-byte bitmap size field: it follows the header and both coordinate arrays.
   */
  private static int bitmapSizePosition(int numDims, int initialOffset, int offsetFromInitial)
  {
    return initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES;
  }

  /**
   * Absolute position of the first child offset: it follows the bitmap size field and the bitmap bytes.
   */
  private static int computeChildrenOffset(int numDims, int initialOffset, int offsetFromInitial, ByteBuffer data)
  {
    final int sizePosition = bitmapSizePosition(numDims, initialOffset, offsetFromInitial);
    final int bitmapSize = data.getInt(sizePosition);
    return sizePosition + Integer.BYTES + bitmapSize;
  }

  /**
   * Reads {@code numDims} consecutive floats starting at the given absolute offset.
   */
  private float[] getCoords(int offset)
  {
    final float[] retVal = new float[numDims];

    // A read-only duplicate has its own position, so the shared buffer is left untouched.
    final ByteBuffer readOnlyBuffer = data.asReadOnlyBuffer();
    readOnlyBuffer.position(offset);
    readOnlyBuffer.asFloatBuffer().get(retVal);

    return retVal;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@

import java.nio.ByteBuffer;

public class ImmutablePoint extends ImmutableNode
public class ImmutableFloatPoint extends ImmutableFloatNode
{
public ImmutablePoint(
public ImmutableFloatPoint(
int numDims,
int initialOffset,
int offsetFromInitial,
Expand All @@ -36,7 +36,7 @@ public ImmutablePoint(
super(numDims, initialOffset, offsetFromInitial, (short) 0, true, data, bitmapFactory);
}

public ImmutablePoint(ImmutableNode node)
public ImmutableFloatPoint(ImmutableNode node)
{
super(
node.getNumDims(),
Expand All @@ -55,7 +55,7 @@ public float[] getCoords()
}

@Override
public Iterable<ImmutableNode> getChildren()
public Iterable<ImmutableNode<float[]>> getChildren()
{
// should never get here
throw new UnsupportedOperationException();
Expand Down

0 comments on commit 20de7fd

Please sign in to comment.