Skip to content

Commit

Permalink
MB-54131: Geoshape query decode optimization (#14)
Browse files Browse the repository at this point in the history
* MB-54131: Geoshape query decode optimization

	- Added buffer pool pointer to polygon
	- Added parent polygon pointer to loop
	- Added decoder function to select, populate and return a pointer to the buffer from a pool of buffers
	- Updated loop decode function to handle chunks of data being read
	- Updated polygon decode function to pass on the buffer pool to the loops
	- Updated extractShapesFromBytes and FilterGeoShapesOnRelation to include the buffer pool in their parameters

* MB-54131: Geoshape query decode optimization

 - Changed buffer pool from pointer to slice
 - Removed randomly formatted comments

* MB-54131: Geoshape query decode

  + add .gitignore

* MB-54131: Geoshape query decode optimization

 - Added a buffer pool struct
 - Increased size of the maximum buffer to 24KB
 - Added better comments

* MB-54131: Geoshape query decode optimization

 - Minor formatting fixes
 - Reusing buffers instead of creating new ones every time

* MB-54131: Geoshape query decode optimization

 - Better comments
 - Removed some magic numbers

* MB-54131: Geoshape query decode optimization

 - Changed buffer pool implementation to give the smallest buffer that fits the data completely
 - Changed decoder implementation to allow buffer sizes bigger than number of bytes needed

* MB-54131: Geoshape query decode optimization

 - Renamed some constants

* MB-54131: Geoshape query decode optimization

 - Added benchmark for loop decode
  • Loading branch information
Likith101 committed Aug 30, 2023
1 parent 7135870 commit 045f1ed
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.DS_Store
10 changes: 5 additions & 5 deletions geojson/geojson_shapes_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ var jsoniter = jsoniterator.ConfigCompatibleWithStandardLibrary
// the `relation` filter and confirms whether the shape in the document
// satisfies the given relation.
func FilterGeoShapesOnRelation(shape index.GeoJSON, targetShapeBytes []byte,
relation string, reader **bytes.Reader) (bool, error) {
relation string, reader **bytes.Reader, bufPool *s2.GeoBufferPool) (bool, error) {

shapeInDoc, err := extractShapesFromBytes(targetShapeBytes, reader)
shapeInDoc, err := extractShapesFromBytes(targetShapeBytes, reader, bufPool)
if err != nil {
return false, err
}
Expand All @@ -43,7 +43,7 @@ func FilterGeoShapesOnRelation(shape index.GeoJSON, targetShapeBytes []byte,

// extractShapesFromBytes unmarshal the bytes to retrieve the
// embedded geojson shape.
func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader) (
func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader, bufPool *s2.GeoBufferPool) (
index.GeoJSON, error) {
if (*r) == nil {
*r = bytes.NewReader(targetShapeBytes[1:])
Expand Down Expand Up @@ -109,7 +109,7 @@ func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader) (
return mls, nil

case PolygonTypePrefix:
pgn := &Polygon{s2pgn: &s2.Polygon{}}
pgn := &Polygon{s2pgn: &s2.Polygon{BufPool: bufPool}}
err := pgn.s2pgn.Decode(*r)
if err != nil {
return nil, err
Expand Down Expand Up @@ -156,7 +156,7 @@ func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader) (
gc := &GeometryCollection{Shapes: make([]index.GeoJSON, numShapes)}

for i := int32(0); i < numShapes; i++ {
shape, err := extractShapesFromBytes(inputBytes[:lengths[i]], r)
shape, err := extractShapesFromBytes(inputBytes[:lengths[i]], r, nil)
if err != nil {
return nil, err
}
Expand Down
64 changes: 64 additions & 0 deletions s2/buffer_pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package s2

// GeoBufferPool is a pool of reusable byte buffers whose sizes grow by
// factors of two from minSize up to maxSize. Buffers are allocated lazily,
// the first time a given size class is requested.
//
// A buffer returned by Get stays owned by the pool and is handed out again on
// later calls for the same size class, so its contents must be consumed
// before the next Get. The type contains no locking.
type GeoBufferPool struct {
	// buffers[i] holds the buffer for the i-th size class (minSize doubled
	// i times), or nil until that class is first requested.
	buffers [][]byte
	// maxSize is the size of the largest buffer class; zero when the pool
	// has no size classes.
	maxSize int
	// minSize is the size of the smallest buffer class.
	minSize int
}

// NewGeoBufferPool returns a pool whose buffer sizes are minSize,
// 2*minSize, 4*minSize, ... up to the largest such value that does not
// exceed maxSize. That largest value, not the maxSize argument, is what
// the pool records as its maximum.
//
// If minSize is not positive or is larger than maxSize, no size class can
// be formed; the returned pool is then empty and Get falls back to
// allocating a fresh buffer on every call.
func NewGeoBufferPool(maxSize int, minSize int) *GeoBufferPool {
	if minSize <= 0 || minSize > maxSize {
		// Doubling a non-positive size would never reach maxSize, so
		// return an empty pool rather than looping forever.
		return &GeoBufferPool{minSize: minSize}
	}

	// Compare against maxSize/2 instead of doubling first, so the
	// computation cannot overflow for very large maxSize values.
	count := 1
	largest := minSize
	for largest <= maxSize/2 {
		largest *= 2
		count++
	}

	return &GeoBufferPool{
		buffers: make([][]byte, count),
		maxSize: largest,
		minSize: minSize,
	}
}

// Get returns the smallest pooled buffer whose length is at least size, or
// the largest pooled buffer when size exceeds every size class. In the
// latter case the returned buffer is shorter than size and the caller is
// expected to consume the data in several chunks.
//
// Get is safe to call on a nil *GeoBufferPool and on a pool with no size
// classes: it then returns a freshly allocated buffer of exactly size
// bytes (nil when size is not positive).
func (b *GeoBufferPool) Get(size int) []byte {
	if b == nil || len(b.buffers) == 0 {
		// No pool configured; behave like a plain allocation so that
		// callers do not have to special-case a missing pool.
		if size <= 0 {
			return nil
		}
		return make([]byte, size)
	}

	// Walk up the size classes until one fits or the last one is reached.
	idx, bufSize := 0, b.minSize
	for idx < len(b.buffers)-1 && size > bufSize {
		idx++
		bufSize *= 2
	}

	if b.buffers[idx] == nil {
		b.buffers[idx] = make([]byte, bufSize)
	}
	return b.buffers[idx]
}
14 changes: 14 additions & 0 deletions s2/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,17 @@ func (d *decoder) readUvarint() (x uint64) {
x, d.err = binary.ReadUvarint(d.r)
return
}

// readFloat64Array reads up to size bytes of raw data from the decoder's
// reader into buf and returns the number of bytes placed there. When size
// is larger than len(buf) only len(buf) bytes are read, so the caller must
// call again for the remainder.
//
// It returns 0 without reading when the decoder already holds an error,
// when buf is empty, or when size is not positive. If the read itself
// fails, the error is stored in d.err and 0 is returned, so the caller
// never interprets buffer contents that were not freshly read.
func (d *decoder) readFloat64Array(size int, buf []byte) int {
	if d.err != nil || len(buf) == 0 || size <= 0 {
		return 0
	}

	// Never read past the end of the supplied buffer.
	if size > len(buf) {
		size = len(buf)
	}

	if _, d.err = io.ReadFull(d.r, buf[:size]); d.err != nil {
		return 0
	}
	return size
}
38 changes: 34 additions & 4 deletions s2/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package s2

import (
"encoding/binary"
"fmt"
"io"
"math"
Expand All @@ -24,6 +25,9 @@ import (
"github.com/golang/geo/s1"
)

// Sizes, in bytes, of the fixed-width values read while decoding a loop.
const (
	// SizeOfFloat is the number of bytes in one encoded float64.
	SizeOfFloat = 8

	// SizeOfVertex is the number of bytes in one encoded vertex, which is
	// stored as three float64 coordinates (X, Y, Z).
	SizeOfVertex = 3 * SizeOfFloat
)

// Loop represents a simple spherical polygon. It consists of a sequence
// of vertices where the first vertex is implicitly connected to the
// last. All loops are defined to have a CCW orientation, i.e. the interior of
Expand Down Expand Up @@ -66,6 +70,9 @@ type Loop struct {

// index is the spatial index for this Loop.
index *ShapeIndex

// A buffer pool to be used while decoding the polygon
BufPool *GeoBufferPool
}

// LoopFromPoints constructs a loop from the given points.
Expand Down Expand Up @@ -1287,11 +1294,34 @@ func (l *Loop) decode(d *decoder) {
return
}
l.vertices = make([]Point, nvertices)
for i := range l.vertices {
l.vertices[i].X = d.readFloat64()
l.vertices[i].Y = d.readFloat64()
l.vertices[i].Z = d.readFloat64()

// Each vertex requires 24 bytes of storage
numBytesNeeded := int(nvertices) * SizeOfVertex

i := 0

for numBytesNeeded > 0 {
arr := l.BufPool.Get(numBytesNeeded)
numBytesRead := d.readFloat64Array(numBytesNeeded, arr)

if numBytesRead == 0 {
break
}

numBytesNeeded = numBytesNeeded - numBytesRead

// Parse one vertex at a time into the loop's vertex array by walking the
// buffer in steps of SizeOfVertex and converting each SizeOfFloat-byte
// group into a float64 coordinate.
for j := 0; j < int(numBytesRead/SizeOfVertex); j++ {
l.vertices[i+j].X = math.Float64frombits(binary.LittleEndian.Uint64(arr[SizeOfFloat*(j*3) : SizeOfFloat*(j*3+1)]))
l.vertices[i+j].Y = math.Float64frombits(binary.LittleEndian.Uint64(arr[SizeOfFloat*(j*3+1) : SizeOfFloat*(j*3+2)]))
l.vertices[i+j].Z = math.Float64frombits(binary.LittleEndian.Uint64(arr[SizeOfFloat*(j*3+2) : SizeOfFloat*(j*3+3)]))
}

i = i + int(numBytesRead/SizeOfVertex)
}

l.index = NewShapeIndex()
l.originInside = d.readBool()
l.depth = int(d.readUint32())
Expand Down
59 changes: 59 additions & 0 deletions s2/loop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package s2
import (
"fmt"
"math"
"os"
"testing"

"github.com/golang/geo/r1"
Expand Down Expand Up @@ -1817,3 +1818,61 @@ func BenchmarkLoopContainsPoint(b *testing.B) {
vertices *= 2
}
}

// BenchmarkLoopDecode measures Loop.decode on a large loop using a shared
// GeoBufferPool. The loop traces a square between (-10, -10) and (10, 10)
// with a point every 0.01 along each side; it is encoded once to a
// temporary file, which is then reopened and decoded on every iteration.
func BenchmarkLoopDecode(b *testing.B) {
	const (
		fileName      = "testLoop.txt"
		pointsPerSide = 2000
	)

	// Four sides of pointsPerSide points each, plus the closing point.
	points := make([][]float64, 0, 4*pointsPerSide+1)

	points = append(points, []float64{10, 10})
	for i := 1; i < pointsPerSide; i++ {
		points = append(points, []float64{10 - 0.01*float64(i), 10})
	}
	points = append(points, []float64{-10, 10})
	for i := 1; i < pointsPerSide; i++ {
		points = append(points, []float64{-10, 10 - 0.01*float64(i)})
	}
	points = append(points, []float64{-10, -10})
	for i := 1; i < pointsPerSide; i++ {
		points = append(points, []float64{-10 + 0.01*float64(i), -10})
	}
	points = append(points, []float64{10, -10})
	for i := 1; i < pointsPerSide; i++ {
		points = append(points, []float64{10, -10 + 0.01*float64(i)})
	}
	points = append(points, []float64{10, 10})

	// Build the "a:b, a:b, ..." text accepted by parsePoints in a single
	// growing byte slice rather than by repeated string concatenation.
	text := make([]byte, 0, len(points)*24)
	for i, p := range points {
		if i > 0 {
			text = append(text, ", "...)
		}
		text = append(text, fmt.Sprintf("%f:%f", p[0], p[1])...)
	}

	loop := LoopFromPoints(parsePoints(string(text)))

	// Truncate instead of appending so that a file left behind by an
	// aborted run cannot accumulate stale data.
	f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		b.Fatalf("%v", err)
	}
	// Deferred so the file is also removed when the benchmark aborts.
	defer os.Remove(fileName)

	if err := loop.Encode(f); err != nil {
		f.Close()
		b.Fatalf("%v", err)
	}
	if err := f.Close(); err != nil {
		b.Fatalf("%v", err)
	}

	bufPool := NewGeoBufferPool(24*1024, 24)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		f, err := os.Open(fileName)
		if err != nil {
			b.Fatalf("%v", err)
		}
		l := &Loop{
			BufPool: bufPool,
		}
		// NOTE(review): the decoder's err field is not inspected after
		// decode, so a failed decode would go unnoticed here. Before adding
		// a check, confirm that decode accepts Encode's output as-is.
		l.decode(&decoder{r: asByteReader(f)})
		f.Close()
	}
}
6 changes: 5 additions & 1 deletion s2/polygon.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ type Polygon struct {
// preceding loops in the polygon. This field is used for polygons that
// have a large number of loops, and may be empty for polygons with few loops.
cumulativeEdges []int

// A buffer pool to be used while decoding the polygon
BufPool *GeoBufferPool
}

// PolygonFromLoops constructs a polygon from the given set of loops. The polygon
Expand Down Expand Up @@ -1133,7 +1136,7 @@ func (p *Polygon) Decode(r io.Reader) error {
const maxEncodedLoops = 10000000

func (p *Polygon) decode(d *decoder) {
*p = Polygon{}
*p = Polygon{BufPool: p.BufPool}
d.readUint8() // Ignore irrelevant serialized owns_loops_ value.

p.hasHoles = d.readBool()
Expand All @@ -1151,6 +1154,7 @@ func (p *Polygon) decode(d *decoder) {
p.loops = make([]*Loop, nloops)
for i := range p.loops {
p.loops[i] = new(Loop)
p.loops[i].BufPool = p.BufPool
p.loops[i].decode(d)
p.numVertices += len(p.loops[i].vertices)
}
Expand Down

0 comments on commit 045f1ed

Please sign in to comment.