Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

builtins: Implement ST_GeneratePoints function #58288

Merged
merged 1 commit into from Jan 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/generated/sql/functions.md
Expand Up @@ -1696,6 +1696,10 @@ Bottom Left.</p>
<p>Smaller densify_frac gives a more accurate Fréchet distance. However, the computation time and memory usage increases with the square of the number of subsegments.</p>
<p>This function utilizes the GEOS module.</p>
</span></td></tr>
<tr><td><a name="st_generatepoints"></a><code>st_generatepoints(geometry: geometry, npoints: int4) &rarr; geometry</code></td><td><span class="funcdesc"><p>Generates pseudo-random points until the requested number are found within the input area. Uses system time as a seed.</p>
</span></td></tr>
<tr><td><a name="st_generatepoints"></a><code>st_generatepoints(geometry: geometry, npoints: int4, seed: int4) &rarr; geometry</code></td><td><span class="funcdesc"><p>Generates pseudo-random points until the requested number are found within the input area.</p>
</span></td></tr>
<tr><td><a name="st_geogfromewkb"></a><code>st_geogfromewkb(val: <a href="bytes.html">bytes</a>) &rarr; geography</code></td><td><span class="funcdesc"><p>Returns the Geography from an EWKB representation.</p>
</span></td></tr>
<tr><td><a name="st_geogfromewkt"></a><code>st_geogfromewkt(val: <a href="string.html">string</a>) &rarr; geography</code></td><td><span class="funcdesc"><p>Returns the Geography from an EWKT representation.</p>
Expand Down
2 changes: 2 additions & 0 deletions pkg/geo/geomfn/BUILD.bazel
Expand Up @@ -15,6 +15,7 @@ go_library(
"envelope.go",
"flip_coordinates.go",
"force_layout.go",
"generate_points.go",
"geomfn.go",
"linear_reference.go",
"linestring.go",
Expand Down Expand Up @@ -66,6 +67,7 @@ go_test(
"envelope_test.go",
"flip_coordinates_test.go",
"force_layout_test.go",
"generate_points_test.go",
"geomfn_test.go",
"linear_reference_test.go",
"linestring_test.go",
Expand Down
219 changes: 219 additions & 0 deletions pkg/geo/geomfn/generate_points.go
@@ -0,0 +1,219 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package geomfn

import (
"math"
"math/rand"

"github.com/cockroachdb/cockroach/pkg/geo"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/cockroach/pkg/geo/geos"
"github.com/cockroachdb/errors"
"github.com/twpayne/go-geom"
)

// GenerateRandomPoints generates provided number of pseudo-random points for the input area.
func GenerateRandomPoints(g geo.Geometry, nPoints int, rng *rand.Rand) (geo.Geometry, error) {
if nPoints < 0 {
return geo.Geometry{}, nil
}
pointsAsGeometry, err := generateRandomPoints(g, nPoints, rng)
if err != nil {
return geo.Geometry{}, errors.Wrap(err, "generating random points error")
}
return pointsAsGeometry, nil
}

// generateRandomPoints returns a MultiPoint geometry consisting of randomly generated points
// that are covered by the geometry provided.
// nPoints is the number of points to return.
// rng is the random numbers generator.
func generateRandomPoints(g geo.Geometry, nPoints int, rng *rand.Rand) (geo.Geometry, error) {
var generateRandomPointsFunction func(g geo.Geometry, nPoints int, rng *rand.Rand) (*geom.MultiPoint, error)
switch g.ShapeType() {
case geopb.ShapeType_Polygon:
generateRandomPointsFunction = generateRandomPointsFromPolygon
case geopb.ShapeType_MultiPolygon:
generateRandomPointsFunction = generateRandomPointsFromMultiPolygon
default:
return geo.Geometry{}, errors.Newf("unsupported type: %v", g.ShapeType().String())
}
// This is to be checked once we know Geometry type is supported,
// so that we can keep consistency with PostGIS implementation.
if nPoints == 0 {
return geo.Geometry{}, nil
}
empty, err := IsEmpty(g)
if err != nil {
return geo.Geometry{}, errors.Wrap(err, "could not check if geometry is empty")
}
if empty {
return geo.Geometry{}, nil
}
mpt, err := generateRandomPointsFunction(g, nPoints, rng)
if err != nil {
return geo.Geometry{}, err
}
srid := g.SRID()
mpt.SetSRID(int(srid))
out, err := geo.MakeGeometryFromGeomT(mpt)
if err != nil {
return geo.Geometry{}, errors.Wrap(err, "could not transform geom.T into geometry")
}
return out, nil
}

// generateRandomPointsFromPolygon returns a given number of randomly generated points that are within the Polygon provided.
// In order to get more uniformed number of points, it's generating a grid, and then generating a random point within each grid cell.
// Read more: http://lin-ear-th-inking.blogspot.com/2010/05/more-random-points-in-jts.html.
// Rough description of the algorithm:
// 1. Generate the grid cells. The higher number of points to generate and bounding box area/polygon area ratio, the smaller cell size.
// 2. Shuffle the array of grid cells indexes. This way the order in which grid cells are chosen for point generation is random.
// 3. For each grid cell, generate a random point within it. If the point intersects with our geometry, add it to the results.
// 4. When there are enough points in the results, stop. Otherwise go to step 3.
func generateRandomPointsFromPolygon(
g geo.Geometry, nPoints int, rng *rand.Rand,
) (*geom.MultiPoint, error) {
area, err := Area(g)
if err != nil {
return nil, errors.Wrap(err, "could not calculate Polygon area")
}
if area == 0.0 {
return nil, errors.New("zero area input Polygon")
}
bbox := g.CartesianBoundingBox()
bboxWidth := bbox.HiX - bbox.LoX
bboxHeight := bbox.HiY - bbox.LoY
bboxArea := bboxHeight * bboxWidth
// Gross up our test set a bit to increase odds of getting coverage in one pass.
sampleNPoints := float64(nPoints) * bboxArea / area

sampleSqrt := math.Round(math.Sqrt(sampleNPoints))
if sampleSqrt == 0 {
sampleSqrt = 1
}
var sampleHeight, sampleWidth int
var sampleCellSize float64
// Calculate the grids we're going to randomize within.
otan marked this conversation as resolved.
Show resolved Hide resolved
if bboxWidth > bboxHeight {
sampleWidth = int(sampleSqrt)
sampleHeight = int(math.Ceil(sampleNPoints / float64(sampleWidth)))
sampleCellSize = bboxWidth / float64(sampleWidth)
} else {
sampleHeight = int(sampleSqrt)
sampleWidth = int(math.Ceil(sampleNPoints / float64(sampleHeight)))
sampleCellSize = bboxHeight / float64(sampleHeight)
}
// Prepare the polygon for fast true/false testing.
gPrep, err := geos.PrepareGeometry(g.EWKB())
if err != nil {
return nil, errors.Wrap(err, "could not prepare geometry")
}
res, err := func() (*geom.MultiPoint, error) {
// Generate a slice of points - for every cell on a grid store coordinates.
n := sampleHeight * sampleWidth
cells := make([]geom.Coord, n)
for i := 0; i < sampleWidth; i++ {
for j := 0; j < sampleHeight; j++ {
cells[i*sampleHeight+j] = geom.Coord{float64(i), float64(j)}
}
}
// Shuffle the points. Without shuffling, the generated point will
// always be adjacent to the previous one (in terms of grid cells).
if n > 1 {
rng.Shuffle(n, func(i int, j int) {
temp := cells[j]
cells[j] = cells[i]
cells[i] = temp
})
}
results := geom.NewMultiPoint(geom.XY)
// Generate points and test them.
for nPointsGenerated, iterations := 0, 0; nPointsGenerated < nPoints && iterations <= nPoints*10; iterations++ {
for _, cell := range cells {
y := bbox.LoY + cell.X()*sampleCellSize
x := bbox.LoX + cell.Y()*sampleCellSize
x += rng.Float64() * sampleCellSize
y += rng.Float64() * sampleCellSize
if x > bbox.HiX || y > bbox.HiY {
continue
}
gpt, err := geo.MakeGeometryFromPointCoords(x, y)
if err != nil {
return nil, errors.Wrap(err, "could not create geometry Point")
}
intersects, err := geos.PreparedIntersects(gPrep, gpt.EWKB())
if err != nil {
return nil, errors.Wrap(err, "could not check prepared intersection")
}
if intersects {
nPointsGenerated++
p := geom.NewPointFlat(geom.XY, []float64{x, y})
srid := g.SRID()
p.SetSRID(int(srid))
err = results.Push(p)
if err != nil {
return nil, errors.Wrap(err, "could not add point to the results")
}
if nPointsGenerated == nPoints {
return results, nil
}
}
}
}
return results, nil
}()
if err != nil {
destroyErr := geos.PreparedGeomDestroy(gPrep)
return nil, errors.CombineErrors(destroyErr, err)
}
return res, nil
}

func generateRandomPointsFromMultiPolygon(
g geo.Geometry, nPoints int, rng *rand.Rand,
) (*geom.MultiPoint, error) {
results := geom.NewMultiPoint(geom.XY)

area, err := Area(g)
if err != nil {
return nil, errors.Wrap(err, "could not calculate MultiPolygon area")
}

gt, err := g.AsGeomT()
if err != nil {
return nil, errors.Wrap(err, "could not transform MultiPolygon into geom.T")
}

gmp := gt.(*geom.MultiPolygon)
for i := 0; i < gmp.NumPolygons(); i++ {
poly := gmp.Polygon(i)
subarea := poly.Area()
subNPoints := int(math.Round(float64(nPoints) * subarea / area))
if subNPoints > 0 {
g, err := geo.MakeGeometryFromGeomT(poly)
if err != nil {
return nil, errors.Wrap(err, "could not transform geom.T into Geometry")
}
subMPT, err := generateRandomPointsFromPolygon(g, subNPoints, rng)
if err != nil {
return nil, errors.Wrap(err, "error generating points for Polygon")
}
for j := 0; j < subMPT.NumPoints(); j++ {
if err := results.Push(subMPT.Point(j)); err != nil {
return nil, errors.Wrap(err, "could not push point to the results")
}
}
}
}
return results, nil
}
128 changes: 128 additions & 0 deletions pkg/geo/geomfn/generate_points_test.go
@@ -0,0 +1,128 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package geomfn

import (
"math/rand"
"testing"

"github.com/cockroachdb/cockroach/pkg/geo"
"github.com/stretchr/testify/require"
)

func TestGenerateRandomPoints(t *testing.T) {
type args struct {
g geo.Geometry
nPoints int
seed int64
}
testCases := []struct {
name string
args args
want geo.Geometry
}{
{
"number of points to generate less than minimum",
args{geo.MustParseGeometry("POLYGON((0 0, 1 0, 1 1.1, 0 0))"), -1, 1},
geo.Geometry{},
},
{
"supported geometry, zero points to generate",
args{geo.MustParseGeometry("POLYGON((0 0, 1 0, 1 1.1, 0 0))"), 0, 1},
geo.Geometry{},
},
{
"empty geometry",
args{geo.MustParseGeometry("POLYGON EMPTY"), 4, 1},
otan marked this conversation as resolved.
Show resolved Hide resolved
geo.Geometry{},
},
{
"Polygon - square",
args{geo.MustParseGeometry("POLYGON((1 1,1 2,2 2,2 1,1 1))"), 4, 2},
geo.MustParseGeometry("MULTIPOINT(1.55948870460174560115 1.30713530828859747501, 1.4638274793965568854 1.71250027479276534237, 1.6030771445518701146 1.60597601333107697918, 1.06344848690006488212 1.30988904426268293335)"),
},
{
"Polygon, width > height",
args{geo.MustParseGeometry("POLYGON((0 0,1 2,3 2,2 0,0 0))"), 7, 14},
geo.MustParseGeometry("MULTIPOINT(0.3830566789833752539 0.23117069178437729682, 2.51179632398947294547 1.69060434239713908156, 1.94222431495883451902 0.56512577777958861169, 1.87408814538545565043 1.21241169406013726828, 2.29499038937696608897 1.19218670122114289711, 1.93144882885377144888 0.79976266805657403314, 1.33675111888047548625 1.70583597131752906506)"),
},
{
"Polygon, width < height",
args{geo.MustParseGeometry("POLYGON((0 0,2 5,2.5 4,3 5,3 1,0 0))"), 5, 1996},
geo.MustParseGeometry("MULTIPOINT(0.69465779472271460548 0.92001319545446269554, 2.4417593921811042712 3.71642371685872197062, 2.79787890688424933927 3.8425013135166361522, 1.05776032659919683176 1.77173131482243051416, 1.79695770199420046254 2.42853164217679839965)"),
},
{
"Polygon with a hole",
args{geo.MustParseGeometry("POLYGON((-1 -1, -1 1, 0 2, 1 1, 1 -1, 0 -2, -1 -1),(-0.5 -0.5, -0.5 0.5, 0.5 0.5, 0.5 -0.5, 0 -0.5, -0.5 -0.5))"), 7, 42},
geo.MustParseGeometry("MULTIPOINT(0.62392209943416365725 1.14891034830739080519, 0.5066157824317096825 -1.39898866703452817717, -0.71853651145541486134 1.15646037366762399756, -0.03771709502060871522 -0.80984418343170694321, 0.56879738017966463559 -0.74897977355716416348, 0.05783156505961972726 1.37709385168907050279, 0.71334734167548885519 -0.61452515240937377605)"),
},
{
"MultiPolygon",
args{geo.MustParseGeometry("MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)), ((0 0,-4 0,-4 -4,0 -4,0 0),(1 1,2 1,2 2,1 2,1 1)))"), 9, 17},
geo.MustParseGeometry("MULTIPOINT(0.17846255116778333982 3.74754647968727550023, 2.27256794763602965048 0.21170437107020742551, 3.78118853421934808523 3.80677821706058949758, 1.04599577267150789517 3.86644467452649553962, 3.25038586104516369346 1.99712423764354585209, -1.69784827781519487289 -0.41663496633749641518, -3.13128096103860187327 -0.52028622879791064371, -2.49857072552626657824 -2.30333494646855019283, -0.37133983304899031985 -3.89307989068656556952, -0.31767322799652175647 -1.36504243564259120092)"),
},
{
"Polygon with a specified SRID",
args{geo.MustParseGeometry("SRID=4326;POLYGON((0 0,2 5,2.5 4,3 5,3 1,0 0))"), 5, 1996},
otan marked this conversation as resolved.
Show resolved Hide resolved
geo.MustParseGeometry("SRID=4326;MULTIPOINT(0.69465779472271460548 0.92001319545446269554, 2.4417593921811042712 3.71642371685872197062, 2.79787890688424933927 3.8425013135166361522, 1.05776032659919683176 1.77173131482243051416, 1.79695770199420046254 2.42853164217679839965)"),
},
{
"Requires too many iterations over grid",
args{geo.MustParseGeometry("POLYGON((0 0,0 100,0.00001 0.00000001,0.99999 0.00000001,1 100,1 0,0 0))"), 5, 1996},
geo.MustParseGeometry("MULTIPOINT(0.9999990842784447497849 37.4716712014091797300352, 0.0000035681765926535594 47.1117932985884309005087, 0.000004226396968502996 32.4473691709023412954593, 0.9999921320300672045178 13.3385008252821712915193)"),
},
{
"Invalid Polygon",
args{geo.MustParseGeometry("POLYGON((-1 -1,1 -1,0 0,0 1,0 -1,0 0,-1 0,-1 -1))"), 5, 1996},
geo.MustParseGeometry("MULTIPOINT(-0.26747218234566871864 -0.88507288494238345322, -0.87713357493233190532 -0.97615754673482446613, -0.44243286372996359912 -0.3727648333352959753, 0.43580610882637776937 -0.55479608815084269224, -0.55844006437980153734 -0.47716362944650048128)"),
},
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
rng := rand.New(rand.NewSource(tt.args.seed))
got, err := GenerateRandomPoints(tt.args.g, tt.args.nPoints, rng)
require.NoError(t, err)
require.Equal(t, tt.want, got)
})
}

t.Run("errors", func(t *testing.T) {
errorTestCases := []struct {
name string
args args
errMatch string
}{
{
"unsupported geometry, zero points to generate",
args{geo.MustParseGeometry("LINESTRING(50 50,150 150,150 50)"), 0, 1},
"unsupported type: LineString",
},
{
"unsupported geometry type: LineString",
args{geo.MustParseGeometry("LINESTRING(50 50,150 150,150 50)"), 4, 1},
"unsupported type: LineString",
},
{
"Polygon with zero area",
args{geo.MustParseGeometry("POLYGON((0 0, 1 1, 1 1, 0 0))"), 4, 1},
"zero area input Polygon",
},
}
for _, tt := range errorTestCases {
t.Run(tt.name, func(t *testing.T) {
rng := rand.New(rand.NewSource(tt.args.seed))
_, err := GenerateRandomPoints(tt.args.g, tt.args.nPoints, rng)
require.Error(t, err)
require.Contains(t, err.Error(), tt.errMatch)
})
}
})
}