From 1af7a03f6f924cca5b285ac2294acdbf563d394f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Knycha=C5=82a?=
Smaller densify_frac gives a more accurate Fréchet distance. However, the computation time and memory usage increases with the square of the number of subsegments.
This function utilizes the GEOS module.
+st_generatepoints(geometry: geometry, npoints: int4) → geometry
Generates pseudo-random points until the requested number are found within the input area. Uses system time as a seed.
+st_generatepoints(geometry: geometry, npoints: int4, seed: int4) → geometry
Generates pseudo-random points until the requested number are found within the input area.
+st_geogfromewkb(val: bytes) → geography
Returns the Geography from an EWKB representation.
st_geogfromewkt(val: string) → geography
Returns the Geography from an EWKT representation.
diff --git a/pkg/geo/geomfn/BUILD.bazel b/pkg/geo/geomfn/BUILD.bazel index ac586f324f75..caf06e8c9333 100644 --- a/pkg/geo/geomfn/BUILD.bazel +++ b/pkg/geo/geomfn/BUILD.bazel @@ -15,6 +15,7 @@ go_library( "envelope.go", "flip_coordinates.go", "force_layout.go", + "generate_points.go", "geomfn.go", "linear_reference.go", "linestring.go", @@ -66,6 +67,7 @@ go_test( "envelope_test.go", "flip_coordinates_test.go", "force_layout_test.go", + "generate_points_test.go", "geomfn_test.go", "linear_reference_test.go", "linestring_test.go", diff --git a/pkg/geo/geomfn/generate_points.go b/pkg/geo/geomfn/generate_points.go new file mode 100644 index 000000000000..91cafcf83239 --- /dev/null +++ b/pkg/geo/geomfn/generate_points.go @@ -0,0 +1,219 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geomfn + +import ( + "math" + "math/rand" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/cockroachdb/cockroach/pkg/geo/geopb" + "github.com/cockroachdb/cockroach/pkg/geo/geos" + "github.com/cockroachdb/errors" + "github.com/twpayne/go-geom" +) + +// GenerateRandomPoints generates provided number of pseudo-random points for the input area. +func GenerateRandomPoints(g geo.Geometry, nPoints int, rng *rand.Rand) (geo.Geometry, error) { + if nPoints < 0 { + return geo.Geometry{}, nil + } + pointsAsGeometry, err := generateRandomPoints(g, nPoints, rng) + if err != nil { + return geo.Geometry{}, errors.Wrap(err, "generating random points error") + } + return pointsAsGeometry, nil +} + +// generateRandomPoints returns a MultiPoint geometry consisting of randomly generated points +// that are covered by the geometry provided. +// nPoints is the number of points to return. +// rng is the random numbers generator. +func generateRandomPoints(g geo.Geometry, nPoints int, rng *rand.Rand) (geo.Geometry, error) { + var generateRandomPointsFunction func(g geo.Geometry, nPoints int, rng *rand.Rand) (*geom.MultiPoint, error) + switch g.ShapeType() { + case geopb.ShapeType_Polygon: + generateRandomPointsFunction = generateRandomPointsFromPolygon + case geopb.ShapeType_MultiPolygon: + generateRandomPointsFunction = generateRandomPointsFromMultiPolygon + default: + return geo.Geometry{}, errors.Newf("unsupported type: %v", g.ShapeType().String()) + } + // This is to be checked once we know Geometry type is supported, + // so that we can keep consistency with PostGIS implementation. + if nPoints == 0 { + return geo.Geometry{}, nil + } + empty, err := IsEmpty(g) + if err != nil { + return geo.Geometry{}, errors.Wrap(err, "could not check if geometry is empty") + } + if empty { + return geo.Geometry{}, nil + } + mpt, err := generateRandomPointsFunction(g, nPoints, rng) + if err != nil { + return geo.Geometry{}, err + } + srid := g.SRID() + mpt.SetSRID(int(srid)) + out, err := geo.MakeGeometryFromGeomT(mpt) + if err != nil { + return geo.Geometry{}, errors.Wrap(err, "could not transform geom.T into geometry") + } + return out, nil +} + +// generateRandomPointsFromPolygon returns a given number of randomly generated points that are within the Polygon provided. +// In order to get more uniformed number of points, it's generating a grid, and then generating a random point within each grid cell. +// Read more: http://lin-ear-th-inking.blogspot.com/2010/05/more-random-points-in-jts.html. +// Rough description of the algorithm: +// 1. Generate the grid cells. The higher number of points to generate and bounding box area/polygon area ratio, the smaller cell size. +// 2. Shuffle the array of grid cells indexes. This way the order in which grid cells are chosen for point generation is random. +// 3. For each grid cell, generate a random point within it. If the point intersects with our geometry, add it to the results. +// 4. When there are enough points in the results, stop. Otherwise go to step 3. +func generateRandomPointsFromPolygon( + g geo.Geometry, nPoints int, rng *rand.Rand, +) (*geom.MultiPoint, error) { + area, err := Area(g) + if err != nil { + return nil, errors.Wrap(err, "could not calculate Polygon area") + } + if area == 0.0 { + return nil, errors.New("zero area input Polygon") + } + bbox := g.CartesianBoundingBox() + bboxWidth := bbox.HiX - bbox.LoX + bboxHeight := bbox.HiY - bbox.LoY + bboxArea := bboxHeight * bboxWidth + // Gross up our test set a bit to increase odds of getting coverage in one pass. + sampleNPoints := float64(nPoints) * bboxArea / area + + sampleSqrt := math.Round(math.Sqrt(sampleNPoints)) + if sampleSqrt == 0 { + sampleSqrt = 1 + } + var sampleHeight, sampleWidth int + var sampleCellSize float64 + // Calculate the grids we're going to randomize within. + if bboxWidth > bboxHeight { + sampleWidth = int(sampleSqrt) + sampleHeight = int(math.Ceil(sampleNPoints / float64(sampleWidth))) + sampleCellSize = bboxWidth / float64(sampleWidth) + } else { + sampleHeight = int(sampleSqrt) + sampleWidth = int(math.Ceil(sampleNPoints / float64(sampleHeight))) + sampleCellSize = bboxHeight / float64(sampleHeight) + } + // Prepare the polygon for fast true/false testing. + gPrep, err := geos.PrepareGeometry(g.EWKB()) + if err != nil { + return nil, errors.Wrap(err, "could not prepare geometry") + } + res, err := func() (*geom.MultiPoint, error) { + // Generate a slice of points - for every cell on a grid store coordinates. + n := sampleHeight * sampleWidth + cells := make([]geom.Coord, n) + for i := 0; i < sampleWidth; i++ { + for j := 0; j < sampleHeight; j++ { + cells[i*sampleHeight+j] = geom.Coord{float64(i), float64(j)} + } + } + // Shuffle the points. Without shuffling, the generated point will + // always be adjacent to the previous one (in terms of grid cells). + if n > 1 { + rng.Shuffle(n, func(i int, j int) { + temp := cells[j] + cells[j] = cells[i] + cells[i] = temp + }) + } + results := geom.NewMultiPoint(geom.XY) + // Generate points and test them. + for nPointsGenerated, iterations := 0, 0; nPointsGenerated < nPoints && iterations <= nPoints*10; iterations++ { + for _, cell := range cells { + y := bbox.LoY + cell.X()*sampleCellSize + x := bbox.LoX + cell.Y()*sampleCellSize + x += rng.Float64() * sampleCellSize + y += rng.Float64() * sampleCellSize + if x > bbox.HiX || y > bbox.HiY { + continue + } + gpt, err := geo.MakeGeometryFromPointCoords(x, y) + if err != nil { + return nil, errors.Wrap(err, "could not create geometry Point") + } + intersects, err := geos.PreparedIntersects(gPrep, gpt.EWKB()) + if err != nil { + return nil, errors.Wrap(err, "could not check prepared intersection") + } + if intersects { + nPointsGenerated++ + p := geom.NewPointFlat(geom.XY, []float64{x, y}) + srid := g.SRID() + p.SetSRID(int(srid)) + err = results.Push(p) + if err != nil { + return nil, errors.Wrap(err, "could not add point to the results") + } + if nPointsGenerated == nPoints { + return results, nil + } + } + } + } + return results, nil + }() + if err != nil { + destroyErr := geos.PreparedGeomDestroy(gPrep) + return nil, errors.CombineErrors(destroyErr, err) + } + return res, nil +} + +func generateRandomPointsFromMultiPolygon( + g geo.Geometry, nPoints int, rng *rand.Rand, +) (*geom.MultiPoint, error) { + results := geom.NewMultiPoint(geom.XY) + + area, err := Area(g) + if err != nil { + return nil, errors.Wrap(err, "could not calculate MultiPolygon area") + } + + gt, err := g.AsGeomT() + if err != nil { + return nil, errors.Wrap(err, "could not transform MultiPolygon into geom.T") + } + + gmp := gt.(*geom.MultiPolygon) + for i := 0; i < gmp.NumPolygons(); i++ { + poly := gmp.Polygon(i) + subarea := poly.Area() + subNPoints := int(math.Round(float64(nPoints) * subarea / area)) + if subNPoints > 0 { + g, err := geo.MakeGeometryFromGeomT(poly) + if err != nil { + return nil, errors.Wrap(err, "could not transform geom.T into Geometry") + } + subMPT, err := generateRandomPointsFromPolygon(g, subNPoints, rng) + if err != nil { + return nil, errors.Wrap(err, "error generating points for Polygon") + } + for j := 0; j < subMPT.NumPoints(); j++ { + if err := results.Push(subMPT.Point(j)); err != nil { + return nil, errors.Wrap(err, "could not push point to the results") + } + } + } + } + return results, nil +} diff --git a/pkg/geo/geomfn/generate_points_test.go b/pkg/geo/geomfn/generate_points_test.go new file mode 100644 index 000000000000..6b1b93b2dd39 --- /dev/null +++ b/pkg/geo/geomfn/generate_points_test.go @@ -0,0 +1,128 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geomfn + +import ( + "math/rand" + "testing" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/stretchr/testify/require" +) + +func TestGenerateRandomPoints(t *testing.T) { + type args struct { + g geo.Geometry + nPoints int + seed int64 + } + testCases := []struct { + name string + args args + want geo.Geometry + }{ + { + "number of points to generate less than minimum", + args{geo.MustParseGeometry("POLYGON((0 0, 1 0, 1 1.1, 0 0))"), -1, 1}, + geo.Geometry{}, + }, + { + "supported geometry, zero points to generate", + args{geo.MustParseGeometry("POLYGON((0 0, 1 0, 1 1.1, 0 0))"), 0, 1}, + geo.Geometry{}, + }, + { + "empty geometry", + args{geo.MustParseGeometry("POLYGON EMPTY"), 4, 1}, + geo.Geometry{}, + }, + { + "Polygon - square", + args{geo.MustParseGeometry("POLYGON((1 1,1 2,2 2,2 1,1 1))"), 4, 2}, + geo.MustParseGeometry("MULTIPOINT(1.55948870460174560115 1.30713530828859747501, 1.4638274793965568854 1.71250027479276534237, 1.6030771445518701146 1.60597601333107697918, 1.06344848690006488212 1.30988904426268293335)"), + }, + { + "Polygon, width > height", + args{geo.MustParseGeometry("POLYGON((0 0,1 2,3 2,2 0,0 0))"), 7, 14}, + geo.MustParseGeometry("MULTIPOINT(0.3830566789833752539 0.23117069178437729682, 2.51179632398947294547 1.69060434239713908156, 1.94222431495883451902 0.56512577777958861169, 1.87408814538545565043 1.21241169406013726828, 2.29499038937696608897 1.19218670122114289711, 1.93144882885377144888 0.79976266805657403314, 1.33675111888047548625 1.70583597131752906506)"), + }, + { + "Polygon, width < height", + args{geo.MustParseGeometry("POLYGON((0 0,2 5,2.5 4,3 5,3 1,0 0))"), 5, 1996}, + geo.MustParseGeometry("MULTIPOINT(0.69465779472271460548 0.92001319545446269554, 2.4417593921811042712 3.71642371685872197062, 2.79787890688424933927 3.8425013135166361522, 1.05776032659919683176 1.77173131482243051416, 1.79695770199420046254 2.42853164217679839965)"), + }, + { + "Polygon with a hole", + args{geo.MustParseGeometry("POLYGON((-1 -1, -1 1, 0 2, 1 1, 1 -1, 0 -2, -1 -1),(-0.5 -0.5, -0.5 0.5, 0.5 0.5, 0.5 -0.5, 0 -0.5, -0.5 -0.5))"), 7, 42}, + geo.MustParseGeometry("MULTIPOINT(0.62392209943416365725 1.14891034830739080519, 0.5066157824317096825 -1.39898866703452817717, -0.71853651145541486134 1.15646037366762399756, -0.03771709502060871522 -0.80984418343170694321, 0.56879738017966463559 -0.74897977355716416348, 0.05783156505961972726 1.37709385168907050279, 0.71334734167548885519 -0.61452515240937377605)"), + }, + { + "MultiPolygon", + args{geo.MustParseGeometry("MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)), ((0 0,-4 0,-4 -4,0 -4,0 0),(1 1,2 1,2 2,1 2,1 1)))"), 9, 17}, + geo.MustParseGeometry("MULTIPOINT(0.17846255116778333982 3.74754647968727550023, 2.27256794763602965048 0.21170437107020742551, 3.78118853421934808523 3.80677821706058949758, 1.04599577267150789517 3.86644467452649553962, 3.25038586104516369346 1.99712423764354585209, -1.69784827781519487289 -0.41663496633749641518, -3.13128096103860187327 -0.52028622879791064371, -2.49857072552626657824 -2.30333494646855019283, -0.37133983304899031985 -3.89307989068656556952, -0.31767322799652175647 -1.36504243564259120092)"), + }, + { + "Polygon with a specified SRID", + args{geo.MustParseGeometry("SRID=4326;POLYGON((0 0,2 5,2.5 4,3 5,3 1,0 0))"), 5, 1996}, + geo.MustParseGeometry("SRID=4326;MULTIPOINT(0.69465779472271460548 0.92001319545446269554, 2.4417593921811042712 3.71642371685872197062, 2.79787890688424933927 3.8425013135166361522, 1.05776032659919683176 1.77173131482243051416, 1.79695770199420046254 2.42853164217679839965)"), + }, + { + "Requires too many iterations over grid", + args{geo.MustParseGeometry("POLYGON((0 0,0 100,0.00001 0.00000001,0.99999 0.00000001,1 100,1 0,0 0))"), 5, 1996}, + geo.MustParseGeometry("MULTIPOINT(0.9999990842784447497849 37.4716712014091797300352, 0.0000035681765926535594 47.1117932985884309005087, 0.000004226396968502996 32.4473691709023412954593, 0.9999921320300672045178 13.3385008252821712915193)"), + }, + { + "Invalid Polygon", + args{geo.MustParseGeometry("POLYGON((-1 -1,1 -1,0 0,0 1,0 -1,0 0,-1 0,-1 -1))"), 5, 1996}, + geo.MustParseGeometry("MULTIPOINT(-0.26747218234566871864 -0.88507288494238345322, -0.87713357493233190532 -0.97615754673482446613, -0.44243286372996359912 -0.3727648333352959753, 0.43580610882637776937 -0.55479608815084269224, -0.55844006437980153734 -0.47716362944650048128)"), + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + rng := rand.New(rand.NewSource(tt.args.seed)) + got, err := GenerateRandomPoints(tt.args.g, tt.args.nPoints, rng) + require.NoError(t, err) + require.Equal(t, tt.want, got) + }) + } + + t.Run("errors", func(t *testing.T) { + errorTestCases := []struct { + name string + args args + errMatch string + }{ + { + "unsupported geometry, zero points to generate", + args{geo.MustParseGeometry("LINESTRING(50 50,150 150,150 50)"), 0, 1}, + "unsupported type: LineString", + }, + { + "unsupported geometry type: LineString", + args{geo.MustParseGeometry("LINESTRING(50 50,150 150,150 50)"), 4, 1}, + "unsupported type: LineString", + }, + { + "Polygon with zero area", + args{geo.MustParseGeometry("POLYGON((0 0, 1 1, 1 1, 0 0))"), 4, 1}, + "zero area input Polygon", + }, + } + for _, tt := range errorTestCases { + t.Run(tt.name, func(t *testing.T) { + rng := rand.New(rand.NewSource(tt.args.seed)) + _, err := GenerateRandomPoints(tt.args.g, tt.args.nPoints, rng) + require.Error(t, err) + require.Contains(t, err.Error(), tt.errMatch) + }) + } + }) +} diff --git a/pkg/geo/geos/geos.cc b/pkg/geo/geos/geos.cc index d60083f7c1b9..b62f86dd4370 100644 --- a/pkg/geo/geos/geos.cc +++ b/pkg/geo/geos/geos.cc @@ -129,6 +129,11 @@ typedef int (*CR_GEOS_HausdorffDistance_r)(CR_GEOS_Handle, CR_GEOS_Geometry, CR_ typedef int (*CR_GEOS_HausdorffDistanceDensify_r)(CR_GEOS_Handle, CR_GEOS_Geometry, CR_GEOS_Geometry, double, double*); +typedef CR_GEOS_PreparedInternalGeometry (*CR_GEOS_Prepare_r)(CR_GEOS_Handle, CR_GEOS_Geometry); +typedef void (*CR_GEOS_PreparedGeom_destroy_r)(CR_GEOS_Handle, CR_GEOS_PreparedInternalGeometry); + +typedef char (*CR_GEOS_PreparedIntersects_r)(CR_GEOS_Handle, CR_GEOS_PreparedInternalGeometry, CR_GEOS_Geometry); + typedef char (*CR_GEOS_Covers_r)(CR_GEOS_Handle, CR_GEOS_Geometry, CR_GEOS_Geometry); typedef char (*CR_GEOS_CoveredBy_r)(CR_GEOS_Handle, CR_GEOS_Geometry, CR_GEOS_Geometry); typedef char (*CR_GEOS_Contains_r)(CR_GEOS_Handle, CR_GEOS_Geometry, CR_GEOS_Geometry); @@ -242,6 +247,11 @@ struct CR_GEOS { CR_GEOS_HausdorffDistance_r GEOSHausdorffDistance_r; CR_GEOS_HausdorffDistanceDensify_r GEOSHausdorffDistanceDensify_r; + CR_GEOS_Prepare_r GEOSPrepare_r; + CR_GEOS_PreparedGeom_destroy_r GEOSPreparedGeom_destroy_r; + + CR_GEOS_PreparedIntersects_r GEOSPreparedIntersects_r; + CR_GEOS_Covers_r GEOSCovers_r; CR_GEOS_CoveredBy_r GEOSCoveredBy_r; CR_GEOS_Contains_r GEOSContains_r; @@ -341,6 +351,9 @@ struct CR_GEOS { INIT(GEOSFrechetDistanceDensify_r); INIT(GEOSHausdorffDistance_r); INIT(GEOSHausdorffDistanceDensify_r); + INIT(GEOSPrepare_r); + INIT(GEOSPreparedGeom_destroy_r); + INIT(GEOSPreparedIntersects_r); INIT(GEOSCovers_r); INIT(GEOSCoveredBy_r); INIT(GEOSContains_r); @@ -1160,6 +1173,70 @@ CR_GEOS_Status CR_GEOS_HausdorffDistanceDensify(CR_GEOS* lib, CR_GEOS_Slice a, C return toGEOSString(error.data(), error.length()); } +// +// PreparedGeometry +// + +CR_GEOS_Status CR_GEOS_Prepare(CR_GEOS* lib, CR_GEOS_Slice a, CR_GEOS_PreparedGeometry** ret) { + std::string error; + auto handle = initHandleWithErrorBuffer(lib, &error); + + auto wkbReader = lib->GEOSWKBReader_create_r(handle); + auto geom = lib->GEOSWKBReader_read_r(handle, wkbReader, a.data, a.len); + lib->GEOSWKBReader_destroy_r(handle, wkbReader); + if (geom != nullptr) { + auto preparedGeom = lib->GEOSPrepare_r(handle, geom); + auto tmp = new CR_GEOS_PreparedGeometry(); + tmp->g = geom; + tmp->p = preparedGeom; + *ret = tmp; + } + lib->GEOS_finish_r(handle); + return toGEOSString(error.data(), error.length()); +} + +CR_GEOS_Status CR_GEOS_PreparedGeometryDestroy(CR_GEOS* lib, CR_GEOS_PreparedGeometry* a) { + std::string error; + auto handle = initHandleWithErrorBuffer(lib, &error); + lib->GEOSPreparedGeom_destroy_r(handle, a->p); + lib->GEOSGeom_destroy_r(handle, a->g); + lib->GEOS_finish_r(handle); + delete a; + return toGEOSString(error.data(), error.length()); +} + +template