Skip to content

Commit

Permalink
Added change to add geopandas data frame to Datahub Space
Browse files Browse the repository at this point in the history
Signed-off-by: Omkar Mestry <omkar.mestry@here.com>
  • Loading branch information
omanges committed Sep 4, 2020
1 parent 348da83 commit f55a748
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 19 deletions.
1 change: 1 addition & 0 deletions changes/features/pr.71.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added feature to upload data from a GeoPandas `GeoDataFrame` to the space.
10 changes: 10 additions & 0 deletions tests/space/test_space_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pathlib import Path
from time import sleep

import geopandas as gpd
import pytest
from geojson import GeoJSON

Expand Down Expand Up @@ -690,3 +691,12 @@ def test_add_features_duplicate(empty_space):
empty_space.add_features(geojson, features_size=100)
stats = empty_space.get_statistics()
assert stats["count"]["value"] == 180


@pytest.mark.skipif(not XYZ_TOKEN, reason="No token found.")
def test_add_features_geopandas(empty_space):
    """Upload the countries sample as a GeoPandas dataframe and check the count."""
    data_dir = Path(__file__).parents[1] / "data"
    countries = gpd.read_file(data_dir / "countries.geo.json")

    empty_space.add_features_geopandas(data=countries)

    # The space statistics report how many features the space now holds.
    feature_count = empty_space.get_statistics()["count"]["value"]
    assert feature_count == 292
46 changes: 27 additions & 19 deletions xyzspaces/spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from geojson import Feature, GeoJSON

from .apis import HubApi
from .utils import divide_bbox, grouper, wkt_to_geojson
from .utils import divide_bbox, flatten_geometry, grouper, wkt_to_geojson

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -1179,24 +1179,7 @@ def add_features_kml(

gdf = gpd.read_file(path, driver="KML")

geometry = gdf.geometry
flattened_geometry = []

flattened_gdf = gpd.GeoDataFrame()

for geom in geometry:
if geom.type in [
"GeometryCollection",
"MultiPoint",
"MultiLineString",
"MultiPolygon",
]:
for subgeom in geom:
flattened_geometry.append(subgeom)
else:
flattened_geometry.append(geom)

flattened_gdf.geometry = flattened_geometry
flattened_gdf = flatten_geometry(gdf)

with tempfile.NamedTemporaryFile() as temp:
flattened_gdf.to_file(temp.name, driver="GeoJSON")
Expand Down Expand Up @@ -1228,3 +1211,28 @@ def add_features_geobuf(
features_size=features_size,
chunk_size=chunk_size,
)

def add_features_geopandas(
    self,
    data: gpd.GeoDataFrame,
    features_size: int = 2000,
    chunk_size: int = 1,
):
    """
    Add features from GeoPandas dataframe to a space.

    The dataframe is passed through ``flatten_geometry``, written to a
    temporary GeoJSON file and then uploaded with ``add_features_geojson``.

    :param data: GeoPandas dataframe to be uploaded
    :param features_size: The number of features to upload at
        a time.
    :param chunk_size: Number of chunks for each process to handle. The default value
        is 1, for a large number of features please use `chunk_size` greater than 1.
    """
    import os

    flattened_gdf = flatten_geometry(data)

    # Write to a fresh path inside a private temporary directory instead of
    # reopening a ``NamedTemporaryFile`` by name: a named temporary file that
    # is still open cannot be opened a second time on Windows.
    with tempfile.TemporaryDirectory() as temp_dir:
        geojson_path = os.path.join(temp_dir, "features.geojson")
        flattened_gdf.to_file(geojson_path, driver="GeoJSON")
        self.add_features_geojson(
            path=geojson_path,
            features_size=features_size,
            chunk_size=chunk_size,
        )
33 changes: 33 additions & 0 deletions xyzspaces/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from itertools import zip_longest
from typing import List, Optional

import geopandas as gpd
from geojson import Feature, FeatureCollection, Point, Polygon
from shapely import geometry, wkt
from turfpy.measurement import bbox, bbox_polygon, distance, length
Expand Down Expand Up @@ -234,3 +235,35 @@ def divide_bbox(
except Exception:
logger.debug("The intersection geometry is incorrect")
return final


def flatten_geometry(data: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Flatten the geometries in the given GeoPandas dataframe.

    Every GeometryCollection, MultiPoint, MultiLineString and MultiPolygon
    is replaced by one row per member geometry; rows with any other geometry
    are kept as they are. Only one level is unpacked, so a collection nested
    inside a GeometryCollection is emitted as-is. The attribute columns of a
    row are repeated for each of its member geometries and the CRS of the
    input is carried over.

    :param data: GeoPandas dataframe to be flattened
    :return: Flat GeoPandas dataframe with a fresh ``0..n-1`` index
    """
    multi_part_types = {
        "GeometryCollection",
        "MultiPoint",
        "MultiLineString",
        "MultiPolygon",
    }

    source_rows = []  # positional index of the input row each part came from
    parts = []  # flattened geometries, aligned with ``source_rows``

    for position, geom in enumerate(data.geometry):
        # A missing geometry has no type to inspect; keep its row untouched.
        if geom is not None and geom.geom_type in multi_part_types:
            # ``.geoms`` is the supported accessor for member geometries;
            # iterating the multi-part geometry itself fails on Shapely 2.
            members = list(geom.geoms)
        else:
            members = [geom]
        parts.extend(members)
        source_rows.extend([position] * len(members))

    # Repeat each source row once per part so attribute columns survive,
    # then swap in the flattened geometries under the original column name.
    flattened_gdf = data.iloc[source_rows].reset_index(drop=True)
    flattened_gdf[data.geometry.name] = gpd.GeoSeries(
        parts, index=flattened_gdf.index, crs=data.crs
    )

    return flattened_gdf

0 comments on commit f55a748

Please sign in to comment.