From 86d41dc0cd80ddd19677ab1edd65f34bba090ae2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 26 Jan 2024 10:30:54 +0100 Subject: [PATCH 1/4] REF: factor out dataset and layer creation for ogr_write --- pyogrio/_io.pyx | 144 ++++++++++++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 61 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 12fb85a3..a0d6e254 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1679,13 +1679,11 @@ cdef infer_field_types(list dtypes): return field_types -# TODO: set geometry and field data as memory views? -def ogr_write( - str path, str layer, str driver, geometry, fields, field_data, field_mask, - str crs, str geometry_type, str encoding, object dataset_kwargs, - object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True, - bint append=False, dataset_metadata=None, layer_metadata=None, - gdal_tz_offsets=None +cdef bint create_dataset_layer( + str path, str layer, str driver, str crs, str geometry_type, str encoding, + object dataset_kwargs, object layer_kwargs, bint append, + dataset_metadata, layer_metadata, + OGRDataSourceH* ogr_dataset_out, OGRLayerH* ogr_layer_out, ): cdef const char *path_c = NULL cdef const char *layer_c = NULL @@ -1697,55 +1695,9 @@ def ogr_write( cdef const char *ogr_name = NULL cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL - cdef OGRFeatureH ogr_feature = NULL - cdef OGRGeometryH ogr_geometry = NULL - cdef OGRGeometryH ogr_geometry_multi = NULL - cdef OGRFeatureDefnH ogr_featuredef = NULL - cdef OGRFieldDefnH ogr_fielddef = NULL - cdef unsigned char *wkb_buffer = NULL cdef OGRSpatialReferenceH ogr_crs = NULL - cdef int layer_idx = -1 - cdef int supports_transactions = 0 cdef OGRwkbGeometryType geometry_code - cdef int err = 0 - cdef int i = 0 - cdef int num_records = -1 - cdef int num_field_data = len(field_data) if field_data is not None else 0 - cdef int num_fields = len(fields) if fields is not None else 0 - - if num_fields != num_field_data: - raise ValueError("field_data array needs to be same length as fields array") - - if num_fields == 0 and geometry is None: - raise ValueError("You must provide at least a geometry column or a field") - - if num_fields > 0: - num_records = len(field_data[0]) - for i in range(1, len(field_data)): - if len(field_data[i]) != num_records: - raise ValueError("field_data arrays must be same length") - - if geometry is None: - # If no geometry data, we ignore the geometry_type and don't create a geometry - # column - geometry_type = None - else: - if num_fields > 0: - if len(geometry) != num_records: - raise ValueError( - "field_data arrays must be same length as geometry array" - ) - else: - num_records = len(geometry) - - if field_mask is not None: - if len(field_data) != len(field_mask): - raise ValueError("field_data and field_mask must be same length") - for i in range(0, len(field_mask)): - if field_mask[i] is not None and len(field_mask[i]) != num_records: - raise ValueError("field_mask arrays must be same length as geometry array") - else: - field_mask = [None] * num_fields + cdef int layer_idx = -1 path_b = path.encode('UTF-8') path_c = path_b @@ -1756,10 +1708,6 @@ def ogr_write( if not layer: layer = os.path.splitext(os.path.split(path)[1])[0] - if gdal_tz_offsets is None: - gdal_tz_offsets = {} - - # if shapefile, GeoJSON, or FlatGeobuf, always delete first # for other types, check if we can create layers # GPKG might be the only multi-layer writeable type. TODO: check this @@ -1818,8 +1766,6 @@ def ogr_write( raise exc # Setup layer creation options - if not encoding: - encoding = locale.getpreferredencoding() if driver == 'ESRI Shapefile': # Fiona only sets encoding for shapefiles; other drivers do not support @@ -1873,13 +1819,89 @@ def ogr_write( CSLDestroy(layer_options) layer_options = NULL + ogr_dataset_out[0] = ogr_dataset + ogr_layer_out[0] = ogr_layer + return create_layer + + +# TODO: set geometry and field data as memory views? +def ogr_write( + str path, str layer, str driver, geometry, fields, field_data, field_mask, + str crs, str geometry_type, str encoding, object dataset_kwargs, + object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True, + bint append=False, dataset_metadata=None, layer_metadata=None, + gdal_tz_offsets=None +): + cdef OGRDataSourceH ogr_dataset = NULL + cdef OGRLayerH ogr_layer = NULL + cdef OGRFeatureH ogr_feature = NULL + cdef OGRGeometryH ogr_geometry = NULL + cdef OGRGeometryH ogr_geometry_multi = NULL + cdef OGRFeatureDefnH ogr_featuredef = NULL + cdef OGRFieldDefnH ogr_fielddef = NULL + cdef unsigned char *wkb_buffer = NULL + cdef int supports_transactions = 0 + cdef int err = 0 + cdef int i = 0 + cdef int num_records = -1 + cdef int num_field_data = len(field_data) if field_data is not None else 0 + cdef int num_fields = len(fields) if fields is not None else 0 + + if num_fields != num_field_data: + raise ValueError("field_data array needs to be same length as fields array") + + if num_fields == 0 and geometry is None: + raise ValueError("You must provide at least a geometry column or a field") + + if num_fields > 0: + num_records = len(field_data[0]) + for i in range(1, len(field_data)): + if len(field_data[i]) != num_records: + raise ValueError("field_data arrays must be same length") + + if geometry is None: + # If no geometry data, we ignore the geometry_type and don't create a geometry + # column + geometry_type = None + else: + if num_fields > 0: + if len(geometry) != num_records: + raise ValueError( + "field_data arrays must be same length as geometry array" + ) + else: + num_records = len(geometry) + + if field_mask is not None: + if len(field_data) != len(field_mask): + raise ValueError("field_data and field_mask must be same length") + for i in range(0, len(field_mask)): + if field_mask[i] is not None and len(field_mask[i]) != num_records: + raise ValueError("field_mask arrays must be same length as geometry array") + else: + field_mask = [None] * num_fields + + if gdal_tz_offsets is None: + gdal_tz_offsets = {} + + ### Setup up dataset and layer + if not encoding: + encoding = locale.getpreferredencoding() + + layer_created = create_dataset_layer( + path, layer, driver, crs, geometry_type, encoding, + dataset_kwargs, layer_kwargs, append, + dataset_metadata, layer_metadata, + &ogr_dataset, &ogr_layer, + ) + ### Create the fields field_types = None if num_fields > 0: field_types = infer_field_types([field.dtype for field in field_data]) ### Create the fields - if create_layer: + if layer_created: for i in range(num_fields): field_type, field_subtype, width, precision = field_types[i] From fd9cad9c6be4a57e8d1932f6999372495e1600f8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 26 Jan 2024 12:34:21 +0100 Subject: [PATCH 2/4] return value --- pyogrio/_io.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index a0d6e254..52961996 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1679,7 +1679,7 @@ cdef infer_field_types(list dtypes): return field_types -cdef bint create_dataset_layer( +cdef create_dataset_layer( str path, str layer, str driver, str crs, str geometry_type, str encoding, object dataset_kwargs, object layer_kwargs, bint append, dataset_metadata, layer_metadata, From eb9ae4716a523a7bde3768dc1e7e51b91ca20c6a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 29 Jan 2024 15:11:49 +0100 Subject: [PATCH 3/4] add docstring --- pyogrio/_io.pyx | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 11747dff..02443669 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1679,12 +1679,33 @@ cdef infer_field_types(list dtypes): return field_types -cdef create_dataset_layer( +cdef create_ogr_dataset_layer( str path, str layer, str driver, str crs, str geometry_type, str encoding, object dataset_kwargs, object layer_kwargs, bint append, dataset_metadata, layer_metadata, OGRDataSourceH* ogr_dataset_out, OGRLayerH* ogr_layer_out, ): + """ + Construct the OGRDataSource and OGRLayer objects based on input + path and layer. + + If the file already exists, will open the existing dataset and overwrite + or append the layer (depending on `append`), otherwise will create a new + dataset. + + Fills in the `ogr_dataset_out` and `ogr_layer_out` pointers passed as + parameter with initialized objects (or raise error is it fails to do so). + It is the responsibility of the caller to clean up those objects after use. + Returns whether a new layer was created or not (when the layer was created, + the caller still needs to set up the layer definition, i.e. create the + fields). + + Returns + ------- + bool : + Whether a new layer was created, or False if we are appending to an + existing layer. + """ cdef const char *path_c = NULL cdef const char *layer_c = NULL cdef const char *driver_c = NULL From 0310170bbad5cd6c9fb637ef89d7f7d880c36c5c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 29 Jan 2024 15:12:23 +0100 Subject: [PATCH 4/4] fix rename --- pyogrio/_io.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 02443669..5e6ff9aa 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1909,7 +1909,7 @@ def ogr_write( if not encoding: encoding = locale.getpreferredencoding() - layer_created = create_dataset_layer( + layer_created = create_ogr_dataset_layer( path, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata,