Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Use smarter string decoding in GeoDjango

The first attempt to solve the Python 3 GIS encoding/decoding issue
was too naive. Calling decode() on every string read is bound to fail
as soon as a non-ASCII string is encountered.
This patch is a little smarter: it keeps ASCII decoding where
plain ASCII strings are expected, and allows a custom encoding
to be specified in the DataSource hierarchy.
  • Loading branch information...
commit 9a2bceed1aab52f65820c378f5ae1f608322b55c 1 parent a62d53c
@claudep claudep authored
View
9 django/contrib/gis/gdal/datasource.py
@@ -45,7 +45,7 @@
# Getting the ctypes prototypes for the DataSource.
from django.contrib.gis.gdal.prototypes import ds as capi
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@@ -57,12 +57,14 @@ class DataSource(GDALBase):
"Wraps an OGR Data Source object."
#### Python 'magic' routines ####
- def __init__(self, ds_input, ds_driver=False, write=False):
+ def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
# The write flag.
if write:
self._write = 1
else:
self._write = 0
+ # See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
+ self.encoding = encoding
# Registering all the drivers, this needs to be done
# _before_ we try to open up a data source.
@@ -129,4 +131,5 @@ def layer_count(self):
@property
def name(self):
"Returns the name of the data source."
- return capi.get_ds_name(self._ptr)
+ name = capi.get_ds_name(self._ptr)
+ return force_text(name, self.encoding, strings_only=True)
View
9 django/contrib/gis/gdal/feature.py
@@ -7,7 +7,7 @@
# ctypes function prototypes
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@@ -69,6 +69,10 @@ def __eq__(self, other):
#### Feature Properties ####
@property
+ def encoding(self):
+ return self._layer._ds.encoding
+
+ @property
def fid(self):
"Returns the feature identifier."
return capi.get_fid(self.ptr)
@@ -76,7 +80,8 @@ def fid(self):
@property
def layer_name(self):
"Returns the name of the layer for the feature."
- return capi.get_feat_name(self._layer._ldefn)
+ name = capi.get_feat_name(self._layer._ldefn)
+ return force_text(name, self.encoding, strings_only=True)
@property
def num_fields(self):
View
8 django/contrib/gis/gdal/field.py
@@ -3,6 +3,8 @@
from django.contrib.gis.gdal.base import GDALBase
from django.contrib.gis.gdal.error import OGRException
from django.contrib.gis.gdal.prototypes import ds as capi
+from django.utils.encoding import force_text
+
# For more information, see the OGR C API source code:
# http://www.gdal.org/ogr/ogr__api_8h.html
@@ -53,7 +55,8 @@ def as_int(self):
def as_string(self):
"Retrieves the Field's value as a string."
- return capi.get_field_as_string(self._feat.ptr, self._index)
+ string = capi.get_field_as_string(self._feat.ptr, self._index)
+ return force_text(string, encoding=self._feat.encoding, strings_only=True)
def as_datetime(self):
"Retrieves the Field's value as a tuple of date & time components."
@@ -70,7 +73,8 @@ def as_datetime(self):
@property
def name(self):
"Returns the name of this Field."
- return capi.get_field_name(self.ptr)
+ name = capi.get_field_name(self.ptr)
+ return force_text(name, encoding=self._feat.encoding, strings_only=True)
@property
def precision(self):
View
10 django/contrib/gis/gdal/layer.py
@@ -14,7 +14,7 @@
# GDAL ctypes function prototypes.
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@@ -103,7 +103,8 @@ def extent(self):
@property
def name(self):
"Returns the name of this layer in the Data Source."
- return capi.get_fd_name(self._ldefn)
+ name = capi.get_fd_name(self._ldefn)
+ return force_text(name, self._ds.encoding, strings_only=True)
@property
def num_feat(self, force=1):
@@ -135,8 +136,9 @@ def fields(self):
Returns a list of string names corresponding to each of the Fields
available in this Layer.
"""
- return [capi.get_field_name(capi.get_field_defn(self._ldefn, i))
- for i in xrange(self.num_fields) ]
+ return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
+ self._ds.encoding, strings_only=True)
+ for i in xrange(self.num_fields)]
@property
def field_types(self):
View
2  django/contrib/gis/gdal/prototypes/ds.py
@@ -17,7 +17,7 @@
get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
-get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p])
+get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
### DataSource ###
open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])
View
9 django/contrib/gis/gdal/prototypes/errcheck.py
@@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
if offset:
check_err(result)
ptr = ptr_byref(cargs, offset)
- return ptr.value.decode()
+ return ptr.value
else:
- if result is not None:
- return result.decode()
+ return result
def check_string(result, func, cargs, offset=-1, str_result=False):
"""
@@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
# For routines that return a string.
ptr = result
if not ptr: s = None
- else: s = string_at(result).decode()
+ else: s = string_at(result)
else:
# Error-code return specified.
check_err(result)
ptr = ptr_byref(cargs, offset)
# Getting the string value
- s = ptr.value.decode()
+ s = ptr.value
# Correctly freeing the allocated memory beind GDAL pointer
# w/the VSIFree routine.
if ptr: lgdal.VSIFree(ptr)
View
14 django/contrib/gis/gdal/prototypes/generation.py
@@ -57,7 +57,7 @@ def srs_output(func, argtypes):
func.errcheck = check_srs
return func
-def const_string_output(func, argtypes, offset=None):
+def const_string_output(func, argtypes, offset=None, decoding=None):
func.argtypes = argtypes
if offset:
func.restype = c_int
@@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
func.restype = c_char_p
def _check_const(result, func, cargs):
- return check_const_string(result, func, cargs, offset=offset)
+ res = check_const_string(result, func, cargs, offset=offset)
+ if res and decoding:
+ res = res.decode(decoding)
+ return res
func.errcheck = _check_const
return func
-def string_output(func, argtypes, offset=-1, str_result=False):
+def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
"""
Generates a ctypes prototype for the given function with the
given argument types that returns a string from a GDAL pointer.
@@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
# Dynamically defining our error-checking function with the
# given offset.
def _check_str(result, func, cargs):
- return check_string(result, func, cargs,
+ res = check_string(result, func, cargs,
offset=offset, str_result=str_result)
+ if res and decoding:
+ res = res.decode(decoding)
+ return res
func.errcheck = _check_str
return func
View
10 django/contrib/gis/gdal/prototypes/geom.py
@@ -27,8 +27,8 @@ def topology_func(f):
# GeoJSON routines.
from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
-to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True)
-to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True)
+to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
+to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
# GetX, GetY, GetZ all return doubles.
getx = pnt_func(lgdal.OGR_G_GetX)
@@ -57,8 +57,8 @@ def topology_func(f):
# Geometry export routines.
to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
-to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)])
-to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True)
+to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
# Geometry spatial-reference related routines.
@@ -73,7 +73,7 @@ def topology_func(f):
set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
-get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p])
+get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)
View
14 django/contrib/gis/gdal/prototypes/srs.py
@@ -49,17 +49,17 @@ def units_func(f):
angular_units = units_func(lgdal.OSRGetAngularUnits)
# For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
-to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)])
-to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)])
-to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2)
+to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
# Memory leak fixed in GDAL 1.5; still exists in 1.4.
-to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2)
+to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
# String attribute retrieval routines.
-get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int])
-get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p])
-get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p])
+get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
+get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
+get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
# SRS Properties
isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])
View
5 django/contrib/gis/gdal/srs.py
@@ -34,7 +34,7 @@
from django.contrib.gis.gdal.prototypes import srs as capi
from django.utils import six
-from django.utils.encoding import force_bytes, force_text
+from django.utils.encoding import force_bytes
#### Spatial Reference class. ####
@@ -139,8 +139,7 @@ def attr_value(self, target, index=0):
"""
if not isinstance(target, six.string_types) or not isinstance(index, int):
raise TypeError
- value = capi.get_attr_value(self.ptr, force_bytes(target), index)
- return force_text(value, 'ascii', strings_only=True)
+ return capi.get_attr_value(self.ptr, force_bytes(target), index)
def auth_name(self, target):
"Returns the authority name for the given string target node."
View
3  django/contrib/gis/gdal/tests/test_ds.py
@@ -167,7 +167,8 @@ def test04_features(self):
self.assertEqual(True, isinstance(feat[k], v))
# Testing Feature.__iter__
- for fld in feat: self.assertEqual(True, fld.name in source.fields.keys())
+ for fld in feat:
+ self.assertEqual(True, fld.name in source.fields.keys())
def test05_geometries(self):
"Testing Geometries from Data Source Features."
View
BIN  django/contrib/gis/tests/data/ch-city/ch-city.dbf
Binary file not shown
View
1  django/contrib/gis/tests/data/ch-city/ch-city.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
View
BIN  django/contrib/gis/tests/data/ch-city/ch-city.shp
Binary file not shown
View
BIN  django/contrib/gis/tests/data/ch-city/ch-city.shx
Binary file not shown
View
10 django/contrib/gis/tests/layermap/tests.py
@@ -1,4 +1,5 @@
-from __future__ import absolute_import
+# coding: utf-8
+from __future__ import absolute_import, unicode_literals
import os
from copy import copy
@@ -286,6 +287,13 @@ def test_textfield(self):
self.assertEqual(City.objects.count(), 3)
self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
+ def test_encoded_name(self):
+ """ Test a layer containing utf-8-encoded name """
+ city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
+ lm = LayerMapping(City, city_shp, city_mapping)
+ lm.save(silent=True, strict=True)
+ self.assertEqual(City.objects.count(), 1)
+ self.assertEqual(City.objects.all()[0].name, "Zürich")
class OtherRouter(object):
def db_for_read(self, model, **hints):
View
8 django/contrib/gis/utils/layermapping.py
@@ -18,6 +18,8 @@
from django.db import models, transaction
from django.contrib.localflavor.us.models import USStateField
from django.utils import six
+from django.utils.encoding import force_text
+
# LayerMapping exceptions.
class LayerMapError(Exception): pass
@@ -65,7 +67,7 @@ class LayerMapping(object):
}
def __init__(self, model, data, mapping, layer=0,
- source_srs=None, encoding=None,
+ source_srs=None, encoding='utf-8',
transaction_mode='commit_on_success',
transform=True, unique=None, using=None):
"""
@@ -76,7 +78,7 @@ def __init__(self, model, data, mapping, layer=0,
"""
# Getting the DataSource and the associated Layer.
if isinstance(data, six.string_types):
- self.ds = DataSource(data)
+ self.ds = DataSource(data, encoding=encoding)
else:
self.ds = data
self.layer = self.ds[layer]
@@ -330,7 +332,7 @@ def verify_ogr_field(self, ogr_field, model_field):
if self.encoding:
# The encoding for OGR data sources may be specified here
# (e.g., 'cp437' for Census Bureau boundary files).
- val = six.text_type(ogr_field.value, self.encoding)
+ val = force_text(ogr_field.value, self.encoding)
else:
val = ogr_field.value
if model_field.max_length and len(val) > model_field.max_length:
Please sign in to comment.
Something went wrong with that request. Please try again.