Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Use smarter string decoding in GeoDjango

The first attempt at solving the Python 3 GIS encoding/decoding issue
was too naive: calling decode() on every string read is bound to fail
as soon as a non-ASCII string is encountered.
This patch is a little more clever: it keeps ASCII decoding where
plain ASCII strings are expected, and allows a custom encoding to be
specified in the DataSource hierarchy.
  • Loading branch information...
commit 9a2bceed1aab52f65820c378f5ae1f608322b55c 1 parent a62d53c
@claudep claudep authored
View
9 django/contrib/gis/gdal/datasource.py
@@ -45,7 +45,7 @@
# Getting the ctypes prototypes for the DataSource.
from django.contrib.gis.gdal.prototypes import ds as capi
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@@ -57,12 +57,14 @@ class DataSource(GDALBase):
"Wraps an OGR Data Source object."
#### Python 'magic' routines ####
- def __init__(self, ds_input, ds_driver=False, write=False):
+ def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
# The write flag.
if write:
self._write = 1
else:
self._write = 0
+ # See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
+ self.encoding = encoding
# Registering all the drivers, this needs to be done
# _before_ we try to open up a data source.
@@ -129,4 +131,5 @@ def layer_count(self):
@property
def name(self):
"Returns the name of the data source."
- return capi.get_ds_name(self._ptr)
+ name = capi.get_ds_name(self._ptr)
+ return force_text(name, self.encoding, strings_only=True)
View
9 django/contrib/gis/gdal/feature.py
@@ -7,7 +7,7 @@
# ctypes function prototypes
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@@ -69,6 +69,10 @@ def __eq__(self, other):
#### Feature Properties ####
@property
+ def encoding(self):
+ return self._layer._ds.encoding
+
+ @property
def fid(self):
"Returns the feature identifier."
return capi.get_fid(self.ptr)
@@ -76,7 +80,8 @@ def fid(self):
@property
def layer_name(self):
"Returns the name of the layer for the feature."
- return capi.get_feat_name(self._layer._ldefn)
+ name = capi.get_feat_name(self._layer._ldefn)
+ return force_text(name, self.encoding, strings_only=True)
@property
def num_fields(self):
View
8 django/contrib/gis/gdal/field.py
@@ -3,6 +3,8 @@
from django.contrib.gis.gdal.base import GDALBase
from django.contrib.gis.gdal.error import OGRException
from django.contrib.gis.gdal.prototypes import ds as capi
+from django.utils.encoding import force_text
+
# For more information, see the OGR C API source code:
# http://www.gdal.org/ogr/ogr__api_8h.html
@@ -53,7 +55,8 @@ def as_int(self):
def as_string(self):
"Retrieves the Field's value as a string."
- return capi.get_field_as_string(self._feat.ptr, self._index)
+ string = capi.get_field_as_string(self._feat.ptr, self._index)
+ return force_text(string, encoding=self._feat.encoding, strings_only=True)
def as_datetime(self):
"Retrieves the Field's value as a tuple of date & time components."
@@ -70,7 +73,8 @@ def as_datetime(self):
@property
def name(self):
"Returns the name of this Field."
- return capi.get_field_name(self.ptr)
+ name = capi.get_field_name(self.ptr)
+ return force_text(name, encoding=self._feat.encoding, strings_only=True)
@property
def precision(self):
View
10 django/contrib/gis/gdal/layer.py
@@ -14,7 +14,7 @@
# GDAL ctypes function prototypes.
from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
-from django.utils.encoding import force_bytes
+from django.utils.encoding import force_bytes, force_text
from django.utils import six
from django.utils.six.moves import xrange
@@ -103,7 +103,8 @@ def extent(self):
@property
def name(self):
"Returns the name of this layer in the Data Source."
- return capi.get_fd_name(self._ldefn)
+ name = capi.get_fd_name(self._ldefn)
+ return force_text(name, self._ds.encoding, strings_only=True)
@property
def num_feat(self, force=1):
@@ -135,8 +136,9 @@ def fields(self):
Returns a list of string names corresponding to each of the Fields
available in this Layer.
"""
- return [capi.get_field_name(capi.get_field_defn(self._ldefn, i))
- for i in xrange(self.num_fields) ]
+ return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
+ self._ds.encoding, strings_only=True)
+ for i in xrange(self.num_fields)]
@property
def field_types(self):
View
2  django/contrib/gis/gdal/prototypes/ds.py
@@ -17,7 +17,7 @@
get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
-get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p])
+get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
### DataSource ###
open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])
View
9 django/contrib/gis/gdal/prototypes/errcheck.py
@@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
if offset:
check_err(result)
ptr = ptr_byref(cargs, offset)
- return ptr.value.decode()
+ return ptr.value
else:
- if result is not None:
- return result.decode()
+ return result
def check_string(result, func, cargs, offset=-1, str_result=False):
"""
@@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
# For routines that return a string.
ptr = result
if not ptr: s = None
- else: s = string_at(result).decode()
+ else: s = string_at(result)
else:
# Error-code return specified.
check_err(result)
ptr = ptr_byref(cargs, offset)
# Getting the string value
- s = ptr.value.decode()
+ s = ptr.value
# Correctly freeing the allocated memory behind GDAL pointer
# w/the VSIFree routine.
if ptr: lgdal.VSIFree(ptr)
View
14 django/contrib/gis/gdal/prototypes/generation.py
@@ -57,7 +57,7 @@ def srs_output(func, argtypes):
func.errcheck = check_srs
return func
-def const_string_output(func, argtypes, offset=None):
+def const_string_output(func, argtypes, offset=None, decoding=None):
func.argtypes = argtypes
if offset:
func.restype = c_int
@@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
func.restype = c_char_p
def _check_const(result, func, cargs):
- return check_const_string(result, func, cargs, offset=offset)
+ res = check_const_string(result, func, cargs, offset=offset)
+ if res and decoding:
+ res = res.decode(decoding)
+ return res
func.errcheck = _check_const
return func
-def string_output(func, argtypes, offset=-1, str_result=False):
+def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
"""
Generates a ctypes prototype for the given function with the
given argument types that returns a string from a GDAL pointer.
@@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
# Dynamically defining our error-checking function with the
# given offset.
def _check_str(result, func, cargs):
- return check_string(result, func, cargs,
+ res = check_string(result, func, cargs,
offset=offset, str_result=str_result)
+ if res and decoding:
+ res = res.decode(decoding)
+ return res
func.errcheck = _check_str
return func
View
10 django/contrib/gis/gdal/prototypes/geom.py
@@ -27,8 +27,8 @@ def topology_func(f):
# GeoJSON routines.
from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
-to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True)
-to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True)
+to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
+to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
# GetX, GetY, GetZ all return doubles.
getx = pnt_func(lgdal.OGR_G_GetX)
@@ -57,8 +57,8 @@ def topology_func(f):
# Geometry export routines.
to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
-to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)])
-to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True)
+to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
# Geometry spatial-reference related routines.
@@ -73,7 +73,7 @@ def topology_func(f):
set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
-get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p])
+get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)
View
14 django/contrib/gis/gdal/prototypes/srs.py
@@ -49,17 +49,17 @@ def units_func(f):
angular_units = units_func(lgdal.OSRGetAngularUnits)
# For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
-to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)])
-to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)])
-to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2)
+to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
+to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
# Memory leak fixed in GDAL 1.5; still exists in 1.4.
-to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2)
+to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
# String attribute retrieval routines.
-get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int])
-get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p])
-get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p])
+get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
+get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
+get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
# SRS Properties
isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])
View
5 django/contrib/gis/gdal/srs.py
@@ -34,7 +34,7 @@
from django.contrib.gis.gdal.prototypes import srs as capi
from django.utils import six
-from django.utils.encoding import force_bytes, force_text
+from django.utils.encoding import force_bytes
#### Spatial Reference class. ####
@@ -139,8 +139,7 @@ def attr_value(self, target, index=0):
"""
if not isinstance(target, six.string_types) or not isinstance(index, int):
raise TypeError
- value = capi.get_attr_value(self.ptr, force_bytes(target), index)
- return force_text(value, 'ascii', strings_only=True)
+ return capi.get_attr_value(self.ptr, force_bytes(target), index)
def auth_name(self, target):
"Returns the authority name for the given string target node."
View
3  django/contrib/gis/gdal/tests/test_ds.py
@@ -167,7 +167,8 @@ def test04_features(self):
self.assertEqual(True, isinstance(feat[k], v))
# Testing Feature.__iter__
- for fld in feat: self.assertEqual(True, fld.name in source.fields.keys())
+ for fld in feat:
+ self.assertEqual(True, fld.name in source.fields.keys())
def test05_geometries(self):
"Testing Geometries from Data Source Features."
View
BIN  django/contrib/gis/tests/data/ch-city/ch-city.dbf
Binary file not shown
View
1  django/contrib/gis/tests/data/ch-city/ch-city.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
View
BIN  django/contrib/gis/tests/data/ch-city/ch-city.shp
Binary file not shown
View
BIN  django/contrib/gis/tests/data/ch-city/ch-city.shx
Binary file not shown
View
10 django/contrib/gis/tests/layermap/tests.py
@@ -1,4 +1,5 @@
-from __future__ import absolute_import
+# coding: utf-8
+from __future__ import absolute_import, unicode_literals
import os
from copy import copy
@@ -286,6 +287,13 @@ def test_textfield(self):
self.assertEqual(City.objects.count(), 3)
self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
+ def test_encoded_name(self):
+ """ Test a layer containing utf-8-encoded name """
+ city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
+ lm = LayerMapping(City, city_shp, city_mapping)
+ lm.save(silent=True, strict=True)
+ self.assertEqual(City.objects.count(), 1)
+ self.assertEqual(City.objects.all()[0].name, "Zürich")
class OtherRouter(object):
def db_for_read(self, model, **hints):
View
8 django/contrib/gis/utils/layermapping.py
@@ -18,6 +18,8 @@
from django.db import models, transaction
from django.contrib.localflavor.us.models import USStateField
from django.utils import six
+from django.utils.encoding import force_text
+
# LayerMapping exceptions.
class LayerMapError(Exception): pass
@@ -65,7 +67,7 @@ class LayerMapping(object):
}
def __init__(self, model, data, mapping, layer=0,
- source_srs=None, encoding=None,
+ source_srs=None, encoding='utf-8',
transaction_mode='commit_on_success',
transform=True, unique=None, using=None):
"""
@@ -76,7 +78,7 @@ def __init__(self, model, data, mapping, layer=0,
"""
# Getting the DataSource and the associated Layer.
if isinstance(data, six.string_types):
- self.ds = DataSource(data)
+ self.ds = DataSource(data, encoding=encoding)
else:
self.ds = data
self.layer = self.ds[layer]
@@ -330,7 +332,7 @@ def verify_ogr_field(self, ogr_field, model_field):
if self.encoding:
# The encoding for OGR data sources may be specified here
# (e.g., 'cp437' for Census Bureau boundary files).
- val = six.text_type(ogr_field.value, self.encoding)
+ val = force_text(ogr_field.value, self.encoding)
else:
val = ogr_field.value
if model_field.max_length and len(val) > model_field.max_length:

0 comments on commit 9a2bcee

Please sign in to comment.
Something went wrong with that request. Please try again.