Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Use smarter string decoding in GeoDjango

The first attempt to solve the Python 3 GIS encoding/decoding issue
was too naive. Calling decode() on every string read is bound to fail
as soon as a non-ASCII string is encountered.
This patch is a little more clever: it keeps ASCII decoding where
plain ASCII strings are expected, and allows a custom encoding to be
specified in the DataSource hierarchy.
  • Loading branch information...
commit 9a2bceed1aab52f65820c378f5ae1f608322b55c 1 parent a62d53c
Claude Paroz authored October 06, 2012
9  django/contrib/gis/gdal/datasource.py
@@ -45,7 +45,7 @@
45 45
 # Getting the ctypes prototypes for the DataSource.
46 46
 from django.contrib.gis.gdal.prototypes import ds as capi
47 47
 
48  
-from django.utils.encoding import force_bytes
  48
+from django.utils.encoding import force_bytes, force_text
49 49
 from django.utils import six
50 50
 from django.utils.six.moves import xrange
51 51
 
@@ -57,12 +57,14 @@ class DataSource(GDALBase):
57 57
     "Wraps an OGR Data Source object."
58 58
 
59 59
     #### Python 'magic' routines ####
60  
-    def __init__(self, ds_input, ds_driver=False, write=False):
  60
+    def __init__(self, ds_input, ds_driver=False, write=False, encoding='utf-8'):
61 61
         # The write flag.
62 62
         if write:
63 63
             self._write = 1
64 64
         else:
65 65
             self._write = 0
  66
+        # See also http://trac.osgeo.org/gdal/wiki/rfc23_ogr_unicode
  67
+        self.encoding = encoding
66 68
 
67 69
         # Registering all the drivers, this needs to be done
68 70
         #  _before_ we try to open up a data source.
@@ -129,4 +131,5 @@ def layer_count(self):
129 131
     @property
130 132
     def name(self):
131 133
         "Returns the name of the data source."
132  
-        return capi.get_ds_name(self._ptr)
  134
+        name = capi.get_ds_name(self._ptr)
  135
+        return force_text(name, self.encoding, strings_only=True)
9  django/contrib/gis/gdal/feature.py
@@ -7,7 +7,7 @@
7 7
 # ctypes function prototypes
8 8
 from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api
9 9
 
10  
-from django.utils.encoding import force_bytes
  10
+from django.utils.encoding import force_bytes, force_text
11 11
 from django.utils import six
12 12
 from django.utils.six.moves import xrange
13 13
 
@@ -69,6 +69,10 @@ def __eq__(self, other):
69 69
 
70 70
     #### Feature Properties ####
71 71
     @property
  72
+    def encoding(self):
  73
+        return self._layer._ds.encoding
  74
+
  75
+    @property
72 76
     def fid(self):
73 77
         "Returns the feature identifier."
74 78
         return capi.get_fid(self.ptr)
@@ -76,7 +80,8 @@ def fid(self):
76 80
     @property
77 81
     def layer_name(self):
78 82
         "Returns the name of the layer for the feature."
79  
-        return capi.get_feat_name(self._layer._ldefn)
  83
+        name = capi.get_feat_name(self._layer._ldefn)
  84
+        return force_text(name, self.encoding, strings_only=True)
80 85
 
81 86
     @property
82 87
     def num_fields(self):
8  django/contrib/gis/gdal/field.py
@@ -3,6 +3,8 @@
3 3
 from django.contrib.gis.gdal.base import GDALBase
4 4
 from django.contrib.gis.gdal.error import OGRException
5 5
 from django.contrib.gis.gdal.prototypes import ds as capi
  6
+from django.utils.encoding import force_text
  7
+
6 8
 
7 9
 # For more information, see the OGR C API source code:
8 10
 #  http://www.gdal.org/ogr/ogr__api_8h.html
@@ -53,7 +55,8 @@ def as_int(self):
53 55
 
54 56
     def as_string(self):
55 57
         "Retrieves the Field's value as a string."
56  
-        return capi.get_field_as_string(self._feat.ptr, self._index)
  58
+        string = capi.get_field_as_string(self._feat.ptr, self._index)
  59
+        return force_text(string, encoding=self._feat.encoding, strings_only=True)
57 60
 
58 61
     def as_datetime(self):
59 62
         "Retrieves the Field's value as a tuple of date & time components."
@@ -70,7 +73,8 @@ def as_datetime(self):
70 73
     @property
71 74
     def name(self):
72 75
         "Returns the name of this Field."
73  
-        return capi.get_field_name(self.ptr)
  76
+        name = capi.get_field_name(self.ptr)
  77
+        return force_text(name, encoding=self._feat.encoding, strings_only=True)
74 78
 
75 79
     @property
76 80
     def precision(self):
10  django/contrib/gis/gdal/layer.py
@@ -14,7 +14,7 @@
14 14
 # GDAL ctypes function prototypes.
15 15
 from django.contrib.gis.gdal.prototypes import ds as capi, geom as geom_api, srs as srs_api
16 16
 
17  
-from django.utils.encoding import force_bytes
  17
+from django.utils.encoding import force_bytes, force_text
18 18
 from django.utils import six
19 19
 from django.utils.six.moves import xrange
20 20
 
@@ -103,7 +103,8 @@ def extent(self):
103 103
     @property
104 104
     def name(self):
105 105
         "Returns the name of this layer in the Data Source."
106  
-        return capi.get_fd_name(self._ldefn)
  106
+        name = capi.get_fd_name(self._ldefn)
  107
+        return force_text(name, self._ds.encoding, strings_only=True)
107 108
 
108 109
     @property
109 110
     def num_feat(self, force=1):
@@ -135,8 +136,9 @@ def fields(self):
135 136
         Returns a list of string names corresponding to each of the Fields
136 137
         available in this Layer.
137 138
         """
138  
-        return [capi.get_field_name(capi.get_field_defn(self._ldefn, i))
139  
-                for i in xrange(self.num_fields) ]
  139
+        return [force_text(capi.get_field_name(capi.get_field_defn(self._ldefn, i)),
  140
+                           self._ds.encoding, strings_only=True)
  141
+                for i in xrange(self.num_fields)]
140 142
 
141 143
     @property
142 144
     def field_types(self):
2  django/contrib/gis/gdal/prototypes/ds.py
@@ -17,7 +17,7 @@
17 17
 get_driver = voidptr_output(lgdal.OGRGetDriver, [c_int])
18 18
 get_driver_by_name = voidptr_output(lgdal.OGRGetDriverByName, [c_char_p])
19 19
 get_driver_count = int_output(lgdal.OGRGetDriverCount, [])
20  
-get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p])
  20
+get_driver_name = const_string_output(lgdal.OGR_Dr_GetName, [c_void_p], decoding='ascii')
21 21
 
22 22
 ### DataSource ###
23 23
 open_ds = voidptr_output(lgdal.OGROpen, [c_char_p, c_int, POINTER(c_void_p)])
9  django/contrib/gis/gdal/prototypes/errcheck.py
@@ -30,10 +30,9 @@ def check_const_string(result, func, cargs, offset=None):
30 30
     if offset:
31 31
         check_err(result)
32 32
         ptr = ptr_byref(cargs, offset)
33  
-        return ptr.value.decode()
  33
+        return ptr.value
34 34
     else:
35  
-        if result is not None:
36  
-            return result.decode()
  35
+        return result
37 36
 
38 37
 def check_string(result, func, cargs, offset=-1, str_result=False):
39 38
     """
@@ -48,13 +47,13 @@ def check_string(result, func, cargs, offset=-1, str_result=False):
48 47
         # For routines that return a string.
49 48
         ptr = result
50 49
         if not ptr: s = None
51  
-        else: s = string_at(result).decode()
  50
+        else: s = string_at(result)
52 51
     else:
53 52
         # Error-code return specified.
54 53
         check_err(result)
55 54
         ptr = ptr_byref(cargs, offset)
56 55
         # Getting the string value
57  
-        s = ptr.value.decode()
  56
+        s = ptr.value
58 57
     # Correctly freeing the allocated memory behind GDAL pointer
59 58
     # w/the VSIFree routine.
60 59
     if ptr: lgdal.VSIFree(ptr)
14  django/contrib/gis/gdal/prototypes/generation.py
@@ -57,7 +57,7 @@ def srs_output(func, argtypes):
57 57
     func.errcheck = check_srs
58 58
     return func
59 59
 
60  
-def const_string_output(func, argtypes, offset=None):
  60
+def const_string_output(func, argtypes, offset=None, decoding=None):
61 61
     func.argtypes = argtypes
62 62
     if offset:
63 63
         func.restype = c_int
@@ -65,12 +65,15 @@ def const_string_output(func, argtypes, offset=None):
65 65
         func.restype = c_char_p
66 66
 
67 67
     def _check_const(result, func, cargs):
68  
-        return check_const_string(result, func, cargs, offset=offset)
  68
+        res = check_const_string(result, func, cargs, offset=offset)
  69
+        if res and decoding:
  70
+            res = res.decode(decoding)
  71
+        return res
69 72
     func.errcheck = _check_const
70 73
 
71 74
     return func
72 75
 
73  
-def string_output(func, argtypes, offset=-1, str_result=False):
  76
+def string_output(func, argtypes, offset=-1, str_result=False, decoding=None):
74 77
     """
75 78
     Generates a ctypes prototype for the given function with the
76 79
     given argument types that returns a string from a GDAL pointer.
@@ -90,8 +93,11 @@ def string_output(func, argtypes, offset=-1, str_result=False):
90 93
     # Dynamically defining our error-checking function with the
91 94
     # given offset.
92 95
     def _check_str(result, func, cargs):
93  
-        return check_string(result, func, cargs,
  96
+        res = check_string(result, func, cargs,
94 97
                             offset=offset, str_result=str_result)
  98
+        if res and decoding:
  99
+            res = res.decode(decoding)
  100
+        return res
95 101
     func.errcheck = _check_str
96 102
     return func
97 103
 
10  django/contrib/gis/gdal/prototypes/geom.py
@@ -27,8 +27,8 @@ def topology_func(f):
27 27
 
28 28
 # GeoJSON routines.
29 29
 from_json = geom_output(lgdal.OGR_G_CreateGeometryFromJson, [c_char_p])
30  
-to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True)
31  
-to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True)
  30
+to_json = string_output(lgdal.OGR_G_ExportToJson, [c_void_p], str_result=True, decoding='ascii')
  31
+to_kml = string_output(lgdal.OGR_G_ExportToKML, [c_void_p, c_char_p], str_result=True, decoding='ascii')
32 32
 
33 33
 # GetX, GetY, GetZ all return doubles.
34 34
 getx = pnt_func(lgdal.OGR_G_GetX)
@@ -57,8 +57,8 @@ def topology_func(f):
57 57
 
58 58
 # Geometry export routines.
59 59
 to_wkb = void_output(lgdal.OGR_G_ExportToWkb, None, errcheck=True) # special handling for WKB.
60  
-to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)])
61  
-to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True)
  60
+to_wkt = string_output(lgdal.OGR_G_ExportToWkt, [c_void_p, POINTER(c_char_p)], decoding='ascii')
  61
+to_gml = string_output(lgdal.OGR_G_ExportToGML, [c_void_p], str_result=True, decoding='ascii')
62 62
 get_wkbsize = int_output(lgdal.OGR_G_WkbSize, [c_void_p])
63 63
 
64 64
 # Geometry spatial-reference related routines.
@@ -73,7 +73,7 @@ def topology_func(f):
73 73
 set_coord_dim = void_output(lgdal.OGR_G_SetCoordinateDimension, [c_void_p, c_int], errcheck=False)
74 74
 
75 75
 get_geom_count = int_output(lgdal.OGR_G_GetGeometryCount, [c_void_p])
76  
-get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p])
  76
+get_geom_name = const_string_output(lgdal.OGR_G_GetGeometryName, [c_void_p], decoding='ascii')
77 77
 get_geom_type = int_output(lgdal.OGR_G_GetGeometryType, [c_void_p])
78 78
 get_point_count = int_output(lgdal.OGR_G_GetPointCount, [c_void_p])
79 79
 get_point = void_output(lgdal.OGR_G_GetPoint, [c_void_p, c_int, POINTER(c_double), POINTER(c_double), POINTER(c_double)], errcheck=False)
14  django/contrib/gis/gdal/prototypes/srs.py
@@ -49,17 +49,17 @@ def units_func(f):
49 49
 angular_units = units_func(lgdal.OSRGetAngularUnits)
50 50
 
51 51
 # For exporting to WKT, PROJ.4, "Pretty" WKT, and XML.
52  
-to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)])
53  
-to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)])
54  
-to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2)
  52
+to_wkt = string_output(std_call('OSRExportToWkt'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
  53
+to_proj = string_output(std_call('OSRExportToProj4'), [c_void_p, POINTER(c_char_p)], decoding='ascii')
  54
+to_pretty_wkt = string_output(std_call('OSRExportToPrettyWkt'), [c_void_p, POINTER(c_char_p), c_int], offset=-2, decoding='ascii')
55 55
 
56 56
 # Memory leak fixed in GDAL 1.5; still exists in 1.4.
57  
-to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2)
  57
+to_xml = string_output(lgdal.OSRExportToXML, [c_void_p, POINTER(c_char_p), c_char_p], offset=-2, decoding='ascii')
58 58
 
59 59
 # String attribute retrieval routines.
60  
-get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int])
61  
-get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p])
62  
-get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p])
  60
+get_attr_value = const_string_output(std_call('OSRGetAttrValue'), [c_void_p, c_char_p, c_int], decoding='ascii')
  61
+get_auth_name = const_string_output(lgdal.OSRGetAuthorityName, [c_void_p, c_char_p], decoding='ascii')
  62
+get_auth_code = const_string_output(lgdal.OSRGetAuthorityCode, [c_void_p, c_char_p], decoding='ascii')
63 63
 
64 64
 # SRS Properties
65 65
 isgeographic = int_output(lgdal.OSRIsGeographic, [c_void_p])
5  django/contrib/gis/gdal/srs.py
@@ -34,7 +34,7 @@
34 34
 from django.contrib.gis.gdal.prototypes import srs as capi
35 35
 
36 36
 from django.utils import six
37  
-from django.utils.encoding import force_bytes, force_text
  37
+from django.utils.encoding import force_bytes
38 38
 
39 39
 
40 40
 #### Spatial Reference class. ####
@@ -139,8 +139,7 @@ def attr_value(self, target, index=0):
139 139
         """
140 140
         if not isinstance(target, six.string_types) or not isinstance(index, int):
141 141
             raise TypeError
142  
-        value = capi.get_attr_value(self.ptr, force_bytes(target), index)
143  
-        return force_text(value, 'ascii', strings_only=True)
  142
+        return capi.get_attr_value(self.ptr, force_bytes(target), index)
144 143
 
145 144
     def auth_name(self, target):
146 145
         "Returns the authority name for the given string target node."
3  django/contrib/gis/gdal/tests/test_ds.py
@@ -167,7 +167,8 @@ def test04_features(self):
167 167
                         self.assertEqual(True, isinstance(feat[k], v))
168 168
 
169 169
                     # Testing Feature.__iter__
170  
-                    for fld in feat: self.assertEqual(True, fld.name in source.fields.keys())
  170
+                    for fld in feat:
  171
+                        self.assertEqual(True, fld.name in source.fields.keys())
171 172
 
172 173
     def test05_geometries(self):
173 174
         "Testing Geometries from Data Source Features."
BIN  django/contrib/gis/tests/data/ch-city/ch-city.dbf
Binary file not shown
1  django/contrib/gis/tests/data/ch-city/ch-city.prj
... ...
@@ -0,0 +1 @@
  1
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
BIN  django/contrib/gis/tests/data/ch-city/ch-city.shp
Binary file not shown
BIN  django/contrib/gis/tests/data/ch-city/ch-city.shx
Binary file not shown
10  django/contrib/gis/tests/layermap/tests.py
... ...
@@ -1,4 +1,5 @@
1  
-from __future__ import absolute_import
  1
+# coding: utf-8
  2
+from __future__ import absolute_import, unicode_literals
2 3
 
3 4
 import os
4 5
 from copy import copy
@@ -286,6 +287,13 @@ def test_textfield(self):
286 287
         self.assertEqual(City.objects.count(), 3)
287 288
         self.assertEqual(City.objects.all().order_by('name_txt')[0].name_txt, "Houston")
288 289
 
  290
+    def test_encoded_name(self):
  291
+        """ Test a layer containing utf-8-encoded name """
  292
+        city_shp = os.path.join(shp_path, 'ch-city', 'ch-city.shp')
  293
+        lm = LayerMapping(City, city_shp, city_mapping)
  294
+        lm.save(silent=True, strict=True)
  295
+        self.assertEqual(City.objects.count(), 1)
  296
+        self.assertEqual(City.objects.all()[0].name, "Zürich")
289 297
 
290 298
 class OtherRouter(object):
291 299
     def db_for_read(self, model, **hints):
8  django/contrib/gis/utils/layermapping.py
@@ -18,6 +18,8 @@
18 18
 from django.db import models, transaction
19 19
 from django.contrib.localflavor.us.models import USStateField
20 20
 from django.utils import six
  21
+from django.utils.encoding import force_text
  22
+
21 23
 
22 24
 # LayerMapping exceptions.
23 25
 class LayerMapError(Exception): pass
@@ -65,7 +67,7 @@ class LayerMapping(object):
65 67
                          }
66 68
 
67 69
     def __init__(self, model, data, mapping, layer=0,
68  
-                 source_srs=None, encoding=None,
  70
+                 source_srs=None, encoding='utf-8',
69 71
                  transaction_mode='commit_on_success',
70 72
                  transform=True, unique=None, using=None):
71 73
         """
@@ -76,7 +78,7 @@ def __init__(self, model, data, mapping, layer=0,
76 78
         """
77 79
         # Getting the DataSource and the associated Layer.
78 80
         if isinstance(data, six.string_types):
79  
-            self.ds = DataSource(data)
  81
+            self.ds = DataSource(data, encoding=encoding)
80 82
         else:
81 83
             self.ds = data
82 84
         self.layer = self.ds[layer]
@@ -330,7 +332,7 @@ def verify_ogr_field(self, ogr_field, model_field):
330 332
             if self.encoding:
331 333
                 # The encoding for OGR data sources may be specified here
332 334
                 # (e.g., 'cp437' for Census Bureau boundary files).
333  
-                val = six.text_type(ogr_field.value, self.encoding)
  335
+                val = force_text(ogr_field.value, self.encoding)
334 336
             else:
335 337
                 val = ogr_field.value
336 338
                 if model_field.max_length and len(val) > model_field.max_length:

0 notes on commit 9a2bcee

Please sign in to comment.
Something went wrong with that request. Please try again.