Skip to content

Commit

Permalink
- Rename columns of non-UTF-8 shapefile attributes before ingesting
Browse files Browse the repository at this point in the history
  • Loading branch information
afabiani committed Oct 31, 2018
1 parent d2b1f81 commit 671c250
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 51 deletions.
14 changes: 12 additions & 2 deletions geonode/layers/views.py
Expand Up @@ -27,7 +27,7 @@
import uuid
import decimal
import re

import cPickle as pickle
from django.db.models import Q
from celery.exceptions import TimeoutError

Expand Down Expand Up @@ -239,7 +239,7 @@ def layer_upload(request, template='upload/layer_upload.html'):
user=request.user).order_by('-date')
if latest_uploads.count() > 0:
upload_session = latest_uploads[0]
upload_session.error = str(error)
upload_session.error = pickle.dumps(error).decode("utf-8", "replace")
upload_session.traceback = traceback.format_exc(tb)
upload_session.context = log_snippet(CONTEXT_LOG_FILE)
upload_session.save()
Expand Down Expand Up @@ -286,6 +286,16 @@ def layer_upload(request, template='upload/layer_upload.html'):
layer_name = saved_layer.alternate if hasattr(
saved_layer, 'alternate') else name
request.add_resource('layer', layer_name)
_keys = ['info', 'errors']
for _k in _keys:
if _k in out:
if isinstance(out[_k], unicode) or isinstance(
out[_k], str):
out[_k] = out[_k].decode(saved_layer.charset).encode("utf-8")
elif isinstance(out[_k], dict):
for key, value in out[_k].iteritems():
out[_k][key] = out[_k][key].decode(saved_layer.charset).encode("utf-8")
out[_k][key.decode(saved_layer.charset).encode("utf-8")] = out[_k].pop(key)
return HttpResponse(
json.dumps(out),
content_type='application/json',
Expand Down
25 changes: 11 additions & 14 deletions geonode/upload/utils.py
Expand Up @@ -523,20 +523,17 @@ def _get_layer_values(layer, upload_session, expand=0):
lyr = inDataSource.GetLayer(str(layer.name))
limit = 100
for feat in islice(lyr, 0, limit):
try:
feat_values = json_loads_byteified(feat.ExportToJson()).get('properties')
for k in feat_values.keys():
type_code = feat.GetFieldDefnRef(k).GetType()
binding = feat.GetFieldDefnRef(k).GetFieldTypeName(type_code)
feat_value = feat_values[k] if str(feat_values[k]) != 'None' else 0
if expand > 0:
ff = {'value': feat_value, 'binding': binding}
feat_values[k] = ff
else:
feat_values[k] = feat_value
layer_values.append(feat_values)
except BaseException:
pass
feat_values = json_loads_byteified(feat.ExportToJson()).get('properties')
for k in feat_values.keys():
type_code = feat.GetFieldDefnRef(k).GetType()
binding = feat.GetFieldDefnRef(k).GetFieldTypeName(type_code)
feat_value = feat_values[k] if str(feat_values[k]) != 'None' else 0
if expand > 0:
ff = {'value': feat_value, 'binding': binding}
feat_values[k] = ff
else:
feat_values[k] = feat_value
layer_values.append(feat_values)
return layer_values


Expand Down
73 changes: 38 additions & 35 deletions geonode/utils.py
Expand Up @@ -61,7 +61,7 @@
from django.core.serializers.json import DjangoJSONEncoder
from django.utils import timezone

from geonode import geoserver, qgis_server # noqa
from geonode import geoserver, qgis_server, GeoNodeException # noqa

try:
import json
Expand Down Expand Up @@ -1004,46 +1004,49 @@ def check_shp_columnnames(layer):

if a.match(field_name):
list_col_original.append(field_name)
try:
for i in range(0, inLayerDefn.GetFieldCount()):
charset = layer.charset if layer.charset and 'undefined' not in layer.charset \
else 'UTF-8'
field_name = unicode(
inLayerDefn.GetFieldDefn(i).GetName(),
charset)

if not a.match(field_name):
# once the field_name contains Chinese, to use slugify_zh
has_ch = False
for ch in field_name:
if u'\u4e00' <= ch <= u'\u9fff':

for i in range(0, inLayerDefn.GetFieldCount()):
charset = layer.charset if layer.charset and 'undefined' not in layer.charset \
else 'UTF-8'

field_name = inLayerDefn.GetFieldDefn(i).GetName()
if not a.match(field_name):
# once the field_name contains Chinese, to use slugify_zh
has_ch = False
for ch in field_name:
try:
if u'\u4e00' <= ch.decode("utf-8", "replace") <= u'\u9fff':
has_ch = True
break
if has_ch:
new_field_name = slugify_zh(field_name, separator='_')
else:
new_field_name = custom_slugify(field_name)
if not b.match(new_field_name):
new_field_name = '_' + new_field_name
j = 0
while new_field_name in list_col_original or new_field_name in list_col.values():
if j == 0:
new_field_name += '_0'
if new_field_name.endswith('_' + str(j)):
j += 1
new_field_name = new_field_name[:-2] + '_' + str(j)
list_col.update({field_name: new_field_name})
except UnicodeDecodeError as e:
logger.error(str(e))
return False, None, None
except UnicodeDecodeError:
has_ch = True
break
if has_ch:
new_field_name = slugify_zh(field_name, separator='_')
else:
new_field_name = custom_slugify(field_name)
if not b.match(new_field_name):
new_field_name = '_' + new_field_name
j = 0
while new_field_name in list_col_original or new_field_name in list_col.values():
if j == 0:
new_field_name += '_0'
if new_field_name.endswith('_' + str(j)):
j += 1
new_field_name = new_field_name[:-2] + '_' + str(j)
list_col.update({field_name: new_field_name})

if len(list_col) == 0:
return True, None, None
else:
for key in list_col.keys():
qry = u"ALTER TABLE {0} RENAME COLUMN \"{1}\" TO \"{2}\"".format(
inLayer.GetName(), key, list_col[key])
inDataSource.ExecuteSQL(qry.encode(layer.charset))
try:
for key in list_col.keys():
qry = u"ALTER TABLE {} RENAME COLUMN \"".format(inLayer.GetName())
qry = qry + key.decode(charset) + u"\" TO \"{}\"".format(list_col[key])
inDataSource.ExecuteSQL(qry.encode(charset))
except UnicodeDecodeError:
raise GeoNodeException(
"Could not decode layer attributes by using the specified charset '{}'.".format(charset))
return True, None, list_col


Expand Down

0 comments on commit 671c250

Please sign in to comment.