[Fixes #4025] Regression with uploading a shapefile with no ascii characters (#4026)

* [Fixes #4025] Regression with uploading a shapefile with no ascii characters

* Rename columns of non-UTF-8 shapefile attributes before ingesting
* Fix test cases
Alessio Fabiani authored and capooti committed Oct 31, 2018
1 parent ff92a00 commit cc94e82
Showing 26 changed files with 198 additions and 96 deletions.
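
To make the column-renaming step described in the commit message concrete, here is a minimal sketch of the idea behind geonode.utils.fixup_shp_columnnames, which the upload path below starts calling for every extracted file: decode each DBF attribute name with the charset declared at upload time (the new tests use 'windows-1258') and rewrite it to an ASCII-safe, unique identifier before the layer is ingested. The sanitize_column_name helper and its signature are assumptions made for this illustration, not the actual GeoNode implementation.

# Minimal sketch only -- the real logic lives in geonode.utils.fixup_shp_columnnames
# and differs in detail.
import re


def sanitize_column_name(name, charset='UTF-8', taken=None):
    """Return an ASCII-safe, unique rewrite of a shapefile attribute (DBF column) name."""
    taken = taken if taken is not None else set()
    # Decode with the charset the uploader declared, never failing on bad bytes.
    decoded = name.decode(charset, 'replace') if isinstance(name, bytes) else name
    # Force the name down to ASCII; anything unrepresentable becomes '?'.
    ascii_only = decoded.encode('ascii', 'replace').decode('ascii')
    # Keep only letters, digits and underscores so the backend accepts the column.
    safe = re.sub(r'\W', '_', ascii_only) or 'field'
    # Renamed columns may collide, so suffix a counter until the name is unique.
    candidate, suffix = safe, 0
    while candidate in taken:
        suffix += 1
        candidate = '%s_%d' % (safe, suffix)
    taken.add(candidate)
    return candidate

Passing the upload charset through scan_file and _extract_zip (see geonode/upload/files.py below) is what makes this rename possible for zipped shapefiles as well.
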
8 changes: 4 additions & 4 deletions geonode/base/models.py
@@ -799,7 +799,7 @@ def csw_crs(self):
@property
def group_name(self):
if self.group:
return str(self.group)
return str(self.group).encode("utf-8", "replace")
return None

@property
@@ -902,13 +902,13 @@ def metadata_completeness(self):
return '{}%'.format(len(filled_fields) * 100 / len(required_fields))

def keyword_list(self):
return [kw.name for kw in self.keywords.all()]
return [kw.name.encode("utf-8", "replace") for kw in self.keywords.all()]

def keyword_slug_list(self):
return [kw.slug for kw in self.keywords.all()]
return [kw.slug.encode("utf-8", "replace") for kw in self.keywords.all()]

def region_name_list(self):
return [region.name for region in self.regions.all()]
return [region.name.encode("utf-8", "replace") for region in self.regions.all()]

def spatial_representation_type_string(self):
if hasattr(self.spatial_representation_type, 'identifier'):
47 changes: 31 additions & 16 deletions geonode/geoserver/signals.py
@@ -243,14 +243,17 @@ def geoserver_post_save_local(instance, *args, **kwargs):
instance.workspace = gs_resource.store.workspace.name
instance.store = gs_resource.store.name

bbox = gs_resource.native_bbox

# Set bounding box values
instance.bbox_x0 = bbox[0]
instance.bbox_x1 = bbox[1]
instance.bbox_y0 = bbox[2]
instance.bbox_y1 = bbox[3]
instance.srid = bbox[4]
try:
bbox = gs_resource.native_bbox

# Set bounding box values
instance.bbox_x0 = bbox[0]
instance.bbox_x1 = bbox[1]
instance.bbox_y0 = bbox[2]
instance.bbox_y1 = bbox[3]
instance.srid = bbox[4]
except BaseException:
pass

if instance.srid:
instance.srid_url = "http://www.spatialreference.org/ref/" + \
@@ -271,14 +274,17 @@ def geoserver_post_save_local(instance, *args, **kwargs):
gs_catalog.save(gs_resource)

if not settings.FREETEXT_KEYWORDS_READONLY:
if len(instance.keyword_list()) == 0 and gs_resource.keywords:
for keyword in gs_resource.keywords:
if keyword not in instance.keyword_list():
instance.keywords.add(keyword)
try:
if len(instance.keyword_list()) == 0 and gs_resource.keywords:
for keyword in gs_resource.keywords:
if keyword not in instance.keyword_list():
instance.keywords.add(keyword)
except BaseException:
pass

if any(instance.keyword_list()):
keywords = instance.keyword_list()
gs_resource.keywords = list(set(keywords))
gs_resource.keywords = [kw.decode("utf-8", "replace") for kw in list(set(keywords))]

# gs_resource should only be called if
# ogc_server_settings.BACKEND_WRITE_ENABLED == True
@@ -321,7 +327,10 @@ def geoserver_post_save_local(instance, *args, **kwargs):
# store the resource to avoid another geoserver call in the post_save
instance.gs_resource = gs_resource

bbox = gs_resource.native_bbox
try:
bbox = gs_resource.native_bbox
except BaseException:
bbox = instance.bbox
dx = float(bbox[1]) - float(bbox[0])
dy = float(bbox[3]) - float(bbox[2])

@@ -337,7 +346,10 @@ def geoserver_post_save_local(instance, *args, **kwargs):
instance.bbox_x1, instance.bbox_y1])

# Create Raw Data download link
path = gs_resource.dom.findall('nativeName')
try:
path = gs_resource.dom.findall('nativeName')
except BaseException:
path = instance.alternate
download_url = urljoin(settings.SITEURL,
reverse('download', args=[instance.id]))
Link.objects.get_or_create(resource=instance.resourcebase_ptr,
@@ -397,7 +409,10 @@ def geoserver_post_save_local(instance, *args, **kwargs):
url=ogc_server_settings.public_url,
repo_name=geogig_repo_name)

path = gs_resource.dom.findall('nativeName')
try:
path = gs_resource.dom.findall('nativeName')
except BaseException:
path = instance.alternate

if path:
path = 'path={path}'.format(path=path[0].text)
11 changes: 8 additions & 3 deletions geonode/geoserver/upload.py
@@ -180,8 +180,13 @@ def geoserver_upload(
# Step 6. Make sure our data always has a valid projection
# FIXME: Put this in gsconfig.py
logger.info('>>> Step 6. Making sure [%s] has a valid projection' % name)
if gs_resource.native_bbox is None:
box = gs_resource.native_bbox[:4]
_native_bbox = None
try:
_native_bbox = gs_resource.native_bbox
except BaseException:
pass
if _native_bbox and len(_native_bbox) >= 5 and _native_bbox[4:5][0] == 'EPSG:4326':
box = _native_bbox[:4]
minx, maxx, miny, maxy = [float(a) for a in box]
if -180 <= minx <= 180 and -180 <= maxx <= 180 and \
- 90 <= miny <= 90 and -90 <= maxy <= 90:
@@ -190,7 +195,7 @@ def geoserver_upload(
# If GeoServer couldn't figure out the projection, we just
# assume it's lat/lon to avoid a bad GeoServer configuration

gs_resource.latlon_bbox = gs_resource.native_bbox
gs_resource.latlon_bbox = _native_bbox
gs_resource.projection = "EPSG:4326"
cat.save(gs_resource)
else:
2 changes: 1 addition & 1 deletion geonode/geoserver/views.py
@@ -839,7 +839,7 @@ def get_capabilities(request, layerid=None, user=None,
'catalogue_url': settings.CATALOGUE['default']['URL'],
}
gc_str = tpl.render(ctx)
gc_str = gc_str.encode("utf-8")
gc_str = gc_str.encode("utf-8", "replace")
layerelem = etree.XML(gc_str)
rootdoc = etree.ElementTree(layerelem)
except Exception as e:
2 changes: 1 addition & 1 deletion geonode/layers/models.py
@@ -495,7 +495,7 @@ class Attribute(models.Model):

def __unicode__(self):
return "%s" % self.attribute_label.encode(
"utf-8") if self.attribute_label else self.attribute.encode("utf-8")
"utf-8", "replace") if self.attribute_label else self.attribute.encode("utf-8", "replace")

def unique_values_as_list(self):
return self.unique_values.split(',')
14 changes: 12 additions & 2 deletions geonode/layers/views.py
@@ -27,7 +27,7 @@
import uuid
import decimal
import re

import cPickle as pickle
from django.db.models import Q
from celery.exceptions import TimeoutError

@@ -239,7 +239,7 @@ def layer_upload(request, template='upload/layer_upload.html'):
user=request.user).order_by('-date')
if latest_uploads.count() > 0:
upload_session = latest_uploads[0]
upload_session.error = str(error)
upload_session.error = pickle.dumps(error).decode("utf-8", "replace")
upload_session.traceback = traceback.format_exc(tb)
upload_session.context = log_snippet(CONTEXT_LOG_FILE)
upload_session.save()
@@ -286,6 +286,16 @@ def layer_upload(request, template='upload/layer_upload.html'):
layer_name = saved_layer.alternate if hasattr(
saved_layer, 'alternate') else name
request.add_resource('layer', layer_name)
_keys = ['info', 'errors']
for _k in _keys:
if _k in out:
if isinstance(out[_k], unicode) or isinstance(
out[_k], str):
out[_k] = out[_k].decode(saved_layer.charset).encode("utf-8")
elif isinstance(out[_k], dict):
for key, value in out[_k].iteritems():
out[_k][key] = out[_k][key].decode(saved_layer.charset).encode("utf-8")
out[_k][key.decode(saved_layer.charset).encode("utf-8")] = out[_k].pop(key)
return HttpResponse(
json.dumps(out),
content_type='application/json',
Binary file added geonode/tests/data/ming_female_1.zip
Binary file not shown.
1 change: 1 addition & 0 deletions geonode/tests/data/ming_female_1/ming_female_1.cst
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
1 change: 1 addition & 0 deletions geonode/tests/data/ming_female_1/ming_female_1.prj
@@ -0,0 +1 @@
GEOGCS["Xian 1980", DATUM["Xian 1980", SPHEROID["IAG 1975", 6378140.0, 298.257, AUTHORITY["EPSG","7049"]], AUTHORITY["EPSG","6610"]], PRIMEM["Greenwich", 0.0, AUTHORITY["EPSG","8901"]], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH], AUTHORITY["EPSG","4610"]]
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions geonode/tests/data/ming_female_1/wfsrequest.txt
@@ -0,0 +1 @@
http://amap.zju.edu.cn:8080/geoserver/wfs?access_token=vKa0jUDKSIJVS8mxn3QbeXlNKbxFlO?format_options=charset%3AUTF-8&typename=geonode%3Aming_female_1&outputFormat=SHAPE-ZIP&version=1.0.0&service=WFS&request=GetFeature&access_token=vKa0jUDKSIJVS8mxn3QbeXlNKbxFlO
Binary file added geonode/tests/data/zhejiang_yangcan_yanyu.zip
Binary file not shown.
1 change: 1 addition & 0 deletions geonode/tests/data/zhejiang_yangcan_yanyu/wfsrequest.txt
@@ -0,0 +1 @@
http://128.31.22.46:8080/geoserver/wfs?access_token=TCSt2nAzYCZeeSF9jTDULPff6YUWrI?outputFormat=SHAPE-ZIP&service=WFS&srs=EPSG%3A4610&request=GetFeature&format_options=charset%3AUTF-8&typename=geonode%3Azhejiang_yangcan_yanyu&version=1.0.0&access_token=TCSt2nAzYCZeeSF9jTDULPff6YUWrI
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
@@ -0,0 +1 @@
GEOGCS["Xian 1980", DATUM["Xian 1980", SPHEROID["IAG 1975", 6378140.0, 298.257, AUTHORITY["EPSG","7049"]], AUTHORITY["EPSG","6610"]], PRIMEM["Greenwich", 0.0, AUTHORITY["EPSG","8901"]], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH], AUTHORITY["EPSG","4610"]]
Binary file not shown.
Binary file not shown.
65 changes: 53 additions & 12 deletions geonode/tests/integration.py
@@ -512,13 +512,8 @@ def test_layer_upload_metadata(self):
# layer have projection file, but has no valid srid
self.assertEqual(
str(e),
"Invalid Layers. "
"Needs an authoritative SRID in its CRS to be accepted")
# except:
# # Sometimes failes with the message:
# # UploadError: Could not save the layer air_runways,
# # there was an upload error: Error occured unzipping file
# pass
"GeoServer failed to detect the projection for layer [air_runways]. "
"It doesn't look like EPSG:4326, so backing out the layer.")
finally:
# Clean up and completely delete the layer
if uploaded:
@@ -610,11 +605,57 @@ def test_layer_zip_upload_metadata(self):
uploaded.metadata_xml = thelayer_metadata
regions_resolved, regions_unresolved = resolve_regions(regions)
self.assertIsNotNone(regions_resolved)
# except:
# # Sometimes failes with the message:
# # UploadError: Could not save the layer air_runways,
# # there was an upload error: Error occured unzipping file
# pass
finally:
# Clean up and completely delete the layer
if uploaded:
uploaded.delete()

@on_ogc_backend(geoserver.BACKEND_PACKAGE)
@timeout_decorator.timeout(LOCAL_TIMEOUT)
def test_layer_zip_upload_non_utf8(self):
"""Test uploading a layer with non UTF-8 attributes names"""
uploaded = None
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
thelayer_path = os.path.join(
PROJECT_ROOT,
'data/zhejiang_yangcan_yanyu')
thelayer_zip = os.path.join(
PROJECT_ROOT,
'data/',
'zhejiang_yangcan_yanyu.zip')
try:
if os.path.exists(thelayer_zip):
os.remove(thelayer_zip)
if os.path.exists(thelayer_path) and not os.path.exists(thelayer_zip):
zip_dir(thelayer_path, thelayer_zip)
if os.path.exists(thelayer_zip):
uploaded = file_upload(thelayer_zip, overwrite=True, charset='windows-1258')
self.assertEquals(uploaded.title, 'Zhejiang Yangcan Yanyu')
self.assertEquals(len(uploaded.keyword_list()), 2)
self.assertEquals(uploaded.constraints_other, None)
finally:
# Clean up and completely delete the layer
if uploaded:
uploaded.delete()

uploaded = None
thelayer_path = os.path.join(
PROJECT_ROOT,
'data/ming_female_1')
thelayer_zip = os.path.join(
PROJECT_ROOT,
'data/',
'ming_female_1.zip')
try:
if os.path.exists(thelayer_zip):
os.remove(thelayer_zip)
if os.path.exists(thelayer_path) and not os.path.exists(thelayer_zip):
zip_dir(thelayer_path, thelayer_zip)
if os.path.exists(thelayer_zip):
uploaded = file_upload(thelayer_zip, overwrite=True, charset='windows-1258')
self.assertEquals(uploaded.title, 'Ming Female 1')
self.assertEquals(len(uploaded.keyword_list()), 2)
self.assertEquals(uploaded.constraints_other, None)
finally:
# Clean up and completely delete the layer
if uploaded:
36 changes: 24 additions & 12 deletions geonode/upload/files.py
@@ -25,9 +25,9 @@
'''

import os.path
from geoserver.resource import FeatureType
from geoserver.resource import Coverage

from geonode.utils import fixup_shp_columnnames
from geoserver.resource import FeatureType, Coverage
from django.utils.translation import ugettext as _

from UserList import UserList
@@ -256,17 +256,24 @@ def get_scan_hint(valid_extensions):
return result


def scan_file(file_name, scan_hint=None):
def scan_file(file_name, scan_hint=None, charset=None):
'''get a list of SpatialFiles for the provided file'''
if not os.path.exists(file_name):
raise Exception(_("Could not access to uploaded data."))

dirname = os.path.dirname(file_name)
if zipfile.is_zipfile(file_name):
paths, kept_zip = _process_zip(file_name, dirname, scan_hint=scan_hint)
paths, kept_zip = _process_zip(file_name,
dirname,
scan_hint=scan_hint,
charset=charset)
archive = file_name if kept_zip else None
else:
paths = [os.path.join(dirname, p) for p in os.listdir(dirname)]
paths = []
for p in os.listdir(dirname):
_f = os.path.join(dirname, p)
fixup_shp_columnnames(_f, charset)
paths.append(_f)
archive = None
if paths is not None:
safe_paths = _rename_files(paths)
@@ -305,7 +312,7 @@ def scan_file(file_name, scan_hint=None):
return SpatialFiles(dirname, found, archive=archive)


def _process_zip(zip_path, destination_dir, scan_hint=None):
def _process_zip(zip_path, destination_dir, scan_hint=None, charset=None):
"""Perform sanity checks on uploaded zip file
This function will check if the zip file's contents have legal names.
@@ -318,10 +325,10 @@ def _process_zip(zip_path, destination_dir, scan_hint=None):
safe_zip_path = _rename_files([zip_path])[0]
with zipfile.ZipFile(safe_zip_path, "r") as zip_handler:
if scan_hint in _keep_original_data:
extracted_paths = _extract_zip(zip_handler, destination_dir)
extracted_paths = _extract_zip(zip_handler, destination_dir, charset)
else:
extracted_paths = _sanitize_zip_contents(
zip_handler, destination_dir)
zip_handler, destination_dir, charset)
if extracted_paths is not None:
all_paths = extracted_paths
kept_zip = False
@@ -333,16 +340,21 @@ def _process_zip(zip_path, destination_dir, scan_hint=None):
return all_paths, kept_zip


def _sanitize_zip_contents(zip_handler, destination_dir):
def _sanitize_zip_contents(zip_handler, destination_dir, charset):
clean_macosx_dir(zip_handler.namelist())
result = _extract_zip(zip_handler, destination_dir)
result = _extract_zip(zip_handler, destination_dir, charset)
return result


def _extract_zip(zip_handler, destination):
def _extract_zip(zip_handler, destination, charset):
file_names = zip_handler.namelist()
zip_handler.extractall(destination)
return [os.path.join(destination, p) for p in file_names]
paths = []
for p in file_names:
_f = os.path.join(destination, p)
fixup_shp_columnnames(_f, charset)
paths.append(_f)
return paths


def _probe_zip_for_sld(zip_handler, destination_dir):
1 change: 1 addition & 0 deletions geonode/upload/forms.py
@@ -47,6 +47,7 @@ class LayerUploadForm(forms.Form):
shx_file = forms.FileField(required=False)
prj_file = forms.FileField(required=False)
xml_file = forms.FileField(required=False)
charset = forms.CharField(required=False)

if check_ogc_backend(geoserver.BACKEND_PACKAGE):
sld_file = forms.FileField(required=False)
