Skip to content

Commit

Permalink
Adds geojson support (#155)
Browse files Browse the repository at this point in the history
* fix default serializer

* basic support geopoint

* using self.fields and checking for geojson

* moving to write_transformed_row

* fixes

* finalizing

* Some code cleanup

* Lint fixes

Co-authored-by: Giuseppe PERONATO <giuseppe.peronato@idiap.ch>
  • Loading branch information
akariv and gperonato committed Dec 22, 2020
1 parent d3dcfe3 commit af3f7d9
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 7 deletions.
2 changes: 1 addition & 1 deletion PROCESSORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def dump_to_path(out_path='.',
- `force-format` - Specifies whether to force all output files to be generated with the same format
- if `True` (the default), all resources will use the same format
- if `False`, format will be deduced from the file extension. Resources with unknown extensions will be discarded.
- `format` - Specifies the type of output files to be generated (if `force-format` is true): `csv` (the default) or `json`
- `format` - Specifies the type of output files to be generated (if `force-format` is true): `csv` (the default), `json` or `geojson`
- `temporal-format-property` - Specifies a property to be used for temporal values serialization. For example, if some field has a property `outputFormat: %d/%m/%y` setting `temporal-format-property` to `outputFormat` will lead to using this format for this field serialization.
- `add-filehash-to-path`: Specifies whether to include the file's md5 hash in the resource path. Defaults to `False`. If `True`, embeds the hash in the path like so:
- If original path is `path/to/the/file.ext`
Expand Down
4 changes: 4 additions & 0 deletions data/cities_location.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id,city,lat,long
1,london,51.509865,-0.118092
2,paris,48.8566,2.3522
3,rome,41.9028,12.4964
5 changes: 3 additions & 2 deletions dataflows/processors/dumpers/file_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datapackage import Resource

from .dumper_base import DumperBase
from .file_formats import CSVFormat, JSONFormat
from .file_formats import CSVFormat, JSONFormat, GeoJSONFormat


class FileDumper(DumperBase):
Expand Down Expand Up @@ -34,7 +34,8 @@ def process_datapackage(self, datapackage):
file_format = file_format[1:]
file_formatter = {
'csv': CSVFormat,
'json': JSONFormat
'json': JSONFormat,
'geojson': GeoJSONFormat
}.get(file_format)
if file_format is not None:
self.file_formatters[resource.name] = file_formatter
Expand Down
43 changes: 39 additions & 4 deletions dataflows/processors/dumpers/file_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class JSONFormat(FileFormat):
'geopoint': lambda d: list(map(float, d)),
'yearmonth': lambda d: '{:04d}-{:02d}'.format(*d),
}

NULL_VALUE = None

PYTHON_DIALECT = {
Expand All @@ -176,10 +177,12 @@ class JSONFormat(FileFormat):
}

def __init__(self, file, schema, **options):
    """Write the opening of the output, then set up the base formatter.

    :param file: writable text stream that receives the serialized output.
    :param schema: schema of the resource; passed through to the base class.
    :param options: extra keyword options passed through to the base class.
    """
    # The opening is emitted through the ``initialize_file`` hook (and only
    # once), so subclasses can prepend their own preamble by overriding it.
    self.initialize_file(file)
    super(JSONFormat, self).__init__(file, schema, default_serializer=identity, **options)

def initialize_file(self, file):
    """Start the JSON array on ``file`` and flag that no row was written yet.

    :param file: writable text stream that will receive the rows.
    """
    # Opening bracket of the top-level JSON array.
    file.write('[')
    # Marker stored on the stream itself; set to True before any row is written.
    file.__first = True

@classmethod
def prepare_resource(cls, resource):
Expand All @@ -200,3 +203,35 @@ def write_transformed_row(self, transformed_row):

def finalize_file(self):
    """Terminate the output by writing the closing bracket of the JSON array."""
    out = self.writer
    out.write(']')


class GeoJSONFormat(JSONFormat):
    """Write rows as a GeoJSON ``FeatureCollection``.

    The output is the ``{"type": "FeatureCollection","features":`` header,
    followed by whatever the parent ``JSONFormat`` writes for the rows, and a
    closing ``}``. Every row is wrapped in a ``Feature`` before being handed
    to the parent class.
    """

    def initialize_file(self, file):
        """Write the FeatureCollection header, then the parent's opening.

        :param file: writable text stream that will receive the output.
        """
        file.write('{"type": "FeatureCollection","features":')
        super(GeoJSONFormat, self).initialize_file(file)

    def write_transformed_row(self, transformed_row):
        """Wrap a serialized row in a GeoJSON ``Feature`` and write it.

        The first field of type ``geopoint`` or ``geojson`` provides the
        feature's geometry; every other field (including any further
        geo-typed fields) is stored under ``properties``. A row without a
        geo-typed field, or whose geopoint value is null, gets a ``null``
        geometry, which GeoJSON allows for unlocated features.

        :param transformed_row: mapping of field name to serialized value.
        """
        properties = dict()
        geometry = None
        geometry_found = False
        for k, v in transformed_row.items():
            field_type = self.fields[k].type
            if not geometry_found and field_type == 'geopoint':
                geometry_found = True
                # A Point with null coordinates is not valid GeoJSON, so a
                # missing value is written as a null geometry instead.
                if v is not None:
                    geometry = dict(
                        type='Point',
                        coordinates=v
                    )
            elif not geometry_found and field_type == 'geojson':
                geometry_found = True
                geometry = v
            else:
                # Keep scanning after the geometry is found so that columns
                # placed after it are not dropped from the feature.
                properties[k] = v
        feature = dict(
            geometry=geometry,
            type='Feature',
            properties=properties
        )
        super(GeoJSONFormat, self).write_transformed_row(feature)

    def finalize_file(self):
        """Let the parent close its output, then close the FeatureCollection."""
        super(GeoJSONFormat, self).finalize_file()
        self.writer.write('}')
9 changes: 9 additions & 0 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1999,3 +1999,12 @@ def test_dump_to_zip():
zz = dump_to_zip('out/test_dump_to_zip.zip')
Flow([dict(a=1)], zz).process()
assert zz.out_file.closed

def test_dump_to_geojson():
    """Dump a CSV of city coordinates to GeoJSON via a computed geopoint field."""
    from dataflows import Flow, dump_to_path, load, add_computed_field, delete_fields
    Flow(
        load('data/cities_location.csv'),
        # Table Schema geopoint strings are "lon, lat", and GeoJSON positions
        # are [longitude, latitude], so longitude has to come first.
        add_computed_field(target=dict(name='Location', type='geopoint'), operation='format', with_='{long}, {lat}'),
        delete_fields(['lat', 'long']),
        dump_to_path(out_path='out', format='geojson'),
    ).process()

0 comments on commit af3f7d9

Please sign in to comment.