Skip to content

Commit

Permalink
Merge ec53ec7 into cf4eb4d
Browse files Browse the repository at this point in the history
  • Loading branch information
jwass committed Dec 2, 2014
2 parents cf4eb4d + ec53ec7 commit 36626e8
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 41 deletions.
50 changes: 11 additions & 39 deletions geopandas/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,59 +291,31 @@ def feature(i, row):
geo['bbox'] = self.total_bounds

return geo

def to_file(self, filename, driver="ESRI Shapefile", **kwargs):

def to_file(self, filename, driver="ESRI Shapefile", schema=None,
**kwargs):
"""
Write this GeoDataFrame to an OGR data source
A dictionary of supported OGR providers is available via:
>>> import fiona
>>> fiona.supported_drivers
Parameters
----------
filename : string
filename : string
File path or file handle to write to.
driver : string, default 'ESRI Shapefile'
The OGR format driver used to write the vector file.
schema : dict, default None
If specified, the schema dictionary is passed to Fiona to
better control how the file is written.
The *kwargs* are passed to fiona.open and can be used to write
The *kwargs* are passed to fiona.open and can be used to write
to multi-layer data, store data within archives (zip files), etc.
"""
import fiona
def convert_type(in_type):
if in_type == object:
return 'str'
out_type = type(np.asscalar(np.zeros(1, in_type))).__name__
if out_type == 'long':
out_type = 'int'
return out_type

def feature(i, row):
return {
'id': str(i),
'type': 'Feature',
'properties':
dict((k, v) for k, v in iteritems(row) if k != 'geometry'),
'geometry': mapping(row['geometry']) }

properties = OrderedDict([(col, convert_type(_type)) for col, _type
in zip(self.columns, self.dtypes) if col!='geometry'])
# Need to check geom_types before we write to file...
# Some (most?) providers expect a single geometry type:
# Point, LineString, or Polygon
geom_types = self['geometry'].geom_type.unique()
from os.path import commonprefix # To find longest common prefix
geom_type = commonprefix([g[::-1] for g in geom_types])[::-1] # Reverse
if geom_type == '': # No common suffix = mixed geometry types
raise ValueError("Geometry column cannot contains mutiple "
"geometry types when writing to file.")
schema = {'geometry': geom_type, 'properties': properties}
filename = os.path.abspath(os.path.expanduser(filename))
with fiona.open(filename, 'w', driver=driver, crs=self.crs,
schema=schema, **kwargs) as c:
for i, row in self.iterrows():
c.write(feature(i, row))
from geopandas.io.file import to_file
to_file(self, filename, driver, schema, **kwargs)

def to_crs(self, crs=None, epsg=None, inplace=False):
"""Transform geometries to a new coordinate reference system
Expand Down
81 changes: 79 additions & 2 deletions geopandas/io/file.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
import collections
import os

import fiona
import numpy as np
from shapely.geometry import mapping

from six import iteritems
from geopandas import GeoDataFrame


def read_file(filename, **kwargs):
    """
    Returns a GeoDataFrame from a file.

    *filename* is handed to ``fiona.open`` and *kwargs* are keyword args
    to be passed to the method when opening the file.

    Parameters
    ----------
    filename : string
        Passed as the first argument to ``fiona.open``.
    bbox : sequence of 4 values, optional (keyword only)
        Removed from *kwargs* before the file is opened. When given,
        only the features yielded by ``f.filter(bbox=bbox)`` are loaded.
    **kwargs
        Every remaining keyword is forwarded unchanged to ``fiona.open``.

    Returns
    -------
    GeoDataFrame
        Built with ``GeoDataFrame.from_features`` and carrying the crs
        reported by the opened collection.
    """
    bbox = kwargs.pop('bbox', None)
    with fiona.open(filename, **kwargs) as f:
        crs = f.crs
        if bbox is not None:
            assert len(bbox) == 4
            f_filt = f.filter(bbox=bbox)
        else:
            f_filt = f
        # Read from the (possibly filtered) iterator. Reading from `f`
        # itself would silently ignore the requested bbox.
        gdf = GeoDataFrame.from_features(f_filt, crs=crs)

    return gdf


def to_file(df, filename, driver="ESRI Shapefile", schema=None,
            **kwargs):
    """
    Write a GeoDataFrame to an OGR data source

    A dictionary of supported OGR providers is available via:
    >>> import fiona
    >>> fiona.supported_drivers

    Parameters
    ----------
    df : GeoDataFrame
        The frame to be written.
    filename : string
        Path of the file to write. ``~`` is expanded and the path is
        made absolute before it is opened.
    driver : string, default 'ESRI Shapefile'
        The OGR format driver used to write the vector file.
    schema : dict, default None
        If specified, the schema dictionary is passed to Fiona to
        better control how the file is written. If None, the schema
        is obtained from ``infer_schema(df)``.

    The *kwargs* are passed to fiona.open and can be used to write
    to multi-layer data, store data within archives (zip files), etc.
    """
    def _as_record(index, row):
        # Every column except the geometry becomes a feature property.
        attributes = {}
        for key, value in iteritems(row):
            if key != 'geometry':
                attributes[key] = value
        return {
            'id': str(index),
            'type': 'Feature',
            'properties': attributes,
            'geometry': mapping(row['geometry']),
        }

    out_schema = infer_schema(df) if schema is None else schema
    target = os.path.abspath(os.path.expanduser(filename))
    with fiona.open(target, 'w', driver=driver, crs=df.crs,
                    schema=out_schema, **kwargs) as sink:
        for index, row in df.iterrows():
            sink.write(_as_record(index, row))


def infer_schema(df):
    """
    Build a Fiona-style schema dictionary describing *df*.

    Parameters
    ----------
    df : GeoDataFrame
        Must expose ``columns``, ``dtypes`` and a ``'geometry'`` column
        whose ``geom_type`` accessor supports ``unique()``.

    Returns
    -------
    dict
        ``{'geometry': <geometry type name>, 'properties': OrderedDict}``
        where the properties map every non-geometry column, in column
        order, to the name of the Python scalar type matching its dtype
        (``object`` columns are reported as ``'str'``).

    Raises
    ------
    ValueError
        If the geometry type names in the column share no common suffix
        (this is also the case when the column is empty).
    """
    def convert_type(in_type):
        if in_type == object:
            return 'str'
        # ndarray.item() returns the native Python scalar for the dtype.
        # It replaces np.asscalar, which newer NumPy releases removed.
        out_type = type(np.zeros(1, in_type).item()).__name__
        if out_type == 'long':
            # Python 2 reports 64-bit integers as 'long'.
            out_type = 'int'
        return out_type

    properties = collections.OrderedDict(
        (col, convert_type(_type))
        for col, _type in zip(df.columns, df.dtypes)
        if col != 'geometry'
    )

    # Need to check geom_types before we write to file...
    # Some (most?) providers expect a single geometry type:
    # Point, LineString, or Polygon
    geom_types = df['geometry'].geom_type.unique()
    # The longest common *suffix* is found by reversing each name, taking
    # the common prefix and reversing back, so that e.g. 'Polygon' and
    # 'MultiPolygon' collapse to 'Polygon'.
    geom_type = os.path.commonprefix([g[::-1] for g in geom_types])[::-1]
    if geom_type == '':  # No common suffix = mixed geometry types
        raise ValueError("Geometry column cannot contains mutiple "
                         "geometry types when writing to file.")

    return {'geometry': geom_type, 'properties': properties}
26 changes: 26 additions & 0 deletions tests/test_geodataframe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import absolute_import

import collections
import json
import os
import tempfile
Expand All @@ -24,6 +25,8 @@ def setUp(self):
nybb_filename = download_nybb()

self.df = read_file('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename)
with fiona.open('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename) as f:
self.schema = f.schema
self.tempdir = tempfile.mkdtemp()
self.boros = self.df['BoroName']
self.crs = {'init': 'epsg:4326'}
Expand Down Expand Up @@ -315,6 +318,29 @@ def test_mixed_types_to_file(self):
with self.assertRaises(ValueError):
s.to_file(tempfilename)

def test_to_file_schema(self):
    """
    Ensure that the file is written according to the schema
    if it is specified
    """
    expected = {
        'geometry': 'Polygon',
        'properties': collections.OrderedDict([
            ('Shape_Leng', 'float:19.11'),
            ('BoroName', 'str:40'),
            ('BoroCode', 'int:10'),
            ('Shape_Area', 'float:19.11'),
        ]),
    }
    out_path = os.path.join(self.tempdir, 'test.shp')

    # Two features are enough to exercise the writer and keep this quick.
    subset = self.df.iloc[:2]
    subset.to_file(out_path, schema=expected)

    # Re-open the written file and compare the schema Fiona reports.
    with fiona.open(out_path) as src:
        written = src.schema

    self.assertEqual(written, expected)

def test_bool_index(self):
# Find boros with 'B' in their name
df = self.df[self.df['BoroName'].str.contains('B')]
Expand Down

0 comments on commit 36626e8

Please sign in to comment.