Merge ec53ec7 into cf4eb4d

geopandas · Dec 2, 2014 · 36626e8 · 36626e8
2 parents cf4eb4d + ec53ec7
commit 36626e8
Show file tree

Hide file tree

Showing 3 changed files with 116 additions and 41 deletions.
diff --git a/geopandas/geodataframe.py b/geopandas/geodataframe.py
@@ -291,59 +291,31 @@ def feature(i, row):
             geo['bbox'] = self.total_bounds
 
         return geo
-
-    def to_file(self, filename, driver="ESRI Shapefile", **kwargs):
+
+    def to_file(self, filename, driver="ESRI Shapefile", schema=None,
+                **kwargs):
         """
         Write this GeoDataFrame to an OGR data source
-        
+
         A dictionary of supported OGR providers is available via:
         >>> import fiona
         >>> fiona.supported_drivers
 
         Parameters
         ----------
-        filename : string 
+        filename : string
             File path or file handle to write to.
         driver : string, default 'ESRI Shapefile'
             The OGR format driver used to write the vector file.
+        schema : dict, default None
+            If specified, the schema dictionary is passed to Fiona to
+            better control how the file is written.
+            If None, the schema is inferred from the GeoDataFrame.
 
-        The *kwargs* are passed to fiona.open and can be used to write 
+        The *kwargs* are passed to fiona.open and can be used to write
         to multi-layer data, store data within archives (zip files), etc.
         """
-        import fiona
-        def convert_type(in_type):
-            if in_type == object:
-                return 'str'
-            out_type = type(np.asscalar(np.zeros(1, in_type))).__name__
-            if out_type == 'long':
-                out_type = 'int'
-            return out_type
-
-        def feature(i, row):
-            return {
-                'id': str(i),
-                'type': 'Feature',
-                'properties':
-                    dict((k, v) for k, v in iteritems(row) if k != 'geometry'),
-                'geometry': mapping(row['geometry']) }
-
-        properties = OrderedDict([(col, convert_type(_type)) for col, _type 
-            in zip(self.columns, self.dtypes) if col!='geometry'])
-        # Need to check geom_types before we write to file... 
-        # Some (most?) providers expect a single geometry type: 
-        # Point, LineString, or Polygon
-        geom_types = self['geometry'].geom_type.unique()
-        from os.path import commonprefix # To find longest common prefix
-        geom_type = commonprefix([g[::-1] for g in geom_types])[::-1]  # Reverse
-        if geom_type == '': # No common suffix = mixed geometry types
-            raise ValueError("Geometry column cannot contains mutiple "
-                             "geometry types when writing to file.")
-        schema = {'geometry': geom_type, 'properties': properties}
-        filename = os.path.abspath(os.path.expanduser(filename))
-        with fiona.open(filename, 'w', driver=driver, crs=self.crs, 
-                        schema=schema, **kwargs) as c:
-            for i, row in self.iterrows():
-                c.write(feature(i, row))
+        # The writing logic lives in geopandas.io.file; this method only
+        # delegates. The import is deferred to call time: geopandas.io.file
+        # itself imports GeoDataFrame, so this presumably avoids a circular
+        # import -- TODO confirm.
+        from geopandas.io.file import to_file
+        to_file(self, filename, driver, schema, **kwargs)
 
     def to_crs(self, crs=None, epsg=None, inplace=False):
         """Transform geometries to a new coordinate reference system

diff --git a/geopandas/io/file.py b/geopandas/io/file.py
@@ -1,5 +1,14 @@
+import collections
+import os
+
+import fiona
+import numpy as np
+from shapely.geometry import mapping
+
+from six import iteritems
 from geopandas import GeoDataFrame
 
+
 def read_file(filename, **kwargs):
     """
     Returns a GeoDataFrame from a file.
@@ -8,15 +17,83 @@ def read_file(filename, **kwargs):
     opened and *kwargs* are keyword args to be passed to the method when
     opening the file.
     """
-    import fiona
     bbox = kwargs.pop('bbox', None)
     with fiona.open(filename, **kwargs) as f:
         crs = f.crs
-        if bbox != None:
+        if bbox is not None:
             assert len(bbox)==4
             f_filt = f.filter(bbox=bbox)
         else:
             f_filt = f
         gdf = GeoDataFrame.from_features(f, crs=crs)
 
     return gdf
+
+
+def to_file(df, filename, driver="ESRI Shapefile", schema=None,
+            **kwargs):
+    """
+    Write a GeoDataFrame to an OGR data source, one feature per row.
+
+    The OGR drivers known to Fiona can be listed with:
+    >>> import fiona
+    >>> fiona.supported_drivers
+
+    Parameters
+    ----------
+    df : GeoDataFrame
+        The frame to write. Its ``crs`` attribute is passed to Fiona and
+        its 'geometry' column supplies the geometry of each feature.
+    filename : string
+        Path to write to. ``~`` is expanded and the path is made
+        absolute before it is handed to Fiona.
+    driver : string, default 'ESRI Shapefile'
+        The OGR format driver used to write the vector file.
+    schema : dict, default None
+        Schema dictionary passed to Fiona. If None, it is obtained from
+        ``infer_schema(df)``.
+
+    Any further keyword arguments are forwarded to fiona.open and can be
+    used to write to multi-layer data, store data within archives
+    (zip files), etc.
+    """
+    if schema is None:
+        schema = infer_schema(df)
+    target = os.path.abspath(os.path.expanduser(filename))
+
+    def as_feature(index, record):
+        # Every column except the geometry becomes a feature property
+        attributes = dict(
+            (name, value) for name, value in iteritems(record)
+            if name != 'geometry')
+        return {
+            'id': str(index),
+            'type': 'Feature',
+            'properties': attributes,
+            'geometry': mapping(record['geometry'])
+        }
+
+    with fiona.open(target, 'w', driver=driver, crs=df.crs,
+                    schema=schema, **kwargs) as sink:
+        for index, record in df.iterrows():
+            sink.write(as_feature(index, record))
+
+
+def infer_schema(df):
+    """
+    Build a Fiona schema dictionary describing a GeoDataFrame.
+
+    Parameters
+    ----------
+    df : GeoDataFrame
+        Frame with a 'geometry' column. Every other column becomes a
+        property of the schema.
+
+    Returns
+    -------
+    dict
+        ``{'geometry': <type name>, 'properties': OrderedDict}``. The
+        properties map each non-geometry column, in column order, to a
+        type name derived from the column's dtype.
+
+    Raises
+    ------
+    ValueError
+        If the geometry column does not reduce to a single geometry
+        type. A type and its 'Multi' counterpart (e.g. 'Polygon' and
+        'MultiPolygon') are accepted together and reported under the
+        single-part name.
+    """
+    from os.path import commonprefix   # To find longest common prefix
+
+    def convert_type(in_type):
+        # Object columns are declared as strings
+        if in_type == object:
+            return 'str'
+        # Name of the Python scalar type a value of this dtype converts
+        # to. ndarray.item() replaces np.asscalar, which was deprecated
+        # and later removed from NumPy.
+        out_type = type(np.zeros(1, in_type).item()).__name__
+        if out_type == 'long':
+            out_type = 'int'
+        return out_type
+
+    properties = collections.OrderedDict([
+        (col, convert_type(_type)) for col, _type in
+        zip(df.columns, df.dtypes) if col != 'geometry'
+    ])
+    # Need to check geom_types before we write to file...
+    # Some (most?) providers expect a single geometry type:
+    # Point, LineString, or Polygon
+    geom_types = df['geometry'].geom_type.unique()
+    # Longest common suffix = common prefix of the reversed names
+    geom_type = commonprefix([g[::-1] for g in geom_types])[::-1]
+    # A non-empty suffix alone is not proof of compatibility: 'Polygon'
+    # and 'GeometryCollection' share 'on', 'LineString' and 'LinearRing'
+    # share 'ing'. Require every type to be the suffix itself or its
+    # 'Multi' variant so such mixes are rejected instead of producing a
+    # bogus geometry type.
+    compatible = geom_type != '' and all(
+        g in (geom_type, 'Multi' + geom_type) for g in geom_types)
+    if not compatible:  # Mixed (or no) geometry types
+        raise ValueError("Geometry column cannot contains mutiple "
+                         "geometry types when writing to file.")
+
+    return {'geometry': geom_type, 'properties': properties}
diff --git a/tests/test_geodataframe.py b/tests/test_geodataframe.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 
+import collections
 import json
 import os
 import tempfile
@@ -24,6 +25,8 @@ def setUp(self):
         nybb_filename = download_nybb()
 
         self.df = read_file('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename)
+        with fiona.open('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename) as f:
+            self.schema = f.schema
         self.tempdir = tempfile.mkdtemp()
         self.boros = self.df['BoroName']
         self.crs = {'init': 'epsg:4326'}
@@ -315,6 +318,29 @@ def test_mixed_types_to_file(self):
         with self.assertRaises(ValueError):
             s.to_file(tempfilename)
 
+    def test_to_file_schema(self):
+        """
+        A schema passed to to_file must be the schema of the file
+        that gets written.
+        """
+        out_path = os.path.join(self.tempdir, 'test.shp')
+        expected = {
+            'geometry': 'Polygon',
+            'properties': collections.OrderedDict([
+                ('Shape_Leng', 'float:19.11'),
+                ('BoroName', 'str:40'),
+                ('BoroCode', 'int:10'),
+                ('Shape_Area', 'float:19.11'),
+            ]),
+        }
+
+        # Writing only the first two features keeps the test quick
+        subset = self.df.iloc[:2]
+        subset.to_file(out_path, schema=expected)
+
+        # Read the schema back from the file that was just written
+        with fiona.open(out_path) as written:
+            written_schema = written.schema
+
+        self.assertEqual(written_schema, expected)
+
     def test_bool_index(self):
         # Find boros with 'B' in their name
         df = self.df[self.df['BoroName'].str.contains('B')]