Skip to content

Commit

Permalink
dissolve bug fixes (#323)
Browse files Browse the repository at this point in the history
  • Loading branch information
nickeubank authored and jorisvandenbossche committed May 27, 2016
1 parent 0f82ced commit f58b5f4
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 27 deletions.
36 changes: 18 additions & 18 deletions geopandas/geodataframe.py
Expand Up @@ -449,9 +449,14 @@ def plot(self, *args, **kwargs):
plot.__doc__ = plot_dataframe.__doc__


def dissolve(self, by=None, aggfunc='first'):
def dissolve(self, by=None, aggfunc='first', as_index=True):
"""
Dissolve geometries within `groupby` into a single observation.
This is accomplished by applying the `unary_union` method
to all geometries within a group.
Observations associated with each `groupby` group will be aggregated
using the `aggfunc`.
Parameters
----------
Expand All @@ -460,40 +465,35 @@ def dissolve(self, by=None, aggfunc='first'):
aggfunc : function or string, default "first"
Aggregation function for manipulation of data associated
with each group. Passed to pandas `groupby.agg` method.
as_index : boolean, default True
If True, the groupby columns become the index of the result.
Returns
-------
GeoDataFrame
"""

# Process non-spatial component
data = self.drop(labels=self.geometry.name, axis=1).copy()
data = self.drop(labels=self.geometry.name, axis=1)
aggregated_data = data.groupby(by=by).agg(aggfunc)


# Process spatial component
groupby_plus_geometry_cols = [self.geometry.name]
groupby_plus_geometry_cols.append(by)
geometry = self[groupby_plus_geometry_cols].copy()

def merge_geometries(block):

merged_geom = block.unary_union
return merged_geom

new_index = block.drop(self.geometry.name, axis=1).iloc[0][by]
merged_w_index = GeoSeries(merged_geom, index=Index(Series(new_index),name=by),
name=self.geometry.name)
return merged_w_index

g = self.groupby(by=by, group_keys=False)[self.geometry.name].agg(merge_geometries)

g = geometry.groupby(by=by, group_keys=False).apply(merge_geometries)

aggregated_geometry = GeoDataFrame(g,
index=g.index,
geometry=self.geometry.name)
# Aggregate
aggregated_geometry = GeoDataFrame(g, geometry=self.geometry.name)
# Recombine
aggregated = aggregated_geometry.join(aggregated_data)
aggregated = aggregated.set_geometry(self.geometry.name)

# Reset if requested
if not as_index:
aggregated = aggregated.reset_index()

return aggregated

def _dataframe_set_geometry(self, col, drop=False, inplace=False, crs=None):
Expand Down
37 changes: 28 additions & 9 deletions tests/test_dissolve.py → geopandas/tests/test_dissolve.py
Expand Up @@ -8,6 +8,11 @@
from .util import unittest, download_nybb
from pandas.util.testing import assert_frame_equal
from pandas import Index
from distutils.version import LooseVersion
import pandas as pd

# Skip reason shared by the dissolve tests below. The first literal ends with
# a space so the two adjacent literals do not run together after the comma.
pandas_0_15_problem = ('fails under pandas < 0.16 due to issue 324, '
                       'not problem with dissolve.')

class TestDataFrame(unittest.TestCase):

Expand All @@ -28,7 +33,7 @@ def setUp(self):
others = self.polydf.loc[0:2,]

collapsed = [others.geometry.unary_union, manhattan_bronx.geometry.unary_union]
merged_shapes = GeoDataFrame({'myshapes': collapsed}, geometry='myshapes',
merged_shapes = GeoDataFrame({'myshapes': collapsed}, geometry='myshapes',
index=Index([5,6], name='manhattan_bronx'))

# Different expected results
Expand All @@ -40,25 +45,39 @@ def setUp(self):
self.mean['BoroCode'] = [4,1.5]


@unittest.skipIf(str(pd.__version__) < LooseVersion('0.16'), pandas_0_15_problem)
def test_geom_dissolve(self):
test = self.polydf.dissolve('manhattan_bronx')
self.assertTrue(test.geometry.name == 'myshapes')
self.assertTrue(test.geom_almost_equals(self.first).all())

@unittest.skipIf(str(pd.__version__) < LooseVersion('0.16'), pandas_0_15_problem)
def test_first_dissolve(self):
test = self.polydf.dissolve('manhattan_bronx')
test = test.drop('myshapes', axis=1)
first = self.first.drop('myshapes', axis=1)
assert_frame_equal(first, test)
assert_frame_equal(self.first, test, check_column_type=False)

@unittest.skipIf(str(pd.__version__) < LooseVersion('0.16'), pandas_0_15_problem)
def test_mean_dissolve(self):
test = self.polydf.dissolve('manhattan_bronx', aggfunc='mean')
test = test.drop('myshapes', axis=1)
mean = self.mean.drop('myshapes', axis=1)
assert_frame_equal(mean, test)
assert_frame_equal(self.mean, test, check_column_type=False)

test = self.polydf.dissolve('manhattan_bronx', aggfunc=np.mean)
test = test.drop('myshapes', axis=1)
assert_frame_equal(mean, test)
assert_frame_equal(self.mean, test, check_column_type=False)

@unittest.skipIf(str(pd.__version__) < LooseVersion('0.16'), pandas_0_15_problem)
def test_multicolumn_dissolve(self):
multi = self.polydf.copy()
multi['dup_col'] = multi.manhattan_bronx
multi_test = multi.dissolve(['manhattan_bronx', 'dup_col'], aggfunc='first')

first = self.first.copy()
first['dup_col'] = first.index
first = first.set_index([first.index, 'dup_col'])

assert_frame_equal(multi_test, first, check_column_type=False)

@unittest.skipIf(str(pd.__version__) < LooseVersion('0.16'), pandas_0_15_problem)
def test_reset_index(self):
test = self.polydf.dissolve('manhattan_bronx', as_index=False)
comparison = self.first.reset_index()
assert_frame_equal(comparison, test, check_column_type=False)

0 comments on commit f58b5f4

Please sign in to comment.