Workflow to select blocks with fewer than 50 households and join them with adjacent blocks, but only as long as the combined total stays below 100 households.

In [39]:
import pandas as pd
import geopandas as gpd
%matplotlib inline

In [27]:
#Read the block polygons from the shapefile into a GeoDataFrame
blocksPath = './data/WAKE/BlockMece.shp'
fcBlocks = gpd.read_file(blocksPath)

In [48]:
#Subset blocks with fewer than 50 black households
#(reset_index() keeps the original row labels in an 'index' column)
fcBlocksSubset = fcBlocks[fcBlocks.BlackHH < 50].reset_index()
#Dissolve the subset into a single geometry, then split it back into its
#separate parts. Multi-part geometries can no longer be iterated directly in
#Shapely 2.0, so go through `.geoms`; if the union is a single geometry with
#no `.geoms`, wrap it in a list so one row is still produced.
dissolvedGeom = fcBlocksSubset.unary_union
dissolvedParts = list(getattr(dissolvedGeom, 'geoms', [dissolvedGeom]))
fcBlocksDissolved = gpd.GeoDataFrame(geometry = dissolvedParts)
#Give every dissolved part an ID equal to its row index
fcBlocksDissolved['ID'] = fcBlocksDissolved.index
#Copy over crs to new file
fcBlocksDissolved.crs = fcBlocksSubset.crs
fcBlocksDissolved.head()

Unnamed: 0,geometry,ID
0,"POLYGON ((-78.570272 35.757264, -78.570391 35....",0
1,"POLYGON ((-78.59899299999999 35.740001, -78.60...",1
2,"POLYGON ((-78.625834 35.760773, -78.626323 35....",2
3,"POLYGON ((-78.42842899999999 35.783968, -78.42...",3
4,"POLYGON ((-78.77895099999999 35.540928, -78.77...",4


In [49]:
#Tag each subset block with the ID of the dissolved polygon it lies within.
#A left join keeps every subset block, matched or not.
#NOTE(review): newer geopandas releases rename `op` to `predicate` - confirm
#the installed version before changing the keyword.
fcBlockSubset2 = gpd.sjoin(
    fcBlocksSubset,
    fcBlocksDissolved,
    how='left',
    op='within',
)
fcBlockSubset2.head()

Unnamed: 0,index,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,P003001,...,MECE1,MECE2,MECE3,MECE4,MECE5,Total,BlackHH,geometry,index_right,ID
0,1,37,183,52806,2014,371830528062014,N,1,1,1,...,2.0,0.0,0.0,0.0,0.0,2.0,1.0,"POLYGON ((-78.568485 35.765272, -78.568608 35....",106,106
1,2,37,183,52706,3004,371830527063004,N,34,93,93,...,18.0,7.0,8.0,0.0,5.0,33.0,17.182796,"POLYGON ((-78.58127 35.808012, -78.58125199999...",181,181
2,3,37,183,52803,2011,371830528032011,N,39,112,112,...,5.0,36.0,6.0,2.0,12.0,49.0,31.339286,"POLYGON ((-78.582753 35.702583, -78.5834249999...",151,151
3,6,37,183,53108,1016,371830531081016,N,5,11,11,...,0.0,1.0,4.0,0.0,1.0,5.0,5.0,"POLYGON ((-78.724447 35.546779, -78.724259 35....",5,5
4,7,37,183,53608,1043,371830536081043,N,3,4,4,...,0.0,0.0,1.0,1.0,1.0,2.0,3.0,"POLYGON ((-78.842438 35.850746, -78.842748 35....",249,249


In [57]:
#Total the BlackHH values of the member blocks under each dissolved-block ID
sumHH = fcBlockSubset2.groupby('ID')[['BlackHH']].sum()
#Attach the totals to the dissolved polygons, matching on index (inner join)
fcBlocksNew = fcBlocksDissolved.merge(sumHH, left_index=True, right_index=True)
#Write the result out as a scratch shapefile
fcBlocksNew.to_file('./scratch/foo.shp')

Now to select new blocks with > 100 HH and break them up.
* Find IDs of dissolved blocks with HH > 100
* Iterate through each:
 * Select the subset and ID-joined blocks with the ID matching the current dissolved block
 * From those, select the easternmost block
  * Extract its HH value to a variable "HH"
  * Select adjacent blocks and add their HH values to "HH"; keep a list of block IDs
  * Stop when HH > 100 and dissolve those blocks together. 
  * Select the easternmost of the remaining blocks and repeat
 * Move to the next dissolved block. 

In [54]:
#Find IDs of dissolved blocks with HH > 100
fcTooBig = fcBlocksNew.query('BlackHH > 100')
#Preview the oversized dissolved blocks rather than dumping the whole
#joined block table; these are the ones that need to be broken up
fcTooBig.head()

Unnamed: 0,geometry,ID,BlackHH
1,"POLYGON ((-78.59899299999999 35.740001, -78.60...",1,780.254682
2,"POLYGON ((-78.625834 35.760773, -78.626323 35....",2,2834.916727
17,"POLYGON ((-78.81147 35.586576, -78.810321 35.5...",17,166.40639
69,"POLYGON ((-78.53402299999999 35.721782, -78.53...",69,285.323307
85,"POLYGON ((-78.597206 35.686495, -78.598141 35....",85,225.147763


In [58]:
#We'll iterate through each; for now pull the member blocks whose
#dissolved-block ID is 1
fcX = fcBlockSubset2[fcBlockSubset2['ID'] == 1]
fcX

Unnamed: 0,index,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,P003001,...,MECE1,MECE2,MECE3,MECE4,MECE5,Total,BlackHH,geometry,index_right,ID
9,14,37,183,52102,1023,371830521021023,N,3,6,6,...,0.0,3.0,0.0,1.0,0.0,4.0,1.5,"POLYGON ((-78.60115399999999 35.75393, -78.601...",1,1
93,104,37,183,52102,1029,371830521021029,N,35,92,92,...,17.0,7.0,9.0,7.0,9.0,40.0,34.619565,"POLYGON ((-78.60628699999999 35.753717, -78.60...",1,1
149,166,37,183,52102,1034,371830521021034,N,33,99,99,...,20.0,12.0,3.0,3.0,5.0,38.0,28.666667,"POLYGON ((-78.60309099999999 35.747419, -78.60...",1,1
154,171,37,183,52102,1037,371830521021037,N,17,33,33,...,12.0,7.0,3.0,0.0,1.0,22.0,14.424242,"POLYGON ((-78.606644 35.748284, -78.6068000000...",1,1
210,237,37,183,52102,2001,371830521022001,N,18,47,47,...,10.0,11.0,5.0,2.0,2.0,28.0,14.553191,"POLYGON ((-78.607069 35.74529, -78.60735 35.74...",1,1
230,257,37,183,52101,2000,371830521012000,N,15,29,29,...,12.0,6.0,3.0,1.0,0.0,22.0,13.965517,"POLYGON ((-78.61336799999999 35.754466, -78.61...",1,1
231,258,37,183,52101,4029,371830521014029,N,30,72,72,...,18.0,8.0,5.0,8.0,9.0,39.0,27.5,"POLYGON ((-78.62652299999999 35.750865, -78.62...",1,1
232,259,37,183,52101,4024,371830521014024,N,14,36,36,...,7.0,6.0,4.0,4.0,2.0,21.0,11.666667,"POLYGON ((-78.61920000000001 35.751111, -78.61...",1,1
281,313,37,183,52803,3035,371830528033035,N,67,202,202,...,27.0,35.0,23.0,14.0,19.0,99.0,46.435644,"POLYGON ((-78.59899299999999 35.740001, -78.60...",1,1
309,342,37,183,52803,3028,371830528033028,N,24,74,74,...,5.0,16.0,11.0,3.0,9.0,35.0,20.756757,"POLYGON ((-78.600678 35.747259, -78.6005589999...",1,1
